1 #ifndef lint 2 static char vcid[] = "$Id: mpibaij.c,v 1.59 1997/03/26 01:36:18 bsmith Exp balay $"; 3 #endif 4 5 #include "pinclude/pviewer.h" 6 #include "src/mat/impls/baij/mpi/mpibaij.h" 7 #include "src/vec/vecimpl.h" 8 9 10 extern int MatSetUpMultiply_MPIBAIJ(Mat); 11 extern int DisAssemble_MPIBAIJ(Mat); 12 extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int); 13 extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatGetSubMatrixCall,Mat **); 14 extern int MatLUFactorSymbolic_MPIBAIJ(Mat,IS,IS,double,Mat *); 15 extern int MatLUFactorNumeric_MPIBAIJ(Mat,Mat *); 16 extern int MatLUFactor_MPIBAIJ(Mat,IS,IS,double); 17 extern int MatSolve_MPIBAIJ(Mat,Vec,Vec); 18 extern int MatSolveAdd_MPIBAIJ(Mat,Vec,Vec,Vec); 19 extern int MatSolveTrans_MPIBAIJ(Mat,Vec,Vec); 20 extern int MatSolveTransAdd_MPIBAIJ(Mat,Vec,Vec,Vec); 21 extern int MatILUFactorSymbolic_MPIBAIJ(Mat,IS,IS,double,int,Mat *); 22 23 24 /* 25 Local utility routine that creates a mapping from the global column 26 number to the local number in the off-diagonal part of the local 27 storage of the matrix. This is done in a non scable way since the 28 length of colmap equals the global matrix length. 29 */ 30 #undef __FUNC__ 31 #define __FUNC__ "CreateColmap_MPIBAIJ_Private" 32 static int CreateColmap_MPIBAIJ_Private(Mat mat) 33 { 34 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 35 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data; 36 int nbs = B->nbs,i,bs=B->bs;; 37 38 baij->colmap = (int *) PetscMalloc(baij->Nbs*sizeof(int));CHKPTRQ(baij->colmap); 39 PLogObjectMemory(mat,baij->Nbs*sizeof(int)); 40 PetscMemzero(baij->colmap,baij->Nbs*sizeof(int)); 41 for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1; 42 return 0; 43 } 44 45 #undef __FUNC__ 46 #define __FUNC__ "MatGetRowIJ_MPIBAIJ(" 47 static int MatGetRowIJ_MPIBAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja, 48 PetscTruth *done) 49 { 50 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *) mat->data; 51 int ierr; 52 if (aij->size == 1) { 53 ierr = MatGetRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr); 54 } else SETERRQ(1,0,"not supported in parallel"); 55 return 0; 56 } 57 58 #undef __FUNC__ 59 #define __FUNC__ "MatRestoreRowIJ_MPIBAIJ" 60 static int MatRestoreRowIJ_MPIBAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja, 61 PetscTruth *done) 62 { 63 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *) mat->data; 64 int ierr; 65 if (aij->size == 1) { 66 ierr = MatRestoreRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr); 67 } else SETERRQ(1,0,"not supported in parallel"); 68 return 0; 69 } 70 #define CHUNKSIZE 10 71 72 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 73 { \ 74 \ 75 brow = row/bs; \ 76 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 77 rmax = imax[brow]; nrow = ailen[brow]; \ 78 bcol = col/bs; \ 79 ridx = row % bs; cidx = col % bs; \ 80 low = 0; high = nrow; \ 81 while (high-low > 3) { \ 82 t = (low+high)/2; \ 83 if (rp[t] > bcol) high = t; \ 84 else low = t; \ 85 } \ 86 for ( _i=low; _i<high; _i++ ) { \ 87 if (rp[_i] > bcol) break; \ 88 if (rp[_i] == bcol) { \ 89 bap = ap + bs2*_i + bs*cidx + ridx; \ 90 if (addv == ADD_VALUES) *bap += value; \ 91 else *bap = value; \ 92 goto _noinsert; \ 93 } \ 94 } \ 95 if (a->nonew) goto _noinsert; \ 96 if (nrow >= rmax) { \ 97 /* there is no extra room in row, therefore enlarge */ \ 98 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 99 Scalar *new_a; \ 100 \ 101 /* malloc new storage space */ \ 102 len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \ 103 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 104 new_j = (int *) (new_a + bs2*new_nz); \ 105 new_i = new_j + new_nz; \ 106 \ 107 /* copy over old data into new slots */ \ 108 for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \ 109 for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 110 PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); \ 111 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 112 PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \ 113 len*sizeof(int)); \ 114 PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); \ 115 PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \ 116 PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \ 117 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar)); \ 118 /* free up old matrix storage */ \ 119 PetscFree(a->a); \ 120 if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \ 121 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 122 a->singlemalloc = 1; \ 123 \ 124 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 125 rmax = imax[brow] = imax[brow] + CHUNKSIZE; \ 126 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \ 127 a->maxnz += bs2*CHUNKSIZE; \ 128 a->reallocs++; \ 129 a->nz++; \ 130 } \ 131 N = nrow++ - 1; \ 132 /* shift up all the later entries in this row */ \ 133 for ( ii=N; ii>=_i; ii-- ) { \ 134 rp[ii+1] = rp[ii]; \ 135 PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \ 136 } \ 137 if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar)); \ 138 rp[_i] = bcol; \ 139 ap[bs2*_i + bs*cidx + ridx] = value; \ 140 _noinsert:; \ 141 ailen[brow] = nrow; \ 142 } 143 144 extern int MatSetValues_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 145 #undef __FUNC__ 146 #define __FUNC__ "MatSetValues_MPIBAIJ" 147 int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 148 { 149 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 150 Scalar value; 151 int ierr,i,j,row,col; 152 int roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ; 153 int rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 154 int cend_orig=baij->cend_bs,bs=baij->bs; 155 156 /* Some Variables required in the macro */ 157 Mat A = baij->A; 158 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data; 159 int *rp,ii,nrow,_i,rmax,N; 160 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 161 int *aj=a->j,brow,bcol; 162 int low,high,t,ridx,cidx,bs2=a->bs2; 163 Scalar *ap,*aa=a->a,*bap; 164 165 for ( i=0; i<m; i++ ) { 166 #if defined(PETSC_BOPT_g) 167 if (im[i] < 0) SETERRQ(1,0,"Negative row"); 168 if (im[i] >= baij->M) SETERRQ(1,0,"Row too large"); 169 #endif 170 if (im[i] >= rstart_orig && im[i] < rend_orig) { 171 row = im[i] - rstart_orig; 172 for ( j=0; j<n; j++ ) { 173 if (in[j] >= cstart_orig && in[j] < cend_orig){ 174 col = in[j] - cstart_orig; 175 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 176 MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 177 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 178 } 179 #if defined(PETSC_BOPT_g) 180 else if (in[j] < 0) {SETERRQ(1,0,"Negative column");} 181 else if (in[j] >= baij->N) {SETERRQ(1,0,"Col too large");} 182 #endif 183 else { 184 if (mat->was_assembled) { 185 if (!baij->colmap) { 186 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 187 } 188 col = baij->colmap[in[j]/bs] - 1 + in[j]%bs; 189 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 190 ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 191 col = in[j]; 192 } 193 } 194 else col = in[j]; 195 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 196 ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); 197 } 198 } 199 } 200 else { 201 if (roworiented && !baij->donotstash) { 202 ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 203 } 204 else { 205 if (!baij->donotstash) { 206 row = im[i]; 207 for ( j=0; j<n; j++ ) { 208 ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 209 } 210 } 211 } 212 } 213 } 214 return 0; 215 } 216 217 extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 218 #undef __FUNC__ 219 #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ" 220 int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 221 { 222 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 223 Scalar *value,*barray=baij->barray; 224 int ierr,i,j,ii,jj,row,col,k,l; 225 int roworiented = baij->roworiented,rstart=baij->rstart ; 226 int rend=baij->rend,cstart=baij->cstart,stepval; 227 int cend=baij->cend,bs=baij->bs,bs2=baij->bs2; 228 229 230 if(!barray) { 231 barray = (Scalar*) PetscMalloc(bs2*sizeof(Scalar)); CHKPTRQ(barray); 232 baij->barray = barray; 233 } 234 235 if (roworiented) { 236 stepval = (n-1)*bs; 237 } else { 238 stepval = (m-1)*bs; 239 } 240 for ( i=0; i<m; i++ ) { 241 #if defined(PETSC_BOPT_g) 242 if (im[i] < 0) SETERRQ(1,0,"Negative row"); 243 if (im[i] >= baij->Mbs) SETERRQ(1,0,"Row too large"); 244 #endif 245 if (im[i] >= rstart && im[i] < rend) { 246 row = im[i] - rstart; 247 for ( j=0; j<n; j++ ) { 248 if (roworiented) { 249 value = v + i*(stepval+bs)*bs + j*bs; 250 } else { 251 value = v + j*(stepval+bs)*bs + i*bs; 252 } 253 for ( ii=0; ii<bs; ii++,value+=stepval ) 254 for (jj=0; jj<bs; jj++ ) 255 *barray++ = *value++; 256 barray -=bs2; 257 258 if (in[j] >= cstart && in[j] < cend){ 259 col = in[j] - cstart; 260 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 261 } 262 #if defined(PETSC_BOPT_g) 263 else if (in[j] < 0) {SETERRQ(1,0,"Negative column");} 264 else if (in[j] >= baij->Nbs) {SETERRQ(1,0,"Col too large");} 265 #endif 266 else { 267 if (mat->was_assembled) { 268 if (!baij->colmap) { 269 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 270 } 271 col = baij->colmap[in[j]] - 1; 272 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 273 ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 274 col = in[j]; 275 } 276 } 277 else col = in[j]; 278 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 279 } 280 } 281 } 282 else { 283 if (!baij->donotstash) { 284 if (roworiented ) { 285 row = im[i]*bs; 286 value = v + i*(stepval+bs)*bs; 287 for ( j=0; j<bs; j++,row++ ) { 288 for ( k=0; k<n; k++ ) { 289 for ( col=in[k]*bs,l=0; l<bs; l++,col++) { 290 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr); 291 } 292 } 293 } 294 } 295 else { 296 for ( j=0; j<n; j++ ) { 297 value = v + j*(stepval+bs)*bs + i*bs; 298 col = in[j]*bs; 299 for ( k=0; k<bs; k++,col++,value+=stepval) { 300 for ( row = im[i]*bs,l=0; l<bs; l++,row++) { 301 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr); 302 } 303 } 304 } 305 306 } 307 } 308 } 309 } 310 return 0; 311 } 312 313 #undef __FUNC__ 314 #define __FUNC__ "MatGetValues_MPIBAIJ" 315 int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 316 { 317 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 318 int bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs; 319 int bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col; 320 321 for ( i=0; i<m; i++ ) { 322 if (idxm[i] < 0) SETERRQ(1,0,"Negative row"); 323 if (idxm[i] >= baij->M) SETERRQ(1,0,"Row too large"); 324 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 325 row = idxm[i] - bsrstart; 326 for ( j=0; j<n; j++ ) { 327 if (idxn[j] < 0) SETERRQ(1,0,"Negative column"); 328 if (idxn[j] >= baij->N) SETERRQ(1,0,"Col too large"); 329 if (idxn[j] >= bscstart && idxn[j] < bscend){ 330 col = idxn[j] - bscstart; 331 ierr = MatGetValues(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 332 } 333 else { 334 if (!baij->colmap) { 335 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 336 } 337 if((baij->colmap[idxn[j]/bs]-1 < 0) || 338 (baij->garray[(baij->colmap[idxn[j]/bs]-1)/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 339 else { 340 col = (baij->colmap[idxn[j]/bs]-1) + idxn[j]%bs; 341 ierr = MatGetValues(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 342 } 343 } 344 } 345 } 346 else { 347 SETERRQ(1,0,"Only local values currently supported"); 348 } 349 } 350 return 0; 351 } 352 353 #undef __FUNC__ 354 #define __FUNC__ "MatNorm_MPIBAIJ" 355 int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm) 356 { 357 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 358 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data; 359 int ierr, i,bs2=baij->bs2; 360 double sum = 0.0; 361 Scalar *v; 362 363 if (baij->size == 1) { 364 ierr = MatNorm(baij->A,type,norm); CHKERRQ(ierr); 365 } else { 366 if (type == NORM_FROBENIUS) { 367 v = amat->a; 368 for (i=0; i<amat->nz*bs2; i++ ) { 369 #if defined(PETSC_COMPLEX) 370 sum += real(conj(*v)*(*v)); v++; 371 #else 372 sum += (*v)*(*v); v++; 373 #endif 374 } 375 v = bmat->a; 376 for (i=0; i<bmat->nz*bs2; i++ ) { 377 #if defined(PETSC_COMPLEX) 378 sum += real(conj(*v)*(*v)); v++; 379 #else 380 sum += (*v)*(*v); v++; 381 #endif 382 } 383 MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm); 384 *norm = sqrt(*norm); 385 } 386 else 387 SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet"); 388 } 389 return 0; 390 } 391 392 #undef __FUNC__ 393 #define __FUNC__ "MatAssemblyBegin_MPIBAIJ" 394 int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 395 { 396 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 397 MPI_Comm comm = mat->comm; 398 int size = baij->size, *owners = baij->rowners,bs=baij->bs; 399 int rank = baij->rank,tag = mat->tag, *owner,*starts,count,ierr; 400 MPI_Request *send_waits,*recv_waits; 401 int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 402 InsertMode addv; 403 Scalar *rvalues,*svalues; 404 405 /* make sure all processors are either in INSERTMODE or ADDMODE */ 406 MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm); 407 if (addv == (ADD_VALUES|INSERT_VALUES)) { 408 SETERRQ(1,0,"Some processors inserted others added"); 409 } 410 mat->insertmode = addv; /* in case this processor had no cache */ 411 412 /* first count number of contributors to each processor */ 413 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 414 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 415 owner = (int *) PetscMalloc( (baij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 416 for ( i=0; i<baij->stash.n; i++ ) { 417 idx = baij->stash.idx[i]; 418 for ( j=0; j<size; j++ ) { 419 if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 420 nprocs[j]++; procs[j] = 1; owner[i] = j; break; 421 } 422 } 423 } 424 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 425 426 /* inform other processors of number of messages and max length*/ 427 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 428 MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm); 429 nreceives = work[rank]; 430 MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 431 nmax = work[rank]; 432 PetscFree(work); 433 434 /* post receives: 435 1) each message will consist of ordered pairs 436 (global index,value) we store the global index as a double 437 to simplify the message passing. 438 2) since we don't know how long each individual message is we 439 allocate the largest needed buffer for each receive. Potentially 440 this is a lot of wasted space. 441 442 443 This could be done better. 444 */ 445 rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar)); 446 CHKPTRQ(rvalues); 447 recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request)); 448 CHKPTRQ(recv_waits); 449 for ( i=0; i<nreceives; i++ ) { 450 MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 451 comm,recv_waits+i); 452 } 453 454 /* do sends: 455 1) starts[i] gives the starting index in svalues for stuff going to 456 the ith processor 457 */ 458 svalues = (Scalar *) PetscMalloc(3*(baij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 459 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 460 CHKPTRQ(send_waits); 461 starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 462 starts[0] = 0; 463 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 464 for ( i=0; i<baij->stash.n; i++ ) { 465 svalues[3*starts[owner[i]]] = (Scalar) baij->stash.idx[i]; 466 svalues[3*starts[owner[i]]+1] = (Scalar) baij->stash.idy[i]; 467 svalues[3*(starts[owner[i]]++)+2] = baij->stash.array[i]; 468 } 469 PetscFree(owner); 470 starts[0] = 0; 471 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 472 count = 0; 473 for ( i=0; i<size; i++ ) { 474 if (procs[i]) { 475 MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag, 476 comm,send_waits+count++); 477 } 478 } 479 PetscFree(starts); PetscFree(nprocs); 480 481 /* Free cache space */ 482 PLogInfo(mat,"MatAssemblyBegin_MPIBAIJ:Number of off-processor values %d\n",baij->stash.n); 483 ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr); 484 485 baij->svalues = svalues; baij->rvalues = rvalues; 486 baij->nsends = nsends; baij->nrecvs = nreceives; 487 baij->send_waits = send_waits; baij->recv_waits = recv_waits; 488 baij->rmax = nmax; 489 490 return 0; 491 } 492 493 494 #undef __FUNC__ 495 #define __FUNC__ "MatAssemblyEnd_MPIBAIJ" 496 int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 497 { 498 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 499 MPI_Status *send_status,recv_status; 500 int imdex,nrecvs = baij->nrecvs, count = nrecvs, i, n, ierr; 501 int bs=baij->bs,row,col,other_disassembled; 502 Scalar *values,val; 503 InsertMode addv = mat->insertmode; 504 505 /* wait on receives */ 506 while (count) { 507 MPI_Waitany(nrecvs,baij->recv_waits,&imdex,&recv_status); 508 /* unpack receives into our local space */ 509 values = baij->rvalues + 3*imdex*baij->rmax; 510 MPI_Get_count(&recv_status,MPIU_SCALAR,&n); 511 n = n/3; 512 for ( i=0; i<n; i++ ) { 513 row = (int) PetscReal(values[3*i]) - baij->rstart*bs; 514 col = (int) PetscReal(values[3*i+1]); 515 val = values[3*i+2]; 516 if (col >= baij->cstart*bs && col < baij->cend*bs) { 517 col -= baij->cstart*bs; 518 MatSetValues(baij->A,1,&row,1,&col,&val,addv); 519 } 520 else { 521 if (mat->was_assembled) { 522 if (!baij->colmap) { 523 ierr = CreateColmap_MPIBAIJ_Private(mat); CHKERRQ(ierr); 524 } 525 col = (baij->colmap[col/bs]-1)*bs + col%bs; 526 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 527 ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 528 col = (int) PetscReal(values[3*i+1]); 529 } 530 } 531 MatSetValues(baij->B,1,&row,1,&col,&val,addv); 532 } 533 } 534 count--; 535 } 536 PetscFree(baij->recv_waits); PetscFree(baij->rvalues); 537 538 /* wait on sends */ 539 if (baij->nsends) { 540 send_status = (MPI_Status *) PetscMalloc(baij->nsends*sizeof(MPI_Status)); 541 CHKPTRQ(send_status); 542 MPI_Waitall(baij->nsends,baij->send_waits,send_status); 543 PetscFree(send_status); 544 } 545 PetscFree(baij->send_waits); PetscFree(baij->svalues); 546 547 ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr); 548 ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr); 549 550 /* determine if any processor has disassembled, if so we must 551 also disassemble ourselfs, in order that we may reassemble. */ 552 MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm); 553 if (mat->was_assembled && !other_disassembled) { 554 ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 555 } 556 557 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 558 ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr); 559 } 560 ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr); 561 ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr); 562 563 if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;} 564 return 0; 565 } 566 567 #undef __FUNC__ 568 #define __FUNC__ "MatView_MPIBAIJ_Binary" 569 static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer) 570 { 571 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 572 int ierr; 573 574 if (baij->size == 1) { 575 ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 576 } 577 else SETERRQ(1,0,"Only uniprocessor output supported"); 578 return 0; 579 } 580 581 #undef __FUNC__ 582 #define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworMatlab" 583 static int MatView_MPIBAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer) 584 { 585 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 586 int ierr, format,rank,bs = baij->bs; 587 FILE *fd; 588 ViewerType vtype; 589 590 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 591 if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) { 592 ierr = ViewerGetFormat(viewer,&format); 593 if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 594 MatInfo info; 595 MPI_Comm_rank(mat->comm,&rank); 596 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 597 ierr = MatGetInfo(mat,MAT_LOCAL,&info); 598 PetscSequentialPhaseBegin(mat->comm,1); 599 fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n", 600 rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs, 601 baij->bs,(int)info.memory); 602 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info); 603 fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs); 604 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info); 605 fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs); 606 fflush(fd); 607 PetscSequentialPhaseEnd(mat->comm,1); 608 ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr); 609 return 0; 610 } 611 else if (format == VIEWER_FORMAT_ASCII_INFO) { 612 PetscPrintf(mat->comm," block size is %d\n",bs); 613 return 0; 614 } 615 } 616 617 if (vtype == DRAW_VIEWER) { 618 Draw draw; 619 PetscTruth isnull; 620 ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr); 621 ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0; 622 } 623 624 if (vtype == ASCII_FILE_VIEWER) { 625 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 626 PetscSequentialPhaseBegin(mat->comm,1); 627 fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 628 baij->rank,baij->m,baij->rstart*bs,baij->rend*bs,baij->n, 629 baij->cstart*bs,baij->cend*bs); 630 ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 631 ierr = MatView(baij->B,viewer); CHKERRQ(ierr); 632 fflush(fd); 633 PetscSequentialPhaseEnd(mat->comm,1); 634 } 635 else { 636 int size = baij->size; 637 rank = baij->rank; 638 if (size == 1) { 639 ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 640 } 641 else { 642 /* assemble the entire matrix onto first processor. */ 643 Mat A; 644 Mat_SeqBAIJ *Aloc; 645 int M = baij->M, N = baij->N,*ai,*aj,row,col,i,j,k,*rvals; 646 int mbs=baij->mbs; 647 Scalar *a; 648 649 if (!rank) { 650 ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 651 CHKERRQ(ierr); 652 } 653 else { 654 ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 655 CHKERRQ(ierr); 656 } 657 PLogObjectParent(mat,A); 658 659 /* copy over the A part */ 660 Aloc = (Mat_SeqBAIJ*) baij->A->data; 661 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 662 row = baij->rstart; 663 rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals); 664 665 for ( i=0; i<mbs; i++ ) { 666 rvals[0] = bs*(baij->rstart + i); 667 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 668 for ( j=ai[i]; j<ai[i+1]; j++ ) { 669 col = (baij->cstart+aj[j])*bs; 670 for (k=0; k<bs; k++ ) { 671 ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 672 col++; a += bs; 673 } 674 } 675 } 676 /* copy over the B part */ 677 Aloc = (Mat_SeqBAIJ*) baij->B->data; 678 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 679 row = baij->rstart*bs; 680 for ( i=0; i<mbs; i++ ) { 681 rvals[0] = bs*(baij->rstart + i); 682 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 683 for ( j=ai[i]; j<ai[i+1]; j++ ) { 684 col = baij->garray[aj[j]]*bs; 685 for (k=0; k<bs; k++ ) { 686 ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 687 col++; a += bs; 688 } 689 } 690 } 691 PetscFree(rvals); 692 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 693 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 694 if (!rank) { 695 ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 696 } 697 ierr = MatDestroy(A); CHKERRQ(ierr); 698 } 699 } 700 return 0; 701 } 702 703 704 705 #undef __FUNC__ 706 #define __FUNC__ "MatView_MPIBAIJ" 707 int MatView_MPIBAIJ(PetscObject obj,Viewer viewer) 708 { 709 Mat mat = (Mat) obj; 710 int ierr; 711 ViewerType vtype; 712 713 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 714 if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER || 715 vtype == DRAW_VIEWER || vtype == MATLAB_VIEWER) { 716 ierr = MatView_MPIBAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 717 } 718 else if (vtype == BINARY_FILE_VIEWER) { 719 return MatView_MPIBAIJ_Binary(mat,viewer); 720 } 721 return 0; 722 } 723 724 #undef __FUNC__ 725 #define __FUNC__ "MatDestroy_MPIBAIJ" 726 int MatDestroy_MPIBAIJ(PetscObject obj) 727 { 728 Mat mat = (Mat) obj; 729 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 730 int ierr; 731 732 #if defined(PETSC_LOG) 733 PLogObjectState(obj,"Rows=%d, Cols=%d",baij->M,baij->N); 734 #endif 735 736 PetscFree(baij->rowners); 737 ierr = MatDestroy(baij->A); CHKERRQ(ierr); 738 ierr = MatDestroy(baij->B); CHKERRQ(ierr); 739 if (baij->colmap) PetscFree(baij->colmap); 740 if (baij->garray) PetscFree(baij->garray); 741 if (baij->lvec) VecDestroy(baij->lvec); 742 if (baij->Mvctx) VecScatterDestroy(baij->Mvctx); 743 if (baij->rowvalues) PetscFree(baij->rowvalues); 744 if (baij->barray) PetscFree(baij->barray); 745 PetscFree(baij); 746 if (mat->mapping) { 747 ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr); 748 } 749 PLogObjectDestroy(mat); 750 PetscHeaderDestroy(mat); 751 return 0; 752 } 753 754 #undef __FUNC__ 755 #define __FUNC__ "MatMult_MPIBAIJ" 756 int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 757 { 758 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 759 int ierr, nt; 760 761 VecGetLocalSize_Fast(xx,nt); 762 if (nt != a->n) { 763 SETERRQ(1,0,"Incompatible partition of A and xx"); 764 } 765 VecGetLocalSize_Fast(yy,nt); 766 if (nt != a->m) { 767 SETERRQ(1,0,"Incompatible parition of A and yy"); 768 } 769 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 770 ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr); 771 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 772 ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 773 ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 774 return 0; 775 } 776 777 #undef __FUNC__ 778 #define __FUNC__ "MatMultAdd_MPIBAIJ" 779 int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 780 { 781 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 782 int ierr; 783 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 784 ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 785 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 786 ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 787 return 0; 788 } 789 790 #undef __FUNC__ 791 #define __FUNC__ "MatMultTrans_MPIBAIJ" 792 int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy) 793 { 794 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 795 int ierr; 796 797 /* do nondiagonal part */ 798 ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 799 /* send it on its way */ 800 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 801 /* do local part */ 802 ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr); 803 /* receive remote parts: note this assumes the values are not actually */ 804 /* inserted in yy until the next line, which is true for my implementation*/ 805 /* but is not perhaps always true. */ 806 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 807 return 0; 808 } 809 810 #undef __FUNC__ 811 #define __FUNC__ "MatMultTransAdd_MPIBAIJ" 812 int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 813 { 814 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 815 int ierr; 816 817 /* do nondiagonal part */ 818 ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 819 /* send it on its way */ 820 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 821 /* do local part */ 822 ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 823 /* receive remote parts: note this assumes the values are not actually */ 824 /* inserted in yy until the next line, which is true for my implementation*/ 825 /* but is not perhaps always true. */ 826 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 827 return 0; 828 } 829 830 /* 831 This only works correctly for square matrices where the subblock A->A is the 832 diagonal block 833 */ 834 #undef __FUNC__ 835 #define __FUNC__ "MatGetDiagonal_MPIBAIJ" 836 int MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 837 { 838 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 839 if (a->M != a->N) 840 SETERRQ(1,0,"Supports only square matrix where A->A is diag block"); 841 return MatGetDiagonal(a->A,v); 842 } 843 844 #undef __FUNC__ 845 #define __FUNC__ "MatScale_MPIBAIJ" 846 int MatScale_MPIBAIJ(Scalar *aa,Mat A) 847 { 848 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 849 int ierr; 850 ierr = MatScale(aa,a->A); CHKERRQ(ierr); 851 ierr = MatScale(aa,a->B); CHKERRQ(ierr); 852 return 0; 853 } 854 855 #undef __FUNC__ 856 #define __FUNC__ "MatGetSize_MPIBAIJ" 857 int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n) 858 { 859 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 860 *m = mat->M; *n = mat->N; 861 return 0; 862 } 863 864 #undef __FUNC__ 865 #define __FUNC__ "MatGetLocalSize_MPIBAIJ" 866 int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n) 867 { 868 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 869 *m = mat->m; *n = mat->N; 870 return 0; 871 } 872 873 #undef __FUNC__ 874 #define __FUNC__ "MatGetOwnershipRange_MPIBAIJ" 875 int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n) 876 { 877 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 878 *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs; 879 return 0; 880 } 881 882 extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 883 extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 884 885 #undef __FUNC__ 886 #define __FUNC__ "MatGetRow_MPIBAIJ" 887 int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 888 { 889 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 890 Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 891 int bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB; 892 int nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs; 893 int *cmap, *idx_p,cstart = mat->cstart; 894 895 if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,0,"Already active"); 896 mat->getrowactive = PETSC_TRUE; 897 898 if (!mat->rowvalues && (idx || v)) { 899 /* 900 allocate enough space to hold information from the longest row. 901 */ 902 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data; 903 int max = 1,mbs = mat->mbs,tmp; 904 for ( i=0; i<mbs; i++ ) { 905 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 906 if (max < tmp) { max = tmp; } 907 } 908 mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar))); 909 CHKPTRQ(mat->rowvalues); 910 mat->rowindices = (int *) (mat->rowvalues + max*bs2); 911 } 912 913 914 if (row < brstart || row >= brend) SETERRQ(1,0,"Only local rows") 915 lrow = row - brstart; 916 917 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 918 if (!v) {pvA = 0; pvB = 0;} 919 if (!idx) {pcA = 0; if (!v) pcB = 0;} 920 ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 921 ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 922 nztot = nzA + nzB; 923 924 cmap = mat->garray; 925 if (v || idx) { 926 if (nztot) { 927 /* Sort by increasing column numbers, assuming A and B already sorted */ 928 int imark = -1; 929 if (v) { 930 *v = v_p = mat->rowvalues; 931 for ( i=0; i<nzB; i++ ) { 932 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 933 else break; 934 } 935 imark = i; 936 for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 937 for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 938 } 939 if (idx) { 940 *idx = idx_p = mat->rowindices; 941 if (imark > -1) { 942 for ( i=0; i<imark; i++ ) { 943 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 944 } 945 } else { 946 for ( i=0; i<nzB; i++ ) { 947 if (cmap[cworkB[i]/bs] < cstart) 948 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 949 else break; 950 } 951 imark = i; 952 } 953 for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart*bs + cworkA[i]; 954 for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 955 } 956 } 957 else { 958 if (idx) *idx = 0; 959 if (v) *v = 0; 960 } 961 } 962 *nz = nztot; 963 ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 964 ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 965 return 0; 966 } 967 968 #undef __FUNC__ 969 #define __FUNC__ "MatRestoreRow_MPIBAIJ" 970 int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 971 { 972 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 973 if (baij->getrowactive == PETSC_FALSE) { 974 SETERRQ(1,0,"MatGetRow not called"); 975 } 976 baij->getrowactive = PETSC_FALSE; 977 return 0; 978 } 979 980 #undef __FUNC__ 981 #define __FUNC__ "MatGetBlockSize_MPIBAIJ" 982 int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs) 983 { 984 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 985 *bs = baij->bs; 986 return 0; 987 } 988 989 #undef __FUNC__ 990 #define __FUNC__ "MatZeroEntries_MPIBAIJ" 991 int MatZeroEntries_MPIBAIJ(Mat A) 992 { 993 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 994 int ierr; 995 ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 996 ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 997 return 0; 998 } 999 1000 #undef __FUNC__ 1001 #define __FUNC__ "MatGetInfo_MPIBAIJ" 1002 int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1003 { 1004 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data; 1005 Mat A = a->A, B = a->B; 1006 int ierr; 1007 double isend[5], irecv[5]; 1008 1009 info->rows_global = (double)a->M; 1010 info->columns_global = (double)a->N; 1011 info->rows_local = (double)a->m; 1012 info->columns_local = (double)a->N; 1013 info->block_size = (double)a->bs; 1014 ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr); 1015 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->memory; 1016 ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr); 1017 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->memory; 1018 if (flag == MAT_LOCAL) { 1019 info->nz_used = isend[0]; 1020 info->nz_allocated = isend[1]; 1021 info->nz_unneeded = isend[2]; 1022 info->memory = isend[3]; 1023 info->mallocs = isend[4]; 1024 } else if (flag == MAT_GLOBAL_MAX) { 1025 MPI_Allreduce(isend,irecv,5,MPI_INT,MPI_MAX,matin->comm); 1026 info->nz_used = irecv[0]; 1027 info->nz_allocated = irecv[1]; 1028 info->nz_unneeded = irecv[2]; 1029 info->memory = irecv[3]; 1030 info->mallocs = irecv[4]; 1031 } else if (flag == MAT_GLOBAL_SUM) { 1032 MPI_Allreduce(isend,irecv,5,MPI_INT,MPI_SUM,matin->comm); 1033 info->nz_used = irecv[0]; 1034 info->nz_allocated = irecv[1]; 1035 info->nz_unneeded = irecv[2]; 1036 info->memory = irecv[3]; 1037 info->mallocs = irecv[4]; 1038 } 1039 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1040 info->fill_ratio_needed = 0; 1041 info->factor_mallocs = 0; 1042 return 0; 1043 } 1044 1045 #undef __FUNC__ 1046 #define __FUNC__ "MatSetOption_MPIBAIJ" 1047 int MatSetOption_MPIBAIJ(Mat A,MatOption op) 1048 { 1049 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1050 1051 if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 1052 op == MAT_YES_NEW_NONZERO_LOCATIONS || 1053 op == MAT_COLUMNS_UNSORTED || 1054 op == MAT_COLUMNS_SORTED || 1055 op == MAT_NEW_NONZERO_LOCATION_ERROR) { 1056 MatSetOption(a->A,op); 1057 MatSetOption(a->B,op); 1058 } else if (op == MAT_ROW_ORIENTED) { 1059 a->roworiented = 1; 1060 MatSetOption(a->A,op); 1061 MatSetOption(a->B,op); 1062 } else if (op == MAT_ROWS_SORTED || 1063 op == MAT_ROWS_UNSORTED || 1064 op == MAT_SYMMETRIC || 1065 op == MAT_STRUCTURALLY_SYMMETRIC || 1066 op == MAT_YES_NEW_DIAGONALS) 1067 PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 1068 else if (op == MAT_COLUMN_ORIENTED) { 1069 a->roworiented = 0; 1070 MatSetOption(a->A,op); 1071 MatSetOption(a->B,op); 1072 } else if (op == MAT_IGNORE_OFF_PROCESSOR_ENTRIES) { 1073 a->donotstash = 1; 1074 } else if (op == MAT_NO_NEW_DIAGONALS) 1075 {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");} 1076 else 1077 {SETERRQ(PETSC_ERR_SUP,0,"unknown option");} 1078 return 0; 1079 } 1080 1081 #undef __FUNC__ 1082 #define __FUNC__ "MatTranspose_MPIBAIJ(" 1083 int MatTranspose_MPIBAIJ(Mat A,Mat *matout) 1084 { 1085 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data; 1086 Mat_SeqBAIJ *Aloc; 1087 Mat B; 1088 int ierr,M=baij->M,N=baij->N,*ai,*aj,row,i,*rvals,j,k,col; 1089 int bs=baij->bs,mbs=baij->mbs; 1090 Scalar *a; 1091 1092 if (matout == PETSC_NULL && M != N) 1093 SETERRQ(1,0,"Square matrix only for in-place"); 1094 ierr = MatCreateMPIBAIJ(A->comm,baij->bs,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,PETSC_NULL,&B); 1095 CHKERRQ(ierr); 1096 1097 /* copy over the A part */ 1098 Aloc = (Mat_SeqBAIJ*) baij->A->data; 1099 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1100 row = baij->rstart; 1101 rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals); 1102 1103 for ( i=0; i<mbs; i++ ) { 1104 rvals[0] = bs*(baij->rstart + i); 1105 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 1106 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1107 col = (baij->cstart+aj[j])*bs; 1108 for (k=0; k<bs; k++ ) { 1109 ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1110 col++; a += bs; 1111 } 1112 } 1113 } 1114 /* copy over the B part */ 1115 Aloc = (Mat_SeqBAIJ*) baij->B->data; 1116 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1117 row = baij->rstart*bs; 1118 for ( i=0; i<mbs; i++ ) { 1119 rvals[0] = bs*(baij->rstart + i); 1120 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 1121 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1122 col = baij->garray[aj[j]]*bs; 1123 for (k=0; k<bs; k++ ) { 1124 ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1125 col++; a += bs; 1126 } 1127 } 1128 } 1129 PetscFree(rvals); 1130 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1131 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1132 1133 if (matout != PETSC_NULL) { 1134 *matout = B; 1135 } else { 1136 /* This isn't really an in-place transpose .... but free data structures from baij */ 1137 PetscFree(baij->rowners); 1138 ierr = MatDestroy(baij->A); CHKERRQ(ierr); 1139 ierr = MatDestroy(baij->B); CHKERRQ(ierr); 1140 if (baij->colmap) PetscFree(baij->colmap); 1141 if (baij->garray) PetscFree(baij->garray); 1142 if (baij->lvec) VecDestroy(baij->lvec); 1143 if (baij->Mvctx) VecScatterDestroy(baij->Mvctx); 1144 PetscFree(baij); 1145 PetscMemcpy(A,B,sizeof(struct _Mat)); 1146 PetscHeaderDestroy(B); 1147 } 1148 return 0; 1149 } 1150 1151 #undef __FUNC__ 1152 #define __FUNC__ "MatDiagonalScale_MPIBAIJ" 1153 int MatDiagonalScale_MPIBAIJ(Mat A,Vec ll,Vec rr) 1154 { 1155 Mat a = ((Mat_MPIBAIJ *) A->data)->A; 1156 Mat b = ((Mat_MPIBAIJ *) A->data)->B; 1157 int ierr,s1,s2,s3; 1158 1159 if (ll) { 1160 ierr = VecGetLocalSize(ll,&s1); CHKERRQ(ierr); 1161 ierr = MatGetLocalSize(A,&s2,&s3); CHKERRQ(ierr); 1162 if (s1!=s2) SETERRQ(1,0,"non-conforming local sizes"); 1163 ierr = MatDiagonalScale(a,ll,0); CHKERRQ(ierr); 1164 ierr = MatDiagonalScale(b,ll,0); CHKERRQ(ierr); 1165 } 1166 if (rr) SETERRQ(1,0,"not supported for right vector"); 1167 return 0; 1168 } 1169 1170 /* the code does not do the diagonal entries correctly unless the 1171 matrix is square and the column and row owerships are identical. 1172 This is a BUG. The only way to fix it seems to be to access 1173 baij->A and baij->B directly and not through the MatZeroRows() 1174 routine. 1175 */ 1176 #undef __FUNC__ 1177 #define __FUNC__ "MatZeroRows_MPIBAIJ" 1178 int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag) 1179 { 1180 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 1181 int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 1182 int *procs,*nprocs,j,found,idx,nsends,*work; 1183 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 1184 int *rvalues,tag = A->tag,count,base,slen,n,*source; 1185 int *lens,imdex,*lrows,*values,bs=l->bs; 1186 MPI_Comm comm = A->comm; 1187 MPI_Request *send_waits,*recv_waits; 1188 MPI_Status recv_status,*send_status; 1189 IS istmp; 1190 1191 ierr = ISGetSize(is,&N); CHKERRQ(ierr); 1192 ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 1193 1194 /* first count number of contributors to each processor */ 1195 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 1196 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 1197 owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 1198 for ( i=0; i<N; i++ ) { 1199 idx = rows[i]; 1200 found = 0; 1201 for ( j=0; j<size; j++ ) { 1202 if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 1203 nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 1204 } 1205 } 1206 if (!found) SETERRQ(1,0,"Index out of range"); 1207 } 1208 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 1209 1210 /* inform other processors of number of messages and max length*/ 1211 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 1212 MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm); 1213 nrecvs = work[rank]; 1214 MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 1215 nmax = work[rank]; 1216 PetscFree(work); 1217 1218 /* post receives: */ 1219 rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */ 1220 CHKPTRQ(rvalues); 1221 recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request)); 1222 CHKPTRQ(recv_waits); 1223 for ( i=0; i<nrecvs; i++ ) { 1224 MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i); 1225 } 1226 1227 /* do sends: 1228 1) starts[i] gives the starting index in svalues for stuff going to 1229 the ith processor 1230 */ 1231 svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 1232 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 1233 CHKPTRQ(send_waits); 1234 starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 1235 starts[0] = 0; 1236 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 1237 for ( i=0; i<N; i++ ) { 1238 svalues[starts[owner[i]]++] = rows[i]; 1239 } 1240 ISRestoreIndices(is,&rows); 1241 1242 starts[0] = 0; 1243 for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 1244 count = 0; 1245 for ( i=0; i<size; i++ ) { 1246 if (procs[i]) { 1247 MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++); 1248 } 1249 } 1250 PetscFree(starts); 1251 1252 base = owners[rank]*bs; 1253 1254 /* wait on receives */ 1255 lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 1256 source = lens + nrecvs; 1257 count = nrecvs; slen = 0; 1258 while (count) { 1259 MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status); 1260 /* unpack receives into our local space */ 1261 MPI_Get_count(&recv_status,MPI_INT,&n); 1262 source[imdex] = recv_status.MPI_SOURCE; 1263 lens[imdex] = n; 1264 slen += n; 1265 count--; 1266 } 1267 PetscFree(recv_waits); 1268 1269 /* move the data into the send scatter */ 1270 lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 1271 count = 0; 1272 for ( i=0; i<nrecvs; i++ ) { 1273 values = rvalues + i*nmax; 1274 for ( j=0; j<lens[i]; j++ ) { 1275 lrows[count++] = values[j] - base; 1276 } 1277 } 1278 PetscFree(rvalues); PetscFree(lens); 1279 PetscFree(owner); PetscFree(nprocs); 1280 1281 /* actually zap the local rows */ 1282 ierr = ISCreateGeneral(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 1283 PLogObjectParent(A,istmp); 1284 PetscFree(lrows); 1285 ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 1286 ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 1287 ierr = ISDestroy(istmp); CHKERRQ(ierr); 1288 1289 /* wait on sends */ 1290 if (nsends) { 1291 send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status)); 1292 CHKPTRQ(send_status); 1293 MPI_Waitall(nsends,send_waits,send_status); 1294 PetscFree(send_status); 1295 } 1296 PetscFree(send_waits); PetscFree(svalues); 1297 1298 return 0; 1299 } 1300 extern int MatPrintHelp_SeqBAIJ(Mat); 1301 #undef __FUNC__ 1302 #define __FUNC__ "MatPrintHelp_MPIBAIJ" 1303 int MatPrintHelp_MPIBAIJ(Mat A) 1304 { 1305 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 1306 1307 if (!a->rank) return MatPrintHelp_SeqBAIJ(a->A); 1308 else return 0; 1309 } 1310 1311 #undef __FUNC__ 1312 #define __FUNC__ "MatSetUnfactored_MPIBAIJ" 1313 int MatSetUnfactored_MPIBAIJ(Mat A) 1314 { 1315 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 1316 int ierr; 1317 ierr = MatSetUnfactored(a->A); CHKERRQ(ierr); 1318 return 0; 1319 } 1320 1321 static int MatConvertSameType_MPIBAIJ(Mat,Mat *,int); 1322 1323 /* -------------------------------------------------------------------*/ 1324 static struct _MatOps MatOps = { 1325 MatSetValues_MPIBAIJ,MatGetRow_MPIBAIJ,MatRestoreRow_MPIBAIJ,MatMult_MPIBAIJ, 1326 MatMultAdd_MPIBAIJ,MatMultTrans_MPIBAIJ,MatMultTransAdd_MPIBAIJ,0, 1327 0,0,0,0, 1328 0,0,MatTranspose_MPIBAIJ,MatGetInfo_MPIBAIJ, 1329 0,MatGetDiagonal_MPIBAIJ,MatDiagonalScale_MPIBAIJ,MatNorm_MPIBAIJ, 1330 MatAssemblyBegin_MPIBAIJ,MatAssemblyEnd_MPIBAIJ,0,MatSetOption_MPIBAIJ, 1331 MatZeroEntries_MPIBAIJ,MatZeroRows_MPIBAIJ,0, 1332 0,0,0,MatGetSize_MPIBAIJ, 1333 MatGetLocalSize_MPIBAIJ,MatGetOwnershipRange_MPIBAIJ,0,0, 1334 0,0,MatConvertSameType_MPIBAIJ,0,0, 1335 0,0,0,MatGetSubMatrices_MPIBAIJ, 1336 MatIncreaseOverlap_MPIBAIJ,MatGetValues_MPIBAIJ,0,MatPrintHelp_MPIBAIJ, 1337 MatScale_MPIBAIJ,0,0,0,MatGetBlockSize_MPIBAIJ, 1338 0,0,0,0,0,0,MatSetUnfactored_MPIBAIJ,0,MatSetValuesBlocked_MPIBAIJ}; 1339 1340 1341 #undef __FUNC__ 1342 #define __FUNC__ "MatCreateMPIBAIJ" 1343 /*@C 1344 MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 1345 (block compressed row). For good matrix assembly performance 1346 the user should preallocate the matrix storage by setting the parameters 1347 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1348 performance can be increased by more than a factor of 50. 1349 1350 Input Parameters: 1351 . comm - MPI communicator 1352 . bs - size of blockk 1353 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1354 This value should be the same as the local size used in creating the 1355 y vector for the matrix-vector product y = Ax. 1356 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1357 This value should be the same as the local size used in creating the 1358 x vector for the matrix-vector product y = Ax. 1359 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 1360 . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 1361 . d_nz - number of block nonzeros per block row in diagonal portion of local 1362 submatrix (same for all local rows) 1363 . d_nzz - array containing the number of block nonzeros in the various block rows 1364 of the in diagonal portion of the local (possibly different for each block 1365 row) or PETSC_NULL. You must leave room for the diagonal entry even if 1366 it is zero. 1367 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1368 submatrix (same for all local rows). 1369 . o_nzz - array containing the number of nonzeros in the various block rows of the 1370 off-diagonal portion of the local submatrix (possibly different for 1371 each block row) or PETSC_NULL. 1372 1373 Output Parameter: 1374 . A - the matrix 1375 1376 Notes: 1377 The user MUST specify either the local or global matrix dimensions 1378 (possibly both). 1379 1380 Storage Information: 1381 For a square global matrix we define each processor's diagonal portion 1382 to be its local rows and the corresponding columns (a square submatrix); 1383 each processor's off-diagonal portion encompasses the remainder of the 1384 local matrix (a rectangular submatrix). 1385 1386 The user can specify preallocated storage for the diagonal part of 1387 the local submatrix with either d_nz or d_nnz (not both). Set 1388 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1389 memory allocation. Likewise, specify preallocated storage for the 1390 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1391 1392 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1393 the figure below we depict these three local rows and all columns (0-11). 1394 1395 $ 0 1 2 3 4 5 6 7 8 9 10 11 1396 $ ------------------- 1397 $ row 3 | o o o d d d o o o o o o 1398 $ row 4 | o o o d d d o o o o o o 1399 $ row 5 | o o o d d d o o o o o o 1400 $ ------------------- 1401 $ 1402 1403 Thus, any entries in the d locations are stored in the d (diagonal) 1404 submatrix, and any entries in the o locations are stored in the 1405 o (off-diagonal) submatrix. Note that the d and the o submatrices are 1406 stored simply in the MATSEQBAIJ format for compressed row storage. 1407 1408 Now d_nz should indicate the number of nonzeros per row in the d matrix, 1409 and o_nz should indicate the number of nonzeros per row in the o matrix. 1410 In general, for PDE problems in which most nonzeros are near the diagonal, 1411 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1412 or you will get TERRIBLE performance; see the users' manual chapter on 1413 matrices. 1414 1415 .keywords: matrix, block, aij, compressed row, sparse, parallel 1416 1417 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues() 1418 @*/ 1419 int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N, 1420 int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 1421 { 1422 Mat B; 1423 Mat_MPIBAIJ *b; 1424 int ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size; 1425 1426 if (bs < 1) SETERRQ(1,0,"invalid block size specified"); 1427 *A = 0; 1428 PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIBAIJ,comm); 1429 PLogObjectCreate(B); 1430 B->data = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b); 1431 PetscMemzero(b,sizeof(Mat_MPIBAIJ)); 1432 PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 1433 MPI_Comm_size(comm,&size); 1434 if (size == 1) { 1435 B->ops.getrowij = MatGetRowIJ_MPIBAIJ; 1436 B->ops.restorerowij = MatRestoreRowIJ_MPIBAIJ; 1437 B->ops.lufactorsymbolic = MatLUFactorSymbolic_MPIBAIJ; 1438 B->ops.lufactornumeric = MatLUFactorNumeric_MPIBAIJ; 1439 B->ops.lufactor = MatLUFactor_MPIBAIJ; 1440 B->ops.solve = MatSolve_MPIBAIJ; 1441 B->ops.solveadd = MatSolveAdd_MPIBAIJ; 1442 B->ops.solvetrans = MatSolveTrans_MPIBAIJ; 1443 B->ops.solvetransadd = MatSolveTransAdd_MPIBAIJ; 1444 B->ops.ilufactorsymbolic = MatILUFactorSymbolic_MPIBAIJ; 1445 } 1446 1447 B->destroy = MatDestroy_MPIBAIJ; 1448 B->view = MatView_MPIBAIJ; 1449 B->mapping = 0; 1450 B->factor = 0; 1451 B->assembled = PETSC_FALSE; 1452 1453 B->insertmode = NOT_SET_VALUES; 1454 MPI_Comm_rank(comm,&b->rank); 1455 MPI_Comm_size(comm,&b->size); 1456 1457 if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) 1458 SETERRQ(1,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 1459 if ( M == PETSC_DECIDE && m == PETSC_DECIDE) SETERRQ(1,0,"either M or m should be specified"); 1460 if ( M == PETSC_DECIDE && n == PETSC_DECIDE)SETERRQ(1,0,"either N or n should be specified"); 1461 if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE; 1462 if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE; 1463 1464 if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 1465 work[0] = m; work[1] = n; 1466 mbs = m/bs; nbs = n/bs; 1467 MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm ); 1468 if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;} 1469 if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;} 1470 } 1471 if (m == PETSC_DECIDE) { 1472 Mbs = M/bs; 1473 if (Mbs*bs != M) SETERRQ(1,0,"No of global rows must be divisible by blocksize"); 1474 mbs = Mbs/b->size + ((Mbs % b->size) > b->rank); 1475 m = mbs*bs; 1476 } 1477 if (n == PETSC_DECIDE) { 1478 Nbs = N/bs; 1479 if (Nbs*bs != N) SETERRQ(1,0,"No of global cols must be divisible by blocksize"); 1480 nbs = Nbs/b->size + ((Nbs % b->size) > b->rank); 1481 n = nbs*bs; 1482 } 1483 if (mbs*bs != m || nbs*bs != n) SETERRQ(1,0,"No of local rows, cols must be divisible by blocksize"); 1484 1485 b->m = m; B->m = m; 1486 b->n = n; B->n = n; 1487 b->N = N; B->N = N; 1488 b->M = M; B->M = M; 1489 b->bs = bs; 1490 b->bs2 = bs*bs; 1491 b->mbs = mbs; 1492 b->nbs = nbs; 1493 b->Mbs = Mbs; 1494 b->Nbs = Nbs; 1495 1496 /* build local table of row and column ownerships */ 1497 b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners); 1498 PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIBAIJ)); 1499 b->cowners = b->rowners + b->size + 2; 1500 MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm); 1501 b->rowners[0] = 0; 1502 for ( i=2; i<=b->size; i++ ) { 1503 b->rowners[i] += b->rowners[i-1]; 1504 } 1505 b->rstart = b->rowners[b->rank]; 1506 b->rend = b->rowners[b->rank+1]; 1507 b->rstart_bs = b->rstart * bs; 1508 b->rend_bs = b->rend * bs; 1509 1510 MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm); 1511 b->cowners[0] = 0; 1512 for ( i=2; i<=b->size; i++ ) { 1513 b->cowners[i] += b->cowners[i-1]; 1514 } 1515 b->cstart = b->cowners[b->rank]; 1516 b->cend = b->cowners[b->rank+1]; 1517 b->cstart_bs = b->cstart * bs; 1518 b->cend_bs = b->cend * bs; 1519 1520 if (d_nz == PETSC_DEFAULT) d_nz = 5; 1521 ierr = MatCreateSeqBAIJ(MPI_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr); 1522 PLogObjectParent(B,b->A); 1523 if (o_nz == PETSC_DEFAULT) o_nz = 0; 1524 ierr = MatCreateSeqBAIJ(MPI_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr); 1525 PLogObjectParent(B,b->B); 1526 1527 /* build cache for off array entries formed */ 1528 ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr); 1529 b->donotstash = 0; 1530 b->colmap = 0; 1531 b->garray = 0; 1532 b->roworiented = 1; 1533 1534 /* stuff used in block assembly */ 1535 b->barray = 0; 1536 1537 /* stuff used for matrix vector multiply */ 1538 b->lvec = 0; 1539 b->Mvctx = 0; 1540 1541 /* stuff for MatGetRow() */ 1542 b->rowindices = 0; 1543 b->rowvalues = 0; 1544 b->getrowactive = PETSC_FALSE; 1545 1546 *A = B; 1547 return 0; 1548 } 1549 1550 #undef __FUNC__ 1551 #define __FUNC__ "MatConvertSameType_MPIBAIJ" 1552 static int MatConvertSameType_MPIBAIJ(Mat matin,Mat *newmat,int cpvalues) 1553 { 1554 Mat mat; 1555 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data; 1556 int ierr, len=0, flg; 1557 1558 *newmat = 0; 1559 PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIBAIJ,matin->comm); 1560 PLogObjectCreate(mat); 1561 mat->data = (void *) (a = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(a); 1562 PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 1563 mat->destroy = MatDestroy_MPIBAIJ; 1564 mat->view = MatView_MPIBAIJ; 1565 mat->factor = matin->factor; 1566 mat->assembled = PETSC_TRUE; 1567 1568 a->m = mat->m = oldmat->m; 1569 a->n = mat->n = oldmat->n; 1570 a->M = mat->M = oldmat->M; 1571 a->N = mat->N = oldmat->N; 1572 1573 a->bs = oldmat->bs; 1574 a->bs2 = oldmat->bs2; 1575 a->mbs = oldmat->mbs; 1576 a->nbs = oldmat->nbs; 1577 a->Mbs = oldmat->Mbs; 1578 a->Nbs = oldmat->Nbs; 1579 1580 a->rstart = oldmat->rstart; 1581 a->rend = oldmat->rend; 1582 a->cstart = oldmat->cstart; 1583 a->cend = oldmat->cend; 1584 a->size = oldmat->size; 1585 a->rank = oldmat->rank; 1586 mat->insertmode = NOT_SET_VALUES; 1587 a->rowvalues = 0; 1588 a->getrowactive = PETSC_FALSE; 1589 a->barray = 0; 1590 1591 a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 1592 PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIBAIJ)); 1593 a->cowners = a->rowners + a->size + 2; 1594 PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int)); 1595 ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 1596 if (oldmat->colmap) { 1597 a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap); 1598 PLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 1599 PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int)); 1600 } else a->colmap = 0; 1601 if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) { 1602 a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray); 1603 PLogObjectMemory(mat,len*sizeof(int)); 1604 PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 1605 } else a->garray = 0; 1606 1607 ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 1608 PLogObjectParent(mat,a->lvec); 1609 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 1610 PLogObjectParent(mat,a->Mvctx); 1611 ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr); 1612 PLogObjectParent(mat,a->A); 1613 ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr); 1614 PLogObjectParent(mat,a->B); 1615 ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 1616 if (flg) { 1617 ierr = MatPrintHelp(mat); CHKERRQ(ierr); 1618 } 1619 *newmat = mat; 1620 return 0; 1621 } 1622 1623 #include "sys.h" 1624 1625 #undef __FUNC__ 1626 #define __FUNC__ "MatLoad_MPIBAIJ" 1627 int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat) 1628 { 1629 Mat A; 1630 int i, nz, ierr, j,rstart, rend, fd; 1631 Scalar *vals,*buf; 1632 MPI_Comm comm = ((PetscObject)viewer)->comm; 1633 MPI_Status status; 1634 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 1635 int *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf; 1636 int flg,tag = ((PetscObject)viewer)->tag,bs=1,bs2,Mbs,mbs,extra_rows; 1637 int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 1638 int dcount,kmax,k,nzcount,tmp; 1639 1640 1641 ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr); 1642 bs2 = bs*bs; 1643 1644 MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 1645 if (!rank) { 1646 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 1647 ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr); 1648 if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not matrix object"); 1649 } 1650 1651 MPI_Bcast(header+1,3,MPI_INT,0,comm); 1652 M = header[1]; N = header[2]; 1653 1654 if (M != N) SETERRQ(1,0,"Can only do square matrices"); 1655 1656 /* 1657 This code adds extra rows to make sure the number of rows is 1658 divisible by the blocksize 1659 */ 1660 Mbs = M/bs; 1661 extra_rows = bs - M + bs*(Mbs); 1662 if (extra_rows == bs) extra_rows = 0; 1663 else Mbs++; 1664 if (extra_rows &&!rank) { 1665 PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n"); 1666 } 1667 1668 /* determine ownership of all rows */ 1669 mbs = Mbs/size + ((Mbs % size) > rank); 1670 m = mbs * bs; 1671 rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners); 1672 browners = rowners + size + 1; 1673 MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm); 1674 rowners[0] = 0; 1675 for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1]; 1676 for ( i=0; i<=size; i++ ) browners[i] = rowners[i]*bs; 1677 rstart = rowners[rank]; 1678 rend = rowners[rank+1]; 1679 1680 /* distribute row lengths to all processors */ 1681 locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens); 1682 if (!rank) { 1683 rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths); 1684 ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr); 1685 for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 1686 sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 1687 for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i]; 1688 MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm); 1689 PetscFree(sndcounts); 1690 } 1691 else { 1692 MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm); 1693 } 1694 1695 if (!rank) { 1696 /* calculate the number of nonzeros on each processor */ 1697 procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 1698 PetscMemzero(procsnz,size*sizeof(int)); 1699 for ( i=0; i<size; i++ ) { 1700 for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) { 1701 procsnz[i] += rowlengths[j]; 1702 } 1703 } 1704 PetscFree(rowlengths); 1705 1706 /* determine max buffer needed and allocate it */ 1707 maxnz = 0; 1708 for ( i=0; i<size; i++ ) { 1709 maxnz = PetscMax(maxnz,procsnz[i]); 1710 } 1711 cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 1712 1713 /* read in my part of the matrix column indices */ 1714 nz = procsnz[0]; 1715 ibuf = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf); 1716 mycols = ibuf; 1717 if (size == 1) nz -= extra_rows; 1718 ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr); 1719 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 1720 1721 /* read in every ones (except the last) and ship off */ 1722 for ( i=1; i<size-1; i++ ) { 1723 nz = procsnz[i]; 1724 ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr); 1725 MPI_Send(cols,nz,MPI_INT,i,tag,comm); 1726 } 1727 /* read in the stuff for the last proc */ 1728 if ( size != 1 ) { 1729 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 1730 ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr); 1731 for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i; 1732 MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm); 1733 } 1734 PetscFree(cols); 1735 } 1736 else { 1737 /* determine buffer space needed for message */ 1738 nz = 0; 1739 for ( i=0; i<m; i++ ) { 1740 nz += locrowlens[i]; 1741 } 1742 ibuf = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf); 1743 mycols = ibuf; 1744 /* receive message of column indices*/ 1745 MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status); 1746 MPI_Get_count(&status,MPI_INT,&maxnz); 1747 if (maxnz != nz) SETERRQ(1,0,"something is wrong with file"); 1748 } 1749 1750 /* loop over local rows, determining number of off diagonal entries */ 1751 dlens = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens); 1752 odlens = dlens + (rend-rstart); 1753 mask = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask); 1754 PetscMemzero(mask,3*Mbs*sizeof(int)); 1755 masked1 = mask + Mbs; 1756 masked2 = masked1 + Mbs; 1757 rowcount = 0; nzcount = 0; 1758 for ( i=0; i<mbs; i++ ) { 1759 dcount = 0; 1760 odcount = 0; 1761 for ( j=0; j<bs; j++ ) { 1762 kmax = locrowlens[rowcount]; 1763 for ( k=0; k<kmax; k++ ) { 1764 tmp = mycols[nzcount++]/bs; 1765 if (!mask[tmp]) { 1766 mask[tmp] = 1; 1767 if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp; 1768 else masked1[dcount++] = tmp; 1769 } 1770 } 1771 rowcount++; 1772 } 1773 1774 dlens[i] = dcount; 1775 odlens[i] = odcount; 1776 1777 /* zero out the mask elements we set */ 1778 for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0; 1779 for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0; 1780 } 1781 1782 /* create our matrix */ 1783 ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat); 1784 CHKERRQ(ierr); 1785 A = *newmat; 1786 MatSetOption(A,MAT_COLUMNS_SORTED); 1787 1788 if (!rank) { 1789 buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf); 1790 /* read in my part of the matrix numerical values */ 1791 nz = procsnz[0]; 1792 vals = buf; 1793 mycols = ibuf; 1794 if (size == 1) nz -= extra_rows; 1795 ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 1796 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 1797 1798 /* insert into matrix */ 1799 jj = rstart*bs; 1800 for ( i=0; i<m; i++ ) { 1801 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 1802 mycols += locrowlens[i]; 1803 vals += locrowlens[i]; 1804 jj++; 1805 } 1806 /* read in other processors (except the last one) and ship out */ 1807 for ( i=1; i<size-1; i++ ) { 1808 nz = procsnz[i]; 1809 vals = buf; 1810 ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 1811 MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm); 1812 } 1813 /* the last proc */ 1814 if ( size != 1 ){ 1815 nz = procsnz[i] - extra_rows; 1816 vals = buf; 1817 ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 1818 for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0; 1819 MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm); 1820 } 1821 PetscFree(procsnz); 1822 } 1823 else { 1824 /* receive numeric values */ 1825 buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf); 1826 1827 /* receive message of values*/ 1828 vals = buf; 1829 mycols = ibuf; 1830 MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status); 1831 MPI_Get_count(&status,MPIU_SCALAR,&maxnz); 1832 if (maxnz != nz) SETERRQ(1,0,"something is wrong with file"); 1833 1834 /* insert into matrix */ 1835 jj = rstart*bs; 1836 for ( i=0; i<m; i++ ) { 1837 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 1838 mycols += locrowlens[i]; 1839 vals += locrowlens[i]; 1840 jj++; 1841 } 1842 } 1843 PetscFree(locrowlens); 1844 PetscFree(buf); 1845 PetscFree(ibuf); 1846 PetscFree(rowners); 1847 PetscFree(dlens); 1848 PetscFree(mask); 1849 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1850 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1851 return 0; 1852 } 1853 1854 1855