1cb512458SBarry Smith #ifndef lint 2*f5e9677aSSatish Balay static char vcid[] = "$Id: mpiaij.c,v 1.186 1997/01/14 17:18:18 balay Exp balay $"; 3cb512458SBarry Smith #endif 48a729477SBarry Smith 570f55243SBarry Smith #include "src/mat/impls/aij/mpi/mpiaij.h" 6f5eb4b81SSatish Balay #include "src/vec/vecimpl.h" 7d9942c19SSatish Balay #include "src/inline/spops.h" 88a729477SBarry Smith 99e25ed09SBarry Smith /* local utility routine that creates a mapping from the global column 109e25ed09SBarry Smith number to the local number in the off-diagonal part of the local 119e25ed09SBarry Smith storage of the matrix. This is done in a non scable way since the 129e25ed09SBarry Smith length of colmap equals the global matrix length. 139e25ed09SBarry Smith */ 145615d1e5SSatish Balay #undef __FUNC__ 155615d1e5SSatish Balay #define __FUNC__ "CreateColmap_MPIAIJ_Private" 160a198c4cSBarry Smith int CreateColmap_MPIAIJ_Private(Mat mat) 179e25ed09SBarry Smith { 1844a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 19ec8511deSBarry Smith Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data; 20905e6a2fSBarry Smith int n = B->n,i; 21dbb450caSBarry Smith 220452661fSBarry Smith aij->colmap = (int *) PetscMalloc(aij->N*sizeof(int));CHKPTRQ(aij->colmap); 23464493b3SBarry Smith PLogObjectMemory(mat,aij->N*sizeof(int)); 24cddf8d76SBarry Smith PetscMemzero(aij->colmap,aij->N*sizeof(int)); 25905e6a2fSBarry Smith for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1; 269e25ed09SBarry Smith return 0; 279e25ed09SBarry Smith } 289e25ed09SBarry Smith 292493cbb0SBarry Smith extern int DisAssemble_MPIAIJ(Mat); 302493cbb0SBarry Smith 315615d1e5SSatish Balay #undef __FUNC__ 325615d1e5SSatish Balay #define __FUNC__ "MatGetRowIJ_MPIAIJ" 333b2fbd54SBarry Smith static int MatGetRowIJ_MPIAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja, 343b2fbd54SBarry Smith PetscTruth *done) 35299609e3SLois Curfman McInnes { 36299609e3SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 37299609e3SLois Curfman McInnes int ierr; 3817699dbbSLois Curfman McInnes if (aij->size == 1) { 393b2fbd54SBarry Smith ierr = MatGetRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr); 40e3372554SBarry Smith } else SETERRQ(1,0,"not supported in parallel"); 413b2fbd54SBarry Smith return 0; 423b2fbd54SBarry Smith } 433b2fbd54SBarry Smith 445615d1e5SSatish Balay #undef __FUNC__ 455615d1e5SSatish Balay #define __FUNC__ "MatRestoreRowIJ_MPIAIJ" 463b2fbd54SBarry Smith static int MatRestoreRowIJ_MPIAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja, 473b2fbd54SBarry Smith PetscTruth *done) 483b2fbd54SBarry Smith { 493b2fbd54SBarry Smith Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 503b2fbd54SBarry Smith int ierr; 513b2fbd54SBarry Smith if (aij->size == 1) { 523b2fbd54SBarry Smith ierr = MatRestoreRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr); 53e3372554SBarry Smith } else SETERRQ(1,0,"not supported in parallel"); 54299609e3SLois Curfman McInnes return 0; 55299609e3SLois Curfman McInnes } 56299609e3SLois Curfman McInnes 570520107fSSatish Balay #define CHUNKSIZE 15 58*f5e9677aSSatish Balay #define MatSetValues_SeqAIJ_A_Private(A,row,col,value,addv) \ 590520107fSSatish Balay { \ 60*f5e9677aSSatish Balay Mat_SeqAIJ *a = (Mat_SeqAIJ *) (A)->data; \ 61*f5e9677aSSatish Balay int *rp,ii,nrow,_i,rmax, N, col1; \ 62*f5e9677aSSatish Balay int *imax = a->imax, *ai = a->i, *ailen = a->ilen; \ 63*f5e9677aSSatish Balay int *aj = a->j, nonew = a->nonew,shift = a->indexshift; \ 64*f5e9677aSSatish Balay Scalar *ap, *aa = a->a; \ 650520107fSSatish Balay \ 660520107fSSatish Balay rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 670520107fSSatish Balay rmax = imax[row]; nrow = ailen[row]; \ 68*f5e9677aSSatish Balay col1 = col - shift; \ 69*f5e9677aSSatish Balay \ 700520107fSSatish Balay for ( _i=0; _i<nrow; _i++ ) { \ 71*f5e9677aSSatish Balay if (rp[_i] > col1) break; \ 72*f5e9677aSSatish Balay if (rp[_i] == col1) { \ 730520107fSSatish Balay if (addv == ADD_VALUES) ap[_i] += value; \ 740520107fSSatish Balay else ap[_i] = value; \ 750520107fSSatish Balay goto _noinsert; \ 760520107fSSatish Balay } \ 770520107fSSatish Balay } \ 78*f5e9677aSSatish Balay if (nonew) goto _noinsert; \ 790520107fSSatish Balay if (nrow >= rmax) { \ 800520107fSSatish Balay /* there is no extra room in row, therefore enlarge */ \ 810520107fSSatish Balay int new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \ 820520107fSSatish Balay Scalar *new_a; \ 830520107fSSatish Balay \ 840520107fSSatish Balay /* malloc new storage space */ \ 850520107fSSatish Balay len = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \ 860520107fSSatish Balay new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 870520107fSSatish Balay new_j = (int *) (new_a + new_nz); \ 880520107fSSatish Balay new_i = new_j + new_nz; \ 890520107fSSatish Balay \ 900520107fSSatish Balay /* copy over old data into new slots */ \ 910520107fSSatish Balay for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \ 920520107fSSatish Balay for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 930520107fSSatish Balay PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int)); \ 940520107fSSatish Balay len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \ 950520107fSSatish Balay PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \ 960520107fSSatish Balay len*sizeof(int)); \ 970520107fSSatish Balay PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar)); \ 980520107fSSatish Balay PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \ 990520107fSSatish Balay len*sizeof(Scalar)); \ 1000520107fSSatish Balay /* free up old matrix storage */ \ 101*f5e9677aSSatish Balay \ 1020520107fSSatish Balay PetscFree(a->a); \ 1030520107fSSatish Balay if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \ 1040520107fSSatish Balay aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 1050520107fSSatish Balay a->singlemalloc = 1; \ 1060520107fSSatish Balay \ 1070520107fSSatish Balay rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 1080520107fSSatish Balay rmax = imax[row] = imax[row] + CHUNKSIZE; \ 1090520107fSSatish Balay PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \ 1100520107fSSatish Balay a->maxnz += CHUNKSIZE; \ 1110520107fSSatish Balay a->reallocs++; \ 1120520107fSSatish Balay } \ 1130520107fSSatish Balay N = nrow++ - 1; a->nz++; \ 1140520107fSSatish Balay /* shift up all the later entries in this row */ \ 1150520107fSSatish Balay for ( ii=N; ii>=_i; ii-- ) { \ 1160520107fSSatish Balay rp[ii+1] = rp[ii]; \ 1170520107fSSatish Balay ap[ii+1] = ap[ii]; \ 1180520107fSSatish Balay } \ 119*f5e9677aSSatish Balay rp[_i] = col1; \ 1200520107fSSatish Balay ap[_i] = value; \ 1210520107fSSatish Balay _noinsert: ; \ 1220520107fSSatish Balay ailen[row] = nrow; \ 1230520107fSSatish Balay } 1240a198c4cSBarry Smith 1250520107fSSatish Balay extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 1265615d1e5SSatish Balay #undef __FUNC__ 1275615d1e5SSatish Balay #define __FUNC__ "MatSetValues_MPIAIJ" 1284b0e389bSBarry Smith static int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 1298a729477SBarry Smith { 13044a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1314b0e389bSBarry Smith Scalar value; 1321eb62cbbSBarry Smith int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 1331eb62cbbSBarry Smith int cstart = aij->cstart, cend = aij->cend,row,col; 134905e6a2fSBarry Smith int roworiented = aij->roworiented; 1358a729477SBarry Smith 1360520107fSSatish Balay /* Some Variables required in the macro */ 137*f5e9677aSSatish Balay /* Mat A = aij->A; 1380520107fSSatish Balay Mat_SeqAIJ *a = (Mat_SeqAIJ *) (A)->data; 139*f5e9677aSSatish Balay int *rp,ii,nrow,_i,rmax,N,col2; 1400520107fSSatish Balay int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1410520107fSSatish Balay int *aj=a->j,shift=a->indexshift; 1420520107fSSatish Balay Scalar *ap,*aa=a->a; 143*f5e9677aSSatish Balay */ 1440a198c4cSBarry Smith #if defined(PETSC_BOPT_g) 14564119d58SLois Curfman McInnes if (aij->insertmode != NOT_SET_VALUES && aij->insertmode != addv) { 146e3372554SBarry Smith SETERRQ(1,0,"Cannot mix inserts and adds"); 1478a729477SBarry Smith } 1480a198c4cSBarry Smith #endif 1491eb62cbbSBarry Smith aij->insertmode = addv; 1508a729477SBarry Smith for ( i=0; i<m; i++ ) { 1510a198c4cSBarry Smith #if defined(PETSC_BOPT_g) 152e3372554SBarry Smith if (im[i] < 0) SETERRQ(1,0,"Negative row"); 153e3372554SBarry Smith if (im[i] >= aij->M) SETERRQ(1,0,"Row too large"); 1540a198c4cSBarry Smith #endif 1554b0e389bSBarry Smith if (im[i] >= rstart && im[i] < rend) { 1564b0e389bSBarry Smith row = im[i] - rstart; 1571eb62cbbSBarry Smith for ( j=0; j<n; j++ ) { 1584b0e389bSBarry Smith if (in[j] >= cstart && in[j] < cend){ 1594b0e389bSBarry Smith col = in[j] - cstart; 1604b0e389bSBarry Smith if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 161*f5e9677aSSatish Balay MatSetValues_SeqAIJ_A_Private(aij->A,row,col,value,addv); 1620520107fSSatish Balay /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 1631eb62cbbSBarry Smith } 1640a198c4cSBarry Smith #if defined(PETSC_BOPT_g) 165e3372554SBarry Smith else if (in[j] < 0) {SETERRQ(1,0,"Negative column");} 166e3372554SBarry Smith else if (in[j] >= aij->N) {SETERRQ(1,0,"Col too large");} 1670a198c4cSBarry Smith #endif 1681eb62cbbSBarry Smith else { 169227d817aSBarry Smith if (mat->was_assembled) { 170905e6a2fSBarry Smith if (!aij->colmap) { 171905e6a2fSBarry Smith ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 172905e6a2fSBarry Smith } 173905e6a2fSBarry Smith col = aij->colmap[in[j]] - 1; 174ec8511deSBarry Smith if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 1752493cbb0SBarry Smith ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 1764b0e389bSBarry Smith col = in[j]; 177d6dfbf8fSBarry Smith } 1789e25ed09SBarry Smith } 1794b0e389bSBarry Smith else col = in[j]; 1804b0e389bSBarry Smith if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 1810a198c4cSBarry Smith ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); 1821eb62cbbSBarry Smith } 1831eb62cbbSBarry Smith } 1841eb62cbbSBarry Smith } 1851eb62cbbSBarry Smith else { 18690f02eecSBarry Smith if (roworiented && !aij->donotstash) { 1874b0e389bSBarry Smith ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 1884b0e389bSBarry Smith } 1894b0e389bSBarry Smith else { 19090f02eecSBarry Smith if (!aij->donotstash) { 1914b0e389bSBarry Smith row = im[i]; 1924b0e389bSBarry Smith for ( j=0; j<n; j++ ) { 1934b0e389bSBarry Smith ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 1944b0e389bSBarry Smith } 1954b0e389bSBarry Smith } 1961eb62cbbSBarry Smith } 1978a729477SBarry Smith } 19890f02eecSBarry Smith } 1998a729477SBarry Smith return 0; 2008a729477SBarry Smith } 2018a729477SBarry Smith 2025615d1e5SSatish Balay #undef __FUNC__ 2035615d1e5SSatish Balay #define __FUNC__ "MatGetValues_MPIAIJ" 204b49de8d1SLois Curfman McInnes static int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 205b49de8d1SLois Curfman McInnes { 206b49de8d1SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 207b49de8d1SLois Curfman McInnes int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 208b49de8d1SLois Curfman McInnes int cstart = aij->cstart, cend = aij->cend,row,col; 209b49de8d1SLois Curfman McInnes 210b49de8d1SLois Curfman McInnes for ( i=0; i<m; i++ ) { 211e3372554SBarry Smith if (idxm[i] < 0) SETERRQ(1,0,"Negative row"); 212e3372554SBarry Smith if (idxm[i] >= aij->M) SETERRQ(1,0,"Row too large"); 213b49de8d1SLois Curfman McInnes if (idxm[i] >= rstart && idxm[i] < rend) { 214b49de8d1SLois Curfman McInnes row = idxm[i] - rstart; 215b49de8d1SLois Curfman McInnes for ( j=0; j<n; j++ ) { 216e3372554SBarry Smith if (idxn[j] < 0) SETERRQ(1,0,"Negative column"); 217e3372554SBarry Smith if (idxn[j] >= aij->N) SETERRQ(1,0,"Col too large"); 218b49de8d1SLois Curfman McInnes if (idxn[j] >= cstart && idxn[j] < cend){ 219b49de8d1SLois Curfman McInnes col = idxn[j] - cstart; 220b49de8d1SLois Curfman McInnes ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 221b49de8d1SLois Curfman McInnes } 222b49de8d1SLois Curfman McInnes else { 223905e6a2fSBarry Smith if (!aij->colmap) { 224905e6a2fSBarry Smith ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 225905e6a2fSBarry Smith } 226905e6a2fSBarry Smith col = aij->colmap[idxn[j]] - 1; 227e60e1c95SSatish Balay if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 228d9d09a02SSatish Balay else { 229b49de8d1SLois Curfman McInnes ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 230b49de8d1SLois Curfman McInnes } 231b49de8d1SLois Curfman McInnes } 232b49de8d1SLois Curfman McInnes } 233d9d09a02SSatish Balay } 234b49de8d1SLois Curfman McInnes else { 235e3372554SBarry Smith SETERRQ(1,0,"Only local values currently supported"); 236b49de8d1SLois Curfman McInnes } 237b49de8d1SLois Curfman McInnes } 238b49de8d1SLois Curfman McInnes return 0; 239b49de8d1SLois Curfman McInnes } 240b49de8d1SLois Curfman McInnes 2415615d1e5SSatish Balay #undef __FUNC__ 2425615d1e5SSatish Balay #define __FUNC__ "MatAssemblyBegin_MPIAIJ" 243fc76ce87SLois Curfman McInnes static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 2448a729477SBarry Smith { 24544a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 246d6dfbf8fSBarry Smith MPI_Comm comm = mat->comm; 24717699dbbSLois Curfman McInnes int size = aij->size, *owners = aij->rowners; 24817699dbbSLois Curfman McInnes int rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr; 2491eb62cbbSBarry Smith MPI_Request *send_waits,*recv_waits; 2506abc6512SBarry Smith int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 2511eb62cbbSBarry Smith InsertMode addv; 2521eb62cbbSBarry Smith Scalar *rvalues,*svalues; 2531eb62cbbSBarry Smith 2541eb62cbbSBarry Smith /* make sure all processors are either in INSERTMODE or ADDMODE */ 255d65a2f8fSBarry Smith MPI_Allreduce(&aij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm); 256dbb450caSBarry Smith if (addv == (ADD_VALUES|INSERT_VALUES)) { 257e3372554SBarry Smith SETERRQ(1,0,"Some processors inserted others added"); 2581eb62cbbSBarry Smith } 2591eb62cbbSBarry Smith aij->insertmode = addv; /* in case this processor had no cache */ 2601eb62cbbSBarry Smith 2611eb62cbbSBarry Smith /* first count number of contributors to each processor */ 2620452661fSBarry Smith nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 263cddf8d76SBarry Smith PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 2640452661fSBarry Smith owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 2651eb62cbbSBarry Smith for ( i=0; i<aij->stash.n; i++ ) { 2661eb62cbbSBarry Smith idx = aij->stash.idx[i]; 26717699dbbSLois Curfman McInnes for ( j=0; j<size; j++ ) { 2681eb62cbbSBarry Smith if (idx >= owners[j] && idx < owners[j+1]) { 2691eb62cbbSBarry Smith nprocs[j]++; procs[j] = 1; owner[i] = j; break; 2708a729477SBarry Smith } 2718a729477SBarry Smith } 2728a729477SBarry Smith } 27317699dbbSLois Curfman McInnes nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 2741eb62cbbSBarry Smith 2751eb62cbbSBarry Smith /* inform other processors of number of messages and max length*/ 2760452661fSBarry Smith work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 27717699dbbSLois Curfman McInnes MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm); 27817699dbbSLois Curfman McInnes nreceives = work[rank]; 27917699dbbSLois Curfman McInnes MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 28017699dbbSLois Curfman McInnes nmax = work[rank]; 2810452661fSBarry Smith PetscFree(work); 2821eb62cbbSBarry Smith 2831eb62cbbSBarry Smith /* post receives: 2841eb62cbbSBarry Smith 1) each message will consist of ordered pairs 2851eb62cbbSBarry Smith (global index,value) we store the global index as a double 286d6dfbf8fSBarry Smith to simplify the message passing. 2871eb62cbbSBarry Smith 2) since we don't know how long each individual message is we 2881eb62cbbSBarry Smith allocate the largest needed buffer for each receive. Potentially 2891eb62cbbSBarry Smith this is a lot of wasted space. 2901eb62cbbSBarry Smith 2911eb62cbbSBarry Smith 2921eb62cbbSBarry Smith This could be done better. 2931eb62cbbSBarry Smith */ 2940452661fSBarry Smith rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar)); 29578b31e54SBarry Smith CHKPTRQ(rvalues); 2960452661fSBarry Smith recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request)); 29778b31e54SBarry Smith CHKPTRQ(recv_waits); 2981eb62cbbSBarry Smith for ( i=0; i<nreceives; i++ ) { 299416022c9SBarry Smith MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 3001eb62cbbSBarry Smith comm,recv_waits+i); 3011eb62cbbSBarry Smith } 3021eb62cbbSBarry Smith 3031eb62cbbSBarry Smith /* do sends: 3041eb62cbbSBarry Smith 1) starts[i] gives the starting index in svalues for stuff going to 3051eb62cbbSBarry Smith the ith processor 3061eb62cbbSBarry Smith */ 3070452661fSBarry Smith svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 3080452661fSBarry Smith send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 30978b31e54SBarry Smith CHKPTRQ(send_waits); 3100452661fSBarry Smith starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 3111eb62cbbSBarry Smith starts[0] = 0; 31217699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 3131eb62cbbSBarry Smith for ( i=0; i<aij->stash.n; i++ ) { 3141eb62cbbSBarry Smith svalues[3*starts[owner[i]]] = (Scalar) aij->stash.idx[i]; 3151eb62cbbSBarry Smith svalues[3*starts[owner[i]]+1] = (Scalar) aij->stash.idy[i]; 3161eb62cbbSBarry Smith svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i]; 3171eb62cbbSBarry Smith } 3180452661fSBarry Smith PetscFree(owner); 3191eb62cbbSBarry Smith starts[0] = 0; 32017699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 3211eb62cbbSBarry Smith count = 0; 32217699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 3231eb62cbbSBarry Smith if (procs[i]) { 324416022c9SBarry Smith MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag, 3251eb62cbbSBarry Smith comm,send_waits+count++); 3261eb62cbbSBarry Smith } 3271eb62cbbSBarry Smith } 3280452661fSBarry Smith PetscFree(starts); PetscFree(nprocs); 3291eb62cbbSBarry Smith 3301eb62cbbSBarry Smith /* Free cache space */ 33155908cccSLois Curfman McInnes PLogInfo(mat,"MatAssemblyBegin_MPIAIJ:Number of off-processor values %d\n",aij->stash.n); 33278b31e54SBarry Smith ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 3331eb62cbbSBarry Smith 3341eb62cbbSBarry Smith aij->svalues = svalues; aij->rvalues = rvalues; 3351eb62cbbSBarry Smith aij->nsends = nsends; aij->nrecvs = nreceives; 3361eb62cbbSBarry Smith aij->send_waits = send_waits; aij->recv_waits = recv_waits; 3371eb62cbbSBarry Smith aij->rmax = nmax; 3381eb62cbbSBarry Smith 3391eb62cbbSBarry Smith return 0; 3401eb62cbbSBarry Smith } 34144a69424SLois Curfman McInnes extern int MatSetUpMultiply_MPIAIJ(Mat); 3421eb62cbbSBarry Smith 3435615d1e5SSatish Balay #undef __FUNC__ 3445615d1e5SSatish Balay #define __FUNC__ "MatAssemblyEnd_MPIAIJ" 345fc76ce87SLois Curfman McInnes static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 3461eb62cbbSBarry Smith { 34744a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 3481eb62cbbSBarry Smith MPI_Status *send_status,recv_status; 349416022c9SBarry Smith int imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr; 350905e6a2fSBarry Smith int row,col,other_disassembled; 3511eb62cbbSBarry Smith Scalar *values,val; 3521eb62cbbSBarry Smith InsertMode addv = aij->insertmode; 3531eb62cbbSBarry Smith 3541eb62cbbSBarry Smith /* wait on receives */ 3551eb62cbbSBarry Smith while (count) { 356d6dfbf8fSBarry Smith MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status); 3571eb62cbbSBarry Smith /* unpack receives into our local space */ 358d6dfbf8fSBarry Smith values = aij->rvalues + 3*imdex*aij->rmax; 359c60448a5SBarry Smith MPI_Get_count(&recv_status,MPIU_SCALAR,&n); 3601eb62cbbSBarry Smith n = n/3; 3611eb62cbbSBarry Smith for ( i=0; i<n; i++ ) { 362227d817aSBarry Smith row = (int) PetscReal(values[3*i]) - aij->rstart; 363227d817aSBarry Smith col = (int) PetscReal(values[3*i+1]); 3641eb62cbbSBarry Smith val = values[3*i+2]; 3651eb62cbbSBarry Smith if (col >= aij->cstart && col < aij->cend) { 3661eb62cbbSBarry Smith col -= aij->cstart; 3671eb62cbbSBarry Smith MatSetValues(aij->A,1,&row,1,&col,&val,addv); 3681eb62cbbSBarry Smith } 3691eb62cbbSBarry Smith else { 37055a1b374SBarry Smith if (mat->was_assembled || mat->assembled) { 371905e6a2fSBarry Smith if (!aij->colmap) { 372905e6a2fSBarry Smith ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 373905e6a2fSBarry Smith } 374905e6a2fSBarry Smith col = aij->colmap[col] - 1; 375ec8511deSBarry Smith if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 3762493cbb0SBarry Smith ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 377227d817aSBarry Smith col = (int) PetscReal(values[3*i+1]); 378d6dfbf8fSBarry Smith } 3799e25ed09SBarry Smith } 3801eb62cbbSBarry Smith MatSetValues(aij->B,1,&row,1,&col,&val,addv); 3811eb62cbbSBarry Smith } 3821eb62cbbSBarry Smith } 3831eb62cbbSBarry Smith count--; 3841eb62cbbSBarry Smith } 3850452661fSBarry Smith PetscFree(aij->recv_waits); PetscFree(aij->rvalues); 3861eb62cbbSBarry Smith 3871eb62cbbSBarry Smith /* wait on sends */ 3881eb62cbbSBarry Smith if (aij->nsends) { 3890a198c4cSBarry Smith send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 3901eb62cbbSBarry Smith MPI_Waitall(aij->nsends,aij->send_waits,send_status); 3910452661fSBarry Smith PetscFree(send_status); 3921eb62cbbSBarry Smith } 3930452661fSBarry Smith PetscFree(aij->send_waits); PetscFree(aij->svalues); 3941eb62cbbSBarry Smith 39564119d58SLois Curfman McInnes aij->insertmode = NOT_SET_VALUES; 39678b31e54SBarry Smith ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr); 39778b31e54SBarry Smith ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr); 3981eb62cbbSBarry Smith 3992493cbb0SBarry Smith /* determine if any processor has disassembled, if so we must 4002493cbb0SBarry Smith also disassemble ourselfs, in order that we may reassemble. */ 401227d817aSBarry Smith MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm); 402227d817aSBarry Smith if (mat->was_assembled && !other_disassembled) { 4032493cbb0SBarry Smith ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 4042493cbb0SBarry Smith } 4052493cbb0SBarry Smith 4066d4a8577SBarry Smith if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 40778b31e54SBarry Smith ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr); 4085e42470aSBarry Smith } 40978b31e54SBarry Smith ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr); 41078b31e54SBarry Smith ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr); 4115e42470aSBarry Smith 4127a0afa10SBarry Smith if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;} 4138a729477SBarry Smith return 0; 4148a729477SBarry Smith } 4158a729477SBarry Smith 4165615d1e5SSatish Balay #undef __FUNC__ 4175615d1e5SSatish Balay #define __FUNC__ "MatZeroEntries_MPIAIJ" 4182ee70a88SLois Curfman McInnes static int MatZeroEntries_MPIAIJ(Mat A) 4191eb62cbbSBarry Smith { 42044a69424SLois Curfman McInnes Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 421dbd7a890SLois Curfman McInnes int ierr; 42278b31e54SBarry Smith ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 42378b31e54SBarry Smith ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 4241eb62cbbSBarry Smith return 0; 4251eb62cbbSBarry Smith } 4261eb62cbbSBarry Smith 4271eb62cbbSBarry Smith /* the code does not do the diagonal entries correctly unless the 4281eb62cbbSBarry Smith matrix is square and the column and row owerships are identical. 4291eb62cbbSBarry Smith This is a BUG. The only way to fix it seems to be to access 4301eb62cbbSBarry Smith aij->A and aij->B directly and not through the MatZeroRows() 4311eb62cbbSBarry Smith routine. 4321eb62cbbSBarry Smith */ 4335615d1e5SSatish Balay #undef __FUNC__ 4345615d1e5SSatish Balay #define __FUNC__ "MatZeroRows_MPIAIJ" 43544a69424SLois Curfman McInnes static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag) 4361eb62cbbSBarry Smith { 43744a69424SLois Curfman McInnes Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 43817699dbbSLois Curfman McInnes int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 4396abc6512SBarry Smith int *procs,*nprocs,j,found,idx,nsends,*work; 44017699dbbSLois Curfman McInnes int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 4415392566eSBarry Smith int *rvalues,tag = A->tag,count,base,slen,n,*source; 442d6dfbf8fSBarry Smith int *lens,imdex,*lrows,*values; 443d6dfbf8fSBarry Smith MPI_Comm comm = A->comm; 4441eb62cbbSBarry Smith MPI_Request *send_waits,*recv_waits; 4451eb62cbbSBarry Smith MPI_Status recv_status,*send_status; 4461eb62cbbSBarry Smith IS istmp; 4471eb62cbbSBarry Smith 44877c4ece6SBarry Smith ierr = ISGetSize(is,&N); CHKERRQ(ierr); 44978b31e54SBarry Smith ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 4501eb62cbbSBarry Smith 4511eb62cbbSBarry Smith /* first count number of contributors to each processor */ 4520452661fSBarry Smith nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 453cddf8d76SBarry Smith PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 4540452661fSBarry Smith owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 4551eb62cbbSBarry Smith for ( i=0; i<N; i++ ) { 4561eb62cbbSBarry Smith idx = rows[i]; 4571eb62cbbSBarry Smith found = 0; 45817699dbbSLois Curfman McInnes for ( j=0; j<size; j++ ) { 4591eb62cbbSBarry Smith if (idx >= owners[j] && idx < owners[j+1]) { 4601eb62cbbSBarry Smith nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 4611eb62cbbSBarry Smith } 4621eb62cbbSBarry Smith } 463e3372554SBarry Smith if (!found) SETERRQ(1,0,"Index out of range"); 4641eb62cbbSBarry Smith } 46517699dbbSLois Curfman McInnes nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 4661eb62cbbSBarry Smith 4671eb62cbbSBarry Smith /* inform other processors of number of messages and max length*/ 4680452661fSBarry Smith work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 46917699dbbSLois Curfman McInnes MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm); 47017699dbbSLois Curfman McInnes nrecvs = work[rank]; 47117699dbbSLois Curfman McInnes MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 47217699dbbSLois Curfman McInnes nmax = work[rank]; 4730452661fSBarry Smith PetscFree(work); 4741eb62cbbSBarry Smith 4751eb62cbbSBarry Smith /* post receives: */ 4760452661fSBarry Smith rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */ 47778b31e54SBarry Smith CHKPTRQ(rvalues); 4780452661fSBarry Smith recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request)); 47978b31e54SBarry Smith CHKPTRQ(recv_waits); 4801eb62cbbSBarry Smith for ( i=0; i<nrecvs; i++ ) { 481416022c9SBarry Smith MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i); 4821eb62cbbSBarry Smith } 4831eb62cbbSBarry Smith 4841eb62cbbSBarry Smith /* do sends: 4851eb62cbbSBarry Smith 1) starts[i] gives the starting index in svalues for stuff going to 4861eb62cbbSBarry Smith the ith processor 4871eb62cbbSBarry Smith */ 4880452661fSBarry Smith svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 4890452661fSBarry Smith send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 49078b31e54SBarry Smith CHKPTRQ(send_waits); 4910452661fSBarry Smith starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 4921eb62cbbSBarry Smith starts[0] = 0; 49317699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 4941eb62cbbSBarry Smith for ( i=0; i<N; i++ ) { 4951eb62cbbSBarry Smith svalues[starts[owner[i]]++] = rows[i]; 4961eb62cbbSBarry Smith } 4971eb62cbbSBarry Smith ISRestoreIndices(is,&rows); 4981eb62cbbSBarry Smith 4991eb62cbbSBarry Smith starts[0] = 0; 50017699dbbSLois Curfman McInnes for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 5011eb62cbbSBarry Smith count = 0; 50217699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 5031eb62cbbSBarry Smith if (procs[i]) { 504416022c9SBarry Smith MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++); 5051eb62cbbSBarry Smith } 5061eb62cbbSBarry Smith } 5070452661fSBarry Smith PetscFree(starts); 5081eb62cbbSBarry Smith 50917699dbbSLois Curfman McInnes base = owners[rank]; 5101eb62cbbSBarry Smith 5111eb62cbbSBarry Smith /* wait on receives */ 5120452661fSBarry Smith lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 5131eb62cbbSBarry Smith source = lens + nrecvs; 5141eb62cbbSBarry Smith count = nrecvs; slen = 0; 5151eb62cbbSBarry Smith while (count) { 516d6dfbf8fSBarry Smith MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status); 5171eb62cbbSBarry Smith /* unpack receives into our local space */ 5181eb62cbbSBarry Smith MPI_Get_count(&recv_status,MPI_INT,&n); 519d6dfbf8fSBarry Smith source[imdex] = recv_status.MPI_SOURCE; 520d6dfbf8fSBarry Smith lens[imdex] = n; 5211eb62cbbSBarry Smith slen += n; 5221eb62cbbSBarry Smith count--; 5231eb62cbbSBarry Smith } 5240452661fSBarry Smith PetscFree(recv_waits); 5251eb62cbbSBarry Smith 5261eb62cbbSBarry Smith /* move the data into the send scatter */ 5270452661fSBarry Smith lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 5281eb62cbbSBarry Smith count = 0; 5291eb62cbbSBarry Smith for ( i=0; i<nrecvs; i++ ) { 5301eb62cbbSBarry Smith values = rvalues + i*nmax; 5311eb62cbbSBarry Smith for ( j=0; j<lens[i]; j++ ) { 5321eb62cbbSBarry Smith lrows[count++] = values[j] - base; 5331eb62cbbSBarry Smith } 5341eb62cbbSBarry Smith } 5350452661fSBarry Smith PetscFree(rvalues); PetscFree(lens); 5360452661fSBarry Smith PetscFree(owner); PetscFree(nprocs); 5371eb62cbbSBarry Smith 5381eb62cbbSBarry Smith /* actually zap the local rows */ 539537820f0SBarry Smith ierr = ISCreateGeneral(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 540464493b3SBarry Smith PLogObjectParent(A,istmp); 5410452661fSBarry Smith PetscFree(lrows); 54278b31e54SBarry Smith ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 54378b31e54SBarry Smith ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 54478b31e54SBarry Smith ierr = ISDestroy(istmp); CHKERRQ(ierr); 5451eb62cbbSBarry Smith 5461eb62cbbSBarry Smith /* wait on sends */ 5471eb62cbbSBarry Smith if (nsends) { 5480452661fSBarry Smith send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status)); 54978b31e54SBarry Smith CHKPTRQ(send_status); 5501eb62cbbSBarry Smith MPI_Waitall(nsends,send_waits,send_status); 5510452661fSBarry Smith PetscFree(send_status); 5521eb62cbbSBarry Smith } 5530452661fSBarry Smith PetscFree(send_waits); PetscFree(svalues); 5541eb62cbbSBarry Smith 5551eb62cbbSBarry Smith return 0; 5561eb62cbbSBarry Smith } 5571eb62cbbSBarry Smith 5585615d1e5SSatish Balay #undef __FUNC__ 5595615d1e5SSatish Balay #define __FUNC__ "MatMult_MPIAIJ" 560416022c9SBarry Smith static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 5611eb62cbbSBarry Smith { 562416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 563fbd6ef76SBarry Smith int ierr,nt; 564416022c9SBarry Smith 565a2ce50c7SBarry Smith ierr = VecGetLocalSize(xx,&nt); CHKERRQ(ierr); 566fbd6ef76SBarry Smith if (nt != a->n) { 567e3372554SBarry Smith SETERRQ(1,0,"Incompatible parition of A and xx"); 568fbd6ef76SBarry Smith } 56943a90d84SBarry Smith ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr); 57038597bd4SSatish Balay ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr); 57143a90d84SBarry Smith ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr); 57238597bd4SSatish Balay ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 5731eb62cbbSBarry Smith return 0; 5741eb62cbbSBarry Smith } 5751eb62cbbSBarry Smith 5765615d1e5SSatish Balay #undef __FUNC__ 5775615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_MPIAIJ" 578416022c9SBarry Smith static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 579da3a660dSBarry Smith { 580416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 581da3a660dSBarry Smith int ierr; 58243a90d84SBarry Smith ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 58338597bd4SSatish Balay ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 58443a90d84SBarry Smith ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 58538597bd4SSatish Balay ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 586da3a660dSBarry Smith return 0; 587da3a660dSBarry Smith } 588da3a660dSBarry Smith 5895615d1e5SSatish Balay #undef __FUNC__ 5905615d1e5SSatish Balay #define __FUNC__ "MatMultTrans_MPIAIJ" 591416022c9SBarry Smith static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy) 592da3a660dSBarry Smith { 593416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 594da3a660dSBarry Smith int ierr; 595da3a660dSBarry Smith 596da3a660dSBarry Smith /* do nondiagonal part */ 59738597bd4SSatish Balay ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 598da3a660dSBarry Smith /* send it on its way */ 599537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 600da3a660dSBarry Smith /* do local part */ 60138597bd4SSatish Balay ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr); 602da3a660dSBarry Smith /* receive remote parts: note this assumes the values are not actually */ 603da3a660dSBarry Smith /* inserted in yy until the next line, which is true for my implementation*/ 604da3a660dSBarry Smith /* but is not perhaps always true. */ 605537820f0SBarry Smith ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 606da3a660dSBarry Smith return 0; 607da3a660dSBarry Smith } 608da3a660dSBarry Smith 6095615d1e5SSatish Balay #undef __FUNC__ 6105615d1e5SSatish Balay #define __FUNC__ "MatMultTransAdd_MPIAIJ" 611416022c9SBarry Smith static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 612da3a660dSBarry Smith { 613416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 614da3a660dSBarry Smith int ierr; 615da3a660dSBarry Smith 616da3a660dSBarry Smith /* do nondiagonal part */ 61738597bd4SSatish Balay ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 618da3a660dSBarry Smith /* send it on its way */ 619537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 620da3a660dSBarry Smith /* do local part */ 62138597bd4SSatish Balay ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 622da3a660dSBarry Smith /* receive remote parts: note this assumes the values are not actually */ 623da3a660dSBarry Smith /* inserted in yy until the next line, which is true for my implementation*/ 624da3a660dSBarry Smith /* but is not perhaps always true. */ 6250a198c4cSBarry Smith ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 626da3a660dSBarry Smith return 0; 627da3a660dSBarry Smith } 628da3a660dSBarry Smith 6291eb62cbbSBarry Smith /* 6301eb62cbbSBarry Smith This only works correctly for square matrices where the subblock A->A is the 6311eb62cbbSBarry Smith diagonal block 6321eb62cbbSBarry Smith */ 6335615d1e5SSatish Balay #undef __FUNC__ 6345615d1e5SSatish Balay #define __FUNC__ "MatGetDiagonal_MPIAIJ" 635416022c9SBarry Smith static int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 6361eb62cbbSBarry Smith { 637416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 63844cd7ae7SLois Curfman McInnes if (a->M != a->N) 639e3372554SBarry Smith SETERRQ(1,0,"Supports only square matrix where A->A is diag block"); 6405baf8537SBarry Smith if (a->rstart != a->cstart || a->rend != a->cend) { 641e3372554SBarry Smith SETERRQ(1,0,"row partition must equal col partition"); } 642416022c9SBarry Smith return MatGetDiagonal(a->A,v); 6431eb62cbbSBarry Smith } 6441eb62cbbSBarry Smith 6455615d1e5SSatish Balay #undef __FUNC__ 6465615d1e5SSatish Balay #define __FUNC__ "MatScale_MPIAIJ" 647052efed2SBarry Smith static int MatScale_MPIAIJ(Scalar *aa,Mat A) 648052efed2SBarry Smith { 649052efed2SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 650052efed2SBarry Smith int ierr; 651052efed2SBarry Smith ierr = MatScale(aa,a->A); CHKERRQ(ierr); 652052efed2SBarry Smith ierr = MatScale(aa,a->B); CHKERRQ(ierr); 653052efed2SBarry Smith return 0; 654052efed2SBarry Smith } 655052efed2SBarry Smith 6565615d1e5SSatish Balay #undef __FUNC__ 6575615d1e5SSatish Balay #define __FUNC__ "MatDestroy_MPIAIJ" 65844a69424SLois Curfman McInnes static int MatDestroy_MPIAIJ(PetscObject obj) 6591eb62cbbSBarry Smith { 6601eb62cbbSBarry Smith Mat mat = (Mat) obj; 66144a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 6621eb62cbbSBarry Smith int ierr; 663a5a9c739SBarry Smith #if defined(PETSC_LOG) 6646e35fa57SLois Curfman McInnes PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N); 665a5a9c739SBarry Smith #endif 6660452661fSBarry Smith PetscFree(aij->rowners); 66778b31e54SBarry Smith ierr = MatDestroy(aij->A); CHKERRQ(ierr); 66878b31e54SBarry Smith ierr = MatDestroy(aij->B); CHKERRQ(ierr); 6690452661fSBarry Smith if (aij->colmap) PetscFree(aij->colmap); 6700452661fSBarry Smith if (aij->garray) PetscFree(aij->garray); 6711eb62cbbSBarry Smith if (aij->lvec) VecDestroy(aij->lvec); 672a56f8943SBarry Smith if (aij->Mvctx) VecScatterDestroy(aij->Mvctx); 6737a0afa10SBarry Smith if (aij->rowvalues) PetscFree(aij->rowvalues); 6740452661fSBarry Smith PetscFree(aij); 67590f02eecSBarry Smith if (mat->mapping) { 67690f02eecSBarry Smith ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr); 67790f02eecSBarry Smith } 678a5a9c739SBarry Smith PLogObjectDestroy(mat); 6790452661fSBarry Smith PetscHeaderDestroy(mat); 6801eb62cbbSBarry Smith return 0; 6811eb62cbbSBarry Smith } 6827857610eSBarry Smith #include "draw.h" 683b16a3bb1SBarry Smith #include "pinclude/pviewer.h" 684ee50ffe9SBarry Smith 6855615d1e5SSatish Balay #undef __FUNC__ 6865615d1e5SSatish Balay #define __FUNC__ "MatView_MPIAIJ_Binary" 687416022c9SBarry Smith static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer) 6881eb62cbbSBarry Smith { 689416022c9SBarry Smith Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 690416022c9SBarry Smith int ierr; 691416022c9SBarry Smith 69217699dbbSLois Curfman McInnes if (aij->size == 1) { 693416022c9SBarry Smith ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 694416022c9SBarry Smith } 695e3372554SBarry Smith else SETERRQ(1,0,"Only uniprocessor output supported"); 696416022c9SBarry Smith return 0; 697416022c9SBarry Smith } 698416022c9SBarry Smith 6995615d1e5SSatish Balay #undef __FUNC__ 7005615d1e5SSatish Balay #define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworMatlab" 701d7e8b826SBarry Smith static int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer) 702416022c9SBarry Smith { 70344a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 704dbb450caSBarry Smith Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 705a56f8943SBarry Smith int ierr, format,shift = C->indexshift,rank; 706d636dbe3SBarry Smith FILE *fd; 70719bcc07fSBarry Smith ViewerType vtype; 708416022c9SBarry Smith 70919bcc07fSBarry Smith ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 71019bcc07fSBarry Smith if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) { 71190ace30eSBarry Smith ierr = ViewerGetFormat(viewer,&format); 7120a198c4cSBarry Smith if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 7134e220ebcSLois Curfman McInnes MatInfo info; 7144e220ebcSLois Curfman McInnes int flg; 715a56f8943SBarry Smith MPI_Comm_rank(mat->comm,&rank); 71690ace30eSBarry Smith ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 7174e220ebcSLois Curfman McInnes ierr = MatGetInfo(mat,MAT_LOCAL,&info); 71895e01e2fSLois Curfman McInnes ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr); 71977c4ece6SBarry Smith PetscSequentialPhaseBegin(mat->comm,1); 72095e01e2fSLois Curfman McInnes if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 7214e220ebcSLois Curfman McInnes rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory); 72295e01e2fSLois Curfman McInnes else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 7234e220ebcSLois Curfman McInnes rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory); 7244e220ebcSLois Curfman McInnes ierr = MatGetInfo(aij->A,MAT_LOCAL,&info); 7254e220ebcSLois Curfman McInnes fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used); 7264e220ebcSLois Curfman McInnes ierr = MatGetInfo(aij->B,MAT_LOCAL,&info); 7274e220ebcSLois Curfman McInnes fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used); 728a56f8943SBarry Smith fflush(fd); 72977c4ece6SBarry Smith PetscSequentialPhaseEnd(mat->comm,1); 730a40aa06bSLois Curfman McInnes ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr); 731416022c9SBarry Smith return 0; 732416022c9SBarry Smith } 7330a198c4cSBarry Smith else if (format == VIEWER_FORMAT_ASCII_INFO) { 73408480c60SBarry Smith return 0; 73508480c60SBarry Smith } 736416022c9SBarry Smith } 737416022c9SBarry Smith 73819bcc07fSBarry Smith if (vtype == DRAW_VIEWER) { 73919bcc07fSBarry Smith Draw draw; 74019bcc07fSBarry Smith PetscTruth isnull; 74119bcc07fSBarry Smith ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr); 74219bcc07fSBarry Smith ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0; 74319bcc07fSBarry Smith } 74419bcc07fSBarry Smith 74519bcc07fSBarry Smith if (vtype == ASCII_FILE_VIEWER) { 74690ace30eSBarry Smith ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 74777c4ece6SBarry Smith PetscSequentialPhaseBegin(mat->comm,1); 748d13ab20cSBarry Smith fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 74917699dbbSLois Curfman McInnes aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart, 7501eb62cbbSBarry Smith aij->cend); 75178b31e54SBarry Smith ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 75278b31e54SBarry Smith ierr = MatView(aij->B,viewer); CHKERRQ(ierr); 753d13ab20cSBarry Smith fflush(fd); 75477c4ece6SBarry Smith PetscSequentialPhaseEnd(mat->comm,1); 755d13ab20cSBarry Smith } 756416022c9SBarry Smith else { 757a56f8943SBarry Smith int size = aij->size; 758a56f8943SBarry Smith rank = aij->rank; 75917699dbbSLois Curfman McInnes if (size == 1) { 76078b31e54SBarry Smith ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 76195373324SBarry Smith } 76295373324SBarry Smith else { 76395373324SBarry Smith /* assemble the entire matrix onto first processor. */ 76495373324SBarry Smith Mat A; 765ec8511deSBarry Smith Mat_SeqAIJ *Aloc; 7662eb8c8abSBarry Smith int M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct; 76795373324SBarry Smith Scalar *a; 7682ee70a88SLois Curfman McInnes 76917699dbbSLois Curfman McInnes if (!rank) { 770b4fd4287SBarry Smith ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 771c451ab25SLois Curfman McInnes CHKERRQ(ierr); 77295373324SBarry Smith } 77395373324SBarry Smith else { 774b4fd4287SBarry Smith ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 775c451ab25SLois Curfman McInnes CHKERRQ(ierr); 77695373324SBarry Smith } 777464493b3SBarry Smith PLogObjectParent(mat,A); 778416022c9SBarry Smith 77995373324SBarry Smith /* copy over the A part */ 780ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) aij->A->data; 7812ee70a88SLois Curfman McInnes m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 78295373324SBarry Smith row = aij->rstart; 783dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;} 78495373324SBarry Smith for ( i=0; i<m; i++ ) { 785416022c9SBarry Smith ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 78695373324SBarry Smith row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 78795373324SBarry Smith } 7882ee70a88SLois Curfman McInnes aj = Aloc->j; 789dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;} 79095373324SBarry Smith 79195373324SBarry Smith /* copy over the B part */ 792ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) aij->B->data; 7932ee70a88SLois Curfman McInnes m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 79495373324SBarry Smith row = aij->rstart; 7950452661fSBarry Smith ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols); 796dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];} 79795373324SBarry Smith for ( i=0; i<m; i++ ) { 798416022c9SBarry Smith ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 79995373324SBarry Smith row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 80095373324SBarry Smith } 8010452661fSBarry Smith PetscFree(ct); 8026d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 8036d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 80417699dbbSLois Curfman McInnes if (!rank) { 80578b31e54SBarry Smith ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 80695373324SBarry Smith } 80778b31e54SBarry Smith ierr = MatDestroy(A); CHKERRQ(ierr); 80895373324SBarry Smith } 80995373324SBarry Smith } 8101eb62cbbSBarry Smith return 0; 8111eb62cbbSBarry Smith } 8121eb62cbbSBarry Smith 8135615d1e5SSatish Balay #undef __FUNC__ 8145615d1e5SSatish Balay #define __FUNC__ "MatView_MPIAIJ" 815416022c9SBarry Smith static int MatView_MPIAIJ(PetscObject obj,Viewer viewer) 816416022c9SBarry Smith { 817416022c9SBarry Smith Mat mat = (Mat) obj; 818416022c9SBarry Smith int ierr; 81919bcc07fSBarry Smith ViewerType vtype; 820416022c9SBarry Smith 82119bcc07fSBarry Smith ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 82219bcc07fSBarry Smith if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER || 82319bcc07fSBarry Smith vtype == DRAW_VIEWER || vtype == MATLAB_VIEWER) { 824d7e8b826SBarry Smith ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 825416022c9SBarry Smith } 82619bcc07fSBarry Smith else if (vtype == BINARY_FILE_VIEWER) { 827416022c9SBarry Smith return MatView_MPIAIJ_Binary(mat,viewer); 828416022c9SBarry Smith } 829416022c9SBarry Smith return 0; 830416022c9SBarry Smith } 831416022c9SBarry Smith 8321eb62cbbSBarry Smith /* 8331eb62cbbSBarry Smith This has to provide several versions. 8341eb62cbbSBarry Smith 8351eb62cbbSBarry Smith 2) a) use only local smoothing updating outer values only once. 8361eb62cbbSBarry Smith b) local smoothing updating outer values each inner iteration 837d6dfbf8fSBarry Smith 3) color updating out values betwen colors. 8381eb62cbbSBarry Smith */ 8395615d1e5SSatish Balay #undef __FUNC__ 8405615d1e5SSatish Balay #define __FUNC__ "MatRelax_MPIAIJ" 841fc76ce87SLois Curfman McInnes static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag, 842dbb450caSBarry Smith double fshift,int its,Vec xx) 8438a729477SBarry Smith { 84444a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 845d6dfbf8fSBarry Smith Mat AA = mat->A, BB = mat->B; 846ec8511deSBarry Smith Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data; 847c16cb8f2SBarry Smith Scalar *b,*x,*xs,*ls,d,*v,sum; 8486abc6512SBarry Smith int ierr,*idx, *diag; 849416022c9SBarry Smith int n = mat->n, m = mat->m, i,shift = A->indexshift; 8508a729477SBarry Smith 851d6dfbf8fSBarry Smith VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls); 852dbb450caSBarry Smith xs = x + shift; /* shift by one for index start of 1 */ 853dbb450caSBarry Smith ls = ls + shift; 854ec8511deSBarry Smith if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;} 855d6dfbf8fSBarry Smith diag = A->diag; 856c16cb8f2SBarry Smith if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 857da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 85838597bd4SSatish Balay return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx); 859da3a660dSBarry Smith } 86043a90d84SBarry Smith ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx); 86178b31e54SBarry Smith CHKERRQ(ierr); 86243a90d84SBarry Smith ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx); 86378b31e54SBarry Smith CHKERRQ(ierr); 864d6dfbf8fSBarry Smith while (its--) { 865d6dfbf8fSBarry Smith /* go down through the rows */ 866d6dfbf8fSBarry Smith for ( i=0; i<m; i++ ) { 867d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 868dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 869dbb450caSBarry Smith v = A->a + A->i[i] + shift; 870d6dfbf8fSBarry Smith sum = b[i]; 871d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 872dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 873d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 874dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 875dbb450caSBarry Smith v = B->a + B->i[i] + shift; 876d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 87755a1b374SBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 878d6dfbf8fSBarry Smith } 879d6dfbf8fSBarry Smith /* come up through the rows */ 880d6dfbf8fSBarry Smith for ( i=m-1; i>-1; i-- ) { 881d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 882dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 883dbb450caSBarry Smith v = A->a + A->i[i] + shift; 884d6dfbf8fSBarry Smith sum = b[i]; 885d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 886dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 887d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 888dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 889dbb450caSBarry Smith v = B->a + B->i[i] + shift; 890d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 89155a1b374SBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 892d6dfbf8fSBarry Smith } 893d6dfbf8fSBarry Smith } 894d6dfbf8fSBarry Smith } 895d6dfbf8fSBarry Smith else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 896da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 89738597bd4SSatish Balay return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx); 898da3a660dSBarry Smith } 89943a90d84SBarry Smith ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx); 90078b31e54SBarry Smith CHKERRQ(ierr); 90143a90d84SBarry Smith ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx); 90278b31e54SBarry Smith CHKERRQ(ierr); 903d6dfbf8fSBarry Smith while (its--) { 904d6dfbf8fSBarry Smith for ( i=0; i<m; i++ ) { 905d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 906dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 907dbb450caSBarry Smith v = A->a + A->i[i] + shift; 908d6dfbf8fSBarry Smith sum = b[i]; 909d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 910dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 911d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 912dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 913dbb450caSBarry Smith v = B->a + B->i[i] + shift; 914d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 91555a1b374SBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 916d6dfbf8fSBarry Smith } 917d6dfbf8fSBarry Smith } 918d6dfbf8fSBarry Smith } 919d6dfbf8fSBarry Smith else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 920da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 92138597bd4SSatish Balay return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx); 922da3a660dSBarry Smith } 92343a90d84SBarry Smith ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD, 92478b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 92543a90d84SBarry Smith ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD, 92678b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 927d6dfbf8fSBarry Smith while (its--) { 928d6dfbf8fSBarry Smith for ( i=m-1; i>-1; i-- ) { 929d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 930dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 931dbb450caSBarry Smith v = A->a + A->i[i] + shift; 932d6dfbf8fSBarry Smith sum = b[i]; 933d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 934dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 935d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 936dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 937dbb450caSBarry Smith v = B->a + B->i[i] + shift; 938d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 93955a1b374SBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 940d6dfbf8fSBarry Smith } 941d6dfbf8fSBarry Smith } 942d6dfbf8fSBarry Smith } 943c16cb8f2SBarry Smith else { 944e3372554SBarry Smith SETERRQ(1,0,"Option not supported"); 945c16cb8f2SBarry Smith } 9468a729477SBarry Smith return 0; 9478a729477SBarry Smith } 948a66be287SLois Curfman McInnes 9495615d1e5SSatish Balay #undef __FUNC__ 9505615d1e5SSatish Balay #define __FUNC__ "MatGetInfo_MPIAIJ" 9514e220ebcSLois Curfman McInnes static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 952a66be287SLois Curfman McInnes { 953a66be287SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 954a66be287SLois Curfman McInnes Mat A = mat->A, B = mat->B; 9554e220ebcSLois Curfman McInnes int ierr; 9564e220ebcSLois Curfman McInnes double isend[5], irecv[5]; 957a66be287SLois Curfman McInnes 9584e220ebcSLois Curfman McInnes info->rows_global = (double)mat->M; 9594e220ebcSLois Curfman McInnes info->columns_global = (double)mat->N; 9604e220ebcSLois Curfman McInnes info->rows_local = (double)mat->m; 9614e220ebcSLois Curfman McInnes info->columns_local = (double)mat->N; 9624e220ebcSLois Curfman McInnes info->block_size = 1.0; 9634e220ebcSLois Curfman McInnes ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr); 9644e220ebcSLois Curfman McInnes isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 9654e220ebcSLois Curfman McInnes isend[3] = info->memory; isend[4] = info->mallocs; 9664e220ebcSLois Curfman McInnes ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr); 9674e220ebcSLois Curfman McInnes isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 9684e220ebcSLois Curfman McInnes isend[3] += info->memory; isend[4] += info->mallocs; 969a66be287SLois Curfman McInnes if (flag == MAT_LOCAL) { 9704e220ebcSLois Curfman McInnes info->nz_used = isend[0]; 9714e220ebcSLois Curfman McInnes info->nz_allocated = isend[1]; 9724e220ebcSLois Curfman McInnes info->nz_unneeded = isend[2]; 9734e220ebcSLois Curfman McInnes info->memory = isend[3]; 9744e220ebcSLois Curfman McInnes info->mallocs = isend[4]; 975a66be287SLois Curfman McInnes } else if (flag == MAT_GLOBAL_MAX) { 9764e220ebcSLois Curfman McInnes MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm); 9774e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 9784e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 9794e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 9804e220ebcSLois Curfman McInnes info->memory = irecv[3]; 9814e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 982a66be287SLois Curfman McInnes } else if (flag == MAT_GLOBAL_SUM) { 9834e220ebcSLois Curfman McInnes MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm); 9844e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 9854e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 9864e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 9874e220ebcSLois Curfman McInnes info->memory = irecv[3]; 9884e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 989a66be287SLois Curfman McInnes } 9904e220ebcSLois Curfman McInnes info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 9914e220ebcSLois Curfman McInnes info->fill_ratio_needed = 0; 9924e220ebcSLois Curfman McInnes info->factor_mallocs = 0; 9934e220ebcSLois Curfman McInnes 994a66be287SLois Curfman McInnes return 0; 995a66be287SLois Curfman McInnes } 996a66be287SLois Curfman McInnes 997299609e3SLois Curfman McInnes extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*); 998299609e3SLois Curfman McInnes extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*); 999299609e3SLois Curfman McInnes extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double); 1000299609e3SLois Curfman McInnes extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *); 1001299609e3SLois Curfman McInnes extern int MatSolve_MPIAIJ(Mat,Vec,Vec); 1002299609e3SLois Curfman McInnes extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec); 1003299609e3SLois Curfman McInnes extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec); 1004299609e3SLois Curfman McInnes extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec); 1005299609e3SLois Curfman McInnes 10065615d1e5SSatish Balay #undef __FUNC__ 10075615d1e5SSatish Balay #define __FUNC__ "MatSetOption_MPIAIJ" 1008416022c9SBarry Smith static int MatSetOption_MPIAIJ(Mat A,MatOption op) 1009c74985f6SBarry Smith { 1010c0bbcb79SLois Curfman McInnes Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1011c74985f6SBarry Smith 10126d4a8577SBarry Smith if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 10136d4a8577SBarry Smith op == MAT_YES_NEW_NONZERO_LOCATIONS || 10146da5968aSLois Curfman McInnes op == MAT_COLUMNS_UNSORTED || 1015b1fbbac0SLois Curfman McInnes op == MAT_COLUMNS_SORTED) { 1016b1fbbac0SLois Curfman McInnes MatSetOption(a->A,op); 1017b1fbbac0SLois Curfman McInnes MatSetOption(a->B,op); 1018b1fbbac0SLois Curfman McInnes } else if (op == MAT_ROW_ORIENTED) { 1019aeafbbfcSLois Curfman McInnes a->roworiented = 1; 1020c0bbcb79SLois Curfman McInnes MatSetOption(a->A,op); 1021c0bbcb79SLois Curfman McInnes MatSetOption(a->B,op); 1022b1fbbac0SLois Curfman McInnes } else if (op == MAT_ROWS_SORTED || 10236da5968aSLois Curfman McInnes op == MAT_ROWS_UNSORTED || 10246d4a8577SBarry Smith op == MAT_SYMMETRIC || 10256d4a8577SBarry Smith op == MAT_STRUCTURALLY_SYMMETRIC || 10266d4a8577SBarry Smith op == MAT_YES_NEW_DIAGONALS) 102794a424c1SBarry Smith PLogInfo(A,"Info:MatSetOption_MPIAIJ:Option ignored\n"); 10286d4a8577SBarry Smith else if (op == MAT_COLUMN_ORIENTED) { 10294b0e389bSBarry Smith a->roworiented = 0; 10304b0e389bSBarry Smith MatSetOption(a->A,op); 10314b0e389bSBarry Smith MatSetOption(a->B,op); 103290f02eecSBarry Smith } else if (op == MAT_IGNORE_OFF_PROCESSOR_ENTRIES) { 103390f02eecSBarry Smith a->donotstash = 1; 103490f02eecSBarry Smith } else if (op == MAT_NO_NEW_DIAGONALS) 1035e3372554SBarry Smith {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");} 1036c0bbcb79SLois Curfman McInnes else 1037e3372554SBarry Smith {SETERRQ(PETSC_ERR_SUP,0,"unknown option");} 1038c74985f6SBarry Smith return 0; 1039c74985f6SBarry Smith } 1040c74985f6SBarry Smith 10415615d1e5SSatish Balay #undef __FUNC__ 10425615d1e5SSatish Balay #define __FUNC__ "MatGetSize_MPIAIJ" 1043d1710a03SLois Curfman McInnes static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n) 1044c74985f6SBarry Smith { 104544a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1046c74985f6SBarry Smith *m = mat->M; *n = mat->N; 1047c74985f6SBarry Smith return 0; 1048c74985f6SBarry Smith } 1049c74985f6SBarry Smith 10505615d1e5SSatish Balay #undef __FUNC__ 10515615d1e5SSatish Balay #define __FUNC__ "MatGetLocalSize_MPIAIJ" 1052d1710a03SLois Curfman McInnes static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n) 1053c74985f6SBarry Smith { 105444a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1055b7c46309SBarry Smith *m = mat->m; *n = mat->N; 1056c74985f6SBarry Smith return 0; 1057c74985f6SBarry Smith } 1058c74985f6SBarry Smith 10595615d1e5SSatish Balay #undef __FUNC__ 10605615d1e5SSatish Balay #define __FUNC__ "MatGetOwnershipRange_MPIAIJ" 1061d1710a03SLois Curfman McInnes static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n) 1062c74985f6SBarry Smith { 106344a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1064c74985f6SBarry Smith *m = mat->rstart; *n = mat->rend; 1065c74985f6SBarry Smith return 0; 1066c74985f6SBarry Smith } 1067c74985f6SBarry Smith 10686d84be18SBarry Smith extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 10696d84be18SBarry Smith extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 10706d84be18SBarry Smith 10715615d1e5SSatish Balay #undef __FUNC__ 10725615d1e5SSatish Balay #define __FUNC__ "MatGetRow_MPIAIJ" 10736d84be18SBarry Smith int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 107439e00950SLois Curfman McInnes { 1075154123eaSLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 107670f0671dSBarry Smith Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 1077154123eaSLois Curfman McInnes int i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart; 1078154123eaSLois Curfman McInnes int nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend; 107970f0671dSBarry Smith int *cmap, *idx_p; 108039e00950SLois Curfman McInnes 1081e3372554SBarry Smith if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,0,"Already active"); 10827a0afa10SBarry Smith mat->getrowactive = PETSC_TRUE; 10837a0afa10SBarry Smith 108470f0671dSBarry Smith if (!mat->rowvalues && (idx || v)) { 10857a0afa10SBarry Smith /* 10867a0afa10SBarry Smith allocate enough space to hold information from the longest row. 10877a0afa10SBarry Smith */ 10887a0afa10SBarry Smith Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data; 1089c16cb8f2SBarry Smith int max = 1,m = mat->m,tmp; 1090c16cb8f2SBarry Smith for ( i=0; i<m; i++ ) { 10917a0afa10SBarry Smith tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 10927a0afa10SBarry Smith if (max < tmp) { max = tmp; } 10937a0afa10SBarry Smith } 10947a0afa10SBarry Smith mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar))); 10957a0afa10SBarry Smith CHKPTRQ(mat->rowvalues); 10967a0afa10SBarry Smith mat->rowindices = (int *) (mat->rowvalues + max); 10977a0afa10SBarry Smith } 10987a0afa10SBarry Smith 1099e3372554SBarry Smith if (row < rstart || row >= rend) SETERRQ(1,0,"Only local rows") 1100abc0e9e4SLois Curfman McInnes lrow = row - rstart; 110139e00950SLois Curfman McInnes 1102154123eaSLois Curfman McInnes pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1103154123eaSLois Curfman McInnes if (!v) {pvA = 0; pvB = 0;} 1104154123eaSLois Curfman McInnes if (!idx) {pcA = 0; if (!v) pcB = 0;} 110538597bd4SSatish Balay ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 110638597bd4SSatish Balay ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1107154123eaSLois Curfman McInnes nztot = nzA + nzB; 1108154123eaSLois Curfman McInnes 110970f0671dSBarry Smith cmap = mat->garray; 1110154123eaSLois Curfman McInnes if (v || idx) { 1111154123eaSLois Curfman McInnes if (nztot) { 1112154123eaSLois Curfman McInnes /* Sort by increasing column numbers, assuming A and B already sorted */ 111370f0671dSBarry Smith int imark = -1; 1114154123eaSLois Curfman McInnes if (v) { 111570f0671dSBarry Smith *v = v_p = mat->rowvalues; 111639e00950SLois Curfman McInnes for ( i=0; i<nzB; i++ ) { 111770f0671dSBarry Smith if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1118154123eaSLois Curfman McInnes else break; 1119154123eaSLois Curfman McInnes } 1120154123eaSLois Curfman McInnes imark = i; 112170f0671dSBarry Smith for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 112270f0671dSBarry Smith for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 1123154123eaSLois Curfman McInnes } 1124154123eaSLois Curfman McInnes if (idx) { 112570f0671dSBarry Smith *idx = idx_p = mat->rowindices; 112670f0671dSBarry Smith if (imark > -1) { 112770f0671dSBarry Smith for ( i=0; i<imark; i++ ) { 112870f0671dSBarry Smith idx_p[i] = cmap[cworkB[i]]; 112970f0671dSBarry Smith } 113070f0671dSBarry Smith } else { 1131154123eaSLois Curfman McInnes for ( i=0; i<nzB; i++ ) { 113270f0671dSBarry Smith if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1133154123eaSLois Curfman McInnes else break; 1134154123eaSLois Curfman McInnes } 1135154123eaSLois Curfman McInnes imark = i; 113670f0671dSBarry Smith } 113770f0671dSBarry Smith for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart + cworkA[i]; 113870f0671dSBarry Smith for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]]; 113939e00950SLois Curfman McInnes } 114039e00950SLois Curfman McInnes } 11411ca473b0SSatish Balay else { 11421ca473b0SSatish Balay if (idx) *idx = 0; 11431ca473b0SSatish Balay if (v) *v = 0; 11441ca473b0SSatish Balay } 1145154123eaSLois Curfman McInnes } 114639e00950SLois Curfman McInnes *nz = nztot; 114738597bd4SSatish Balay ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 114838597bd4SSatish Balay ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 114939e00950SLois Curfman McInnes return 0; 115039e00950SLois Curfman McInnes } 115139e00950SLois Curfman McInnes 11525615d1e5SSatish Balay #undef __FUNC__ 11535615d1e5SSatish Balay #define __FUNC__ "MatRestoreRow_MPIAIJ" 11546d84be18SBarry Smith int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 115539e00950SLois Curfman McInnes { 11567a0afa10SBarry Smith Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 11577a0afa10SBarry Smith if (aij->getrowactive == PETSC_FALSE) { 1158e3372554SBarry Smith SETERRQ(1,0,"MatGetRow not called"); 11597a0afa10SBarry Smith } 11607a0afa10SBarry Smith aij->getrowactive = PETSC_FALSE; 116139e00950SLois Curfman McInnes return 0; 116239e00950SLois Curfman McInnes } 116339e00950SLois Curfman McInnes 11645615d1e5SSatish Balay #undef __FUNC__ 11655615d1e5SSatish Balay #define __FUNC__ "MatNorm_MPIAIJ" 1166cddf8d76SBarry Smith static int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm) 1167855ac2c5SLois Curfman McInnes { 1168855ac2c5SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1169ec8511deSBarry Smith Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data; 1170416022c9SBarry Smith int ierr, i, j, cstart = aij->cstart,shift = amat->indexshift; 1171416022c9SBarry Smith double sum = 0.0; 117204ca555eSLois Curfman McInnes Scalar *v; 117304ca555eSLois Curfman McInnes 117417699dbbSLois Curfman McInnes if (aij->size == 1) { 117514183eadSLois Curfman McInnes ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr); 117637fa93a5SLois Curfman McInnes } else { 117704ca555eSLois Curfman McInnes if (type == NORM_FROBENIUS) { 117804ca555eSLois Curfman McInnes v = amat->a; 117904ca555eSLois Curfman McInnes for (i=0; i<amat->nz; i++ ) { 118004ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX) 118104ca555eSLois Curfman McInnes sum += real(conj(*v)*(*v)); v++; 118204ca555eSLois Curfman McInnes #else 118304ca555eSLois Curfman McInnes sum += (*v)*(*v); v++; 118404ca555eSLois Curfman McInnes #endif 118504ca555eSLois Curfman McInnes } 118604ca555eSLois Curfman McInnes v = bmat->a; 118704ca555eSLois Curfman McInnes for (i=0; i<bmat->nz; i++ ) { 118804ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX) 118904ca555eSLois Curfman McInnes sum += real(conj(*v)*(*v)); v++; 119004ca555eSLois Curfman McInnes #else 119104ca555eSLois Curfman McInnes sum += (*v)*(*v); v++; 119204ca555eSLois Curfman McInnes #endif 119304ca555eSLois Curfman McInnes } 11946d84be18SBarry Smith MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm); 119504ca555eSLois Curfman McInnes *norm = sqrt(*norm); 119604ca555eSLois Curfman McInnes } 119704ca555eSLois Curfman McInnes else if (type == NORM_1) { /* max column norm */ 119804ca555eSLois Curfman McInnes double *tmp, *tmp2; 119904ca555eSLois Curfman McInnes int *jj, *garray = aij->garray; 12000452661fSBarry Smith tmp = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp); 12010452661fSBarry Smith tmp2 = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp2); 1202cddf8d76SBarry Smith PetscMemzero(tmp,aij->N*sizeof(double)); 120304ca555eSLois Curfman McInnes *norm = 0.0; 120404ca555eSLois Curfman McInnes v = amat->a; jj = amat->j; 120504ca555eSLois Curfman McInnes for ( j=0; j<amat->nz; j++ ) { 1206579c6b6fSBarry Smith tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++; 120704ca555eSLois Curfman McInnes } 120804ca555eSLois Curfman McInnes v = bmat->a; jj = bmat->j; 120904ca555eSLois Curfman McInnes for ( j=0; j<bmat->nz; j++ ) { 1210579c6b6fSBarry Smith tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++; 121104ca555eSLois Curfman McInnes } 12126d84be18SBarry Smith MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm); 121304ca555eSLois Curfman McInnes for ( j=0; j<aij->N; j++ ) { 121404ca555eSLois Curfman McInnes if (tmp2[j] > *norm) *norm = tmp2[j]; 121504ca555eSLois Curfman McInnes } 12160452661fSBarry Smith PetscFree(tmp); PetscFree(tmp2); 121704ca555eSLois Curfman McInnes } 121804ca555eSLois Curfman McInnes else if (type == NORM_INFINITY) { /* max row norm */ 1219515d9167SLois Curfman McInnes double ntemp = 0.0; 122004ca555eSLois Curfman McInnes for ( j=0; j<amat->m; j++ ) { 1221dbb450caSBarry Smith v = amat->a + amat->i[j] + shift; 122204ca555eSLois Curfman McInnes sum = 0.0; 122304ca555eSLois Curfman McInnes for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) { 1224cddf8d76SBarry Smith sum += PetscAbsScalar(*v); v++; 122504ca555eSLois Curfman McInnes } 1226dbb450caSBarry Smith v = bmat->a + bmat->i[j] + shift; 122704ca555eSLois Curfman McInnes for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) { 1228cddf8d76SBarry Smith sum += PetscAbsScalar(*v); v++; 122904ca555eSLois Curfman McInnes } 1230515d9167SLois Curfman McInnes if (sum > ntemp) ntemp = sum; 123104ca555eSLois Curfman McInnes } 12326d84be18SBarry Smith MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm); 123304ca555eSLois Curfman McInnes } 123404ca555eSLois Curfman McInnes else { 1235e3372554SBarry Smith SETERRQ(1,0,"No support for two norm"); 123604ca555eSLois Curfman McInnes } 123737fa93a5SLois Curfman McInnes } 1238855ac2c5SLois Curfman McInnes return 0; 1239855ac2c5SLois Curfman McInnes } 1240855ac2c5SLois Curfman McInnes 12415615d1e5SSatish Balay #undef __FUNC__ 12425615d1e5SSatish Balay #define __FUNC__ "MatTranspose_MPIAIJ" 12430de55854SLois Curfman McInnes static int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1244b7c46309SBarry Smith { 1245b7c46309SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1246dbb450caSBarry Smith Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data; 1247416022c9SBarry Smith int ierr,shift = Aloc->indexshift; 1248416022c9SBarry Smith Mat B; 1249b7c46309SBarry Smith int M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct; 1250b7c46309SBarry Smith Scalar *array; 1251b7c46309SBarry Smith 12523638b69dSLois Curfman McInnes if (matout == PETSC_NULL && M != N) 1253e3372554SBarry Smith SETERRQ(1,0,"Square matrix only for in-place"); 1254b4fd4287SBarry Smith ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0, 1255b4fd4287SBarry Smith PETSC_NULL,&B); CHKERRQ(ierr); 1256b7c46309SBarry Smith 1257b7c46309SBarry Smith /* copy over the A part */ 1258ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) a->A->data; 1259b7c46309SBarry Smith m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1260b7c46309SBarry Smith row = a->rstart; 1261dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;} 1262b7c46309SBarry Smith for ( i=0; i<m; i++ ) { 1263416022c9SBarry Smith ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1264b7c46309SBarry Smith row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1265b7c46309SBarry Smith } 1266b7c46309SBarry Smith aj = Aloc->j; 12674af08d9eSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;} 1268b7c46309SBarry Smith 1269b7c46309SBarry Smith /* copy over the B part */ 1270ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) a->B->data; 1271b7c46309SBarry Smith m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1272b7c46309SBarry Smith row = a->rstart; 12730452661fSBarry Smith ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols); 1274dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];} 1275b7c46309SBarry Smith for ( i=0; i<m; i++ ) { 1276416022c9SBarry Smith ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1277b7c46309SBarry Smith row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1278b7c46309SBarry Smith } 12790452661fSBarry Smith PetscFree(ct); 12806d4a8577SBarry Smith ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 12816d4a8577SBarry Smith ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 12823638b69dSLois Curfman McInnes if (matout != PETSC_NULL) { 12830de55854SLois Curfman McInnes *matout = B; 12840de55854SLois Curfman McInnes } else { 12850de55854SLois Curfman McInnes /* This isn't really an in-place transpose .... but free data structures from a */ 12860452661fSBarry Smith PetscFree(a->rowners); 12870de55854SLois Curfman McInnes ierr = MatDestroy(a->A); CHKERRQ(ierr); 12880de55854SLois Curfman McInnes ierr = MatDestroy(a->B); CHKERRQ(ierr); 12890452661fSBarry Smith if (a->colmap) PetscFree(a->colmap); 12900452661fSBarry Smith if (a->garray) PetscFree(a->garray); 12910de55854SLois Curfman McInnes if (a->lvec) VecDestroy(a->lvec); 1292a56f8943SBarry Smith if (a->Mvctx) VecScatterDestroy(a->Mvctx); 12930452661fSBarry Smith PetscFree(a); 1294416022c9SBarry Smith PetscMemcpy(A,B,sizeof(struct _Mat)); 12950452661fSBarry Smith PetscHeaderDestroy(B); 12960de55854SLois Curfman McInnes } 1297b7c46309SBarry Smith return 0; 1298b7c46309SBarry Smith } 1299b7c46309SBarry Smith 13005615d1e5SSatish Balay #undef __FUNC__ 13015615d1e5SSatish Balay #define __FUNC__ "MatDiagonalScale_MPIAIJ" 13024b967eb1SSatish Balay int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1303a008b906SSatish Balay { 13044b967eb1SSatish Balay Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 13054b967eb1SSatish Balay Mat a = aij->A, b = aij->B; 1306a008b906SSatish Balay int ierr,s1,s2,s3; 1307a008b906SSatish Balay 13084b967eb1SSatish Balay ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr); 13094b967eb1SSatish Balay if (rr) { 13104b967eb1SSatish Balay s3 = aij->n; 13114b967eb1SSatish Balay VecGetLocalSize_Fast(rr,s1); 1312e3372554SBarry Smith if (s1!=s3) SETERRQ(1,0,"right vector non-conforming local size"); 13134b967eb1SSatish Balay /* Overlap communication with computation. */ 131443a90d84SBarry Smith ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr); 1315a008b906SSatish Balay } 13164b967eb1SSatish Balay if (ll) { 13174b967eb1SSatish Balay VecGetLocalSize_Fast(ll,s1); 1318e3372554SBarry Smith if (s1!=s2) SETERRQ(1,0,"left vector non-conforming local size"); 1319c351683dSSatish Balay ierr = (*b->ops.diagonalscale)(b,ll,0); CHKERRQ(ierr); 13204b967eb1SSatish Balay } 13214b967eb1SSatish Balay /* scale the diagonal block */ 1322c351683dSSatish Balay ierr = (*a->ops.diagonalscale)(a,ll,rr); CHKERRQ(ierr); 13234b967eb1SSatish Balay 13244b967eb1SSatish Balay if (rr) { 13254b967eb1SSatish Balay /* Do a scatter end and then right scale the off-diagonal block */ 132643a90d84SBarry Smith ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr); 1327c351683dSSatish Balay ierr = (*b->ops.diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr); 13284b967eb1SSatish Balay } 13294b967eb1SSatish Balay 1330a008b906SSatish Balay return 0; 1331a008b906SSatish Balay } 1332a008b906SSatish Balay 1333a008b906SSatish Balay 1334682d7d0cSBarry Smith extern int MatPrintHelp_SeqAIJ(Mat); 13355615d1e5SSatish Balay #undef __FUNC__ 13365615d1e5SSatish Balay #define __FUNC__ "MatPrintHelp_MPIAIJ" 1337682d7d0cSBarry Smith static int MatPrintHelp_MPIAIJ(Mat A) 1338682d7d0cSBarry Smith { 1339682d7d0cSBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1340682d7d0cSBarry Smith 1341682d7d0cSBarry Smith if (!a->rank) return MatPrintHelp_SeqAIJ(a->A); 1342682d7d0cSBarry Smith else return 0; 1343682d7d0cSBarry Smith } 1344682d7d0cSBarry Smith 13455615d1e5SSatish Balay #undef __FUNC__ 13465615d1e5SSatish Balay #define __FUNC__ "MatGetBlockSize_MPIAIJ" 13475a838052SSatish Balay static int MatGetBlockSize_MPIAIJ(Mat A,int *bs) 13485a838052SSatish Balay { 13495a838052SSatish Balay *bs = 1; 13505a838052SSatish Balay return 0; 13515a838052SSatish Balay } 13525615d1e5SSatish Balay #undef __FUNC__ 13535615d1e5SSatish Balay #define __FUNC__ "MatSetUnfactored_MPIAIJ" 1354bb5a7306SBarry Smith static int MatSetUnfactored_MPIAIJ(Mat A) 1355bb5a7306SBarry Smith { 1356bb5a7306SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1357bb5a7306SBarry Smith int ierr; 1358bb5a7306SBarry Smith ierr = MatSetUnfactored(a->A); CHKERRQ(ierr); 1359bb5a7306SBarry Smith return 0; 1360bb5a7306SBarry Smith } 1361bb5a7306SBarry Smith 13625a838052SSatish Balay 1363fc76ce87SLois Curfman McInnes extern int MatConvert_MPIAIJ(Mat,MatType,Mat *); 13643d1612f7SBarry Smith static int MatConvertSameType_MPIAIJ(Mat,Mat *,int); 13652f86bd48SSatish Balay extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int); 13660a198c4cSBarry Smith extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring); 13670a198c4cSBarry Smith extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **); 13688a729477SBarry Smith /* -------------------------------------------------------------------*/ 13692ee70a88SLois Curfman McInnes static struct _MatOps MatOps = {MatSetValues_MPIAIJ, 137039e00950SLois Curfman McInnes MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ, 137144a69424SLois Curfman McInnes MatMult_MPIAIJ,MatMultAdd_MPIAIJ, 137244a69424SLois Curfman McInnes MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ, 137336ce4990SBarry Smith 0,0, 137436ce4990SBarry Smith 0,0, 137536ce4990SBarry Smith 0,0, 137644a69424SLois Curfman McInnes MatRelax_MPIAIJ, 1377b7c46309SBarry Smith MatTranspose_MPIAIJ, 1378a66be287SLois Curfman McInnes MatGetInfo_MPIAIJ,0, 1379a008b906SSatish Balay MatGetDiagonal_MPIAIJ,MatDiagonalScale_MPIAIJ,MatNorm_MPIAIJ, 1380ee50ffe9SBarry Smith MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ, 13811eb62cbbSBarry Smith 0, 1382299609e3SLois Curfman McInnes MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ, 138336ce4990SBarry Smith 0,0,0,0, 1384d1710a03SLois Curfman McInnes MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ, 138536ce4990SBarry Smith 0,0, 13863e85bedfSBarry Smith 0,0,MatConvert_MPIAIJ,MatConvertSameType_MPIAIJ,0,0, 1387b49de8d1SLois Curfman McInnes 0,0,0, 1388598137ffSSatish Balay MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0, 1389052efed2SBarry Smith MatPrintHelp_MPIAIJ, 13903b2fbd54SBarry Smith MatScale_MPIAIJ,0,0,0, 13910a198c4cSBarry Smith MatGetBlockSize_MPIAIJ,0,0,0,0, 1392bb5a7306SBarry Smith MatFDColoringCreate_MPIAIJ,0,MatSetUnfactored_MPIAIJ}; 139336ce4990SBarry Smith 13948a729477SBarry Smith 13955615d1e5SSatish Balay #undef __FUNC__ 13965615d1e5SSatish Balay #define __FUNC__ "MatCreateMPIAIJ" 13971987afe7SBarry Smith /*@C 1398ff756334SLois Curfman McInnes MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 13993a511b96SLois Curfman McInnes (the default parallel PETSc format). For good matrix assembly performance 14003a511b96SLois Curfman McInnes the user should preallocate the matrix storage by setting the parameters 14013a511b96SLois Curfman McInnes d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 14023a511b96SLois Curfman McInnes performance can be increased by more than a factor of 50. 14038a729477SBarry Smith 14048a729477SBarry Smith Input Parameters: 14051eb62cbbSBarry Smith . comm - MPI communicator 14067d3e4905SLois Curfman McInnes . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 140792e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 140892e8d321SLois Curfman McInnes y vector for the matrix-vector product y = Ax. 140992e8d321SLois Curfman McInnes . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 141092e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 141192e8d321SLois Curfman McInnes x vector for the matrix-vector product y = Ax. 14127d3e4905SLois Curfman McInnes . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 141392e8d321SLois Curfman McInnes . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 1414ab693e5aSLois Curfman McInnes . d_nz - number of nonzeros per row in diagonal portion of local submatrix 1415ff756334SLois Curfman McInnes (same for all local rows) 14162bd5e0b2SLois Curfman McInnes . d_nzz - array containing the number of nonzeros in the various rows of the 141792e8d321SLois Curfman McInnes diagonal portion of the local submatrix (possibly different for each row) 14182bd5e0b2SLois Curfman McInnes or PETSC_NULL. You must leave room for the diagonal entry even if 14192bd5e0b2SLois Curfman McInnes it is zero. 14202bd5e0b2SLois Curfman McInnes . o_nz - number of nonzeros per row in the off-diagonal portion of local 1421ab693e5aSLois Curfman McInnes submatrix (same for all local rows). 14222bd5e0b2SLois Curfman McInnes . o_nzz - array containing the number of nonzeros in the various rows of the 14232bd5e0b2SLois Curfman McInnes off-diagonal portion of the local submatrix (possibly different for 14242bd5e0b2SLois Curfman McInnes each row) or PETSC_NULL. 14258a729477SBarry Smith 1426ff756334SLois Curfman McInnes Output Parameter: 142744cd7ae7SLois Curfman McInnes . A - the matrix 14288a729477SBarry Smith 1429ff756334SLois Curfman McInnes Notes: 1430ff756334SLois Curfman McInnes The AIJ format (also called the Yale sparse matrix format or 1431ff756334SLois Curfman McInnes compressed row storage), is fully compatible with standard Fortran 77 14320002213bSLois Curfman McInnes storage. That is, the stored row and column indices can begin at 14330002213bSLois Curfman McInnes either one (as in Fortran) or zero. See the users manual for details. 1434ff756334SLois Curfman McInnes 1435ff756334SLois Curfman McInnes The user MUST specify either the local or global matrix dimensions 1436ff756334SLois Curfman McInnes (possibly both). 1437ff756334SLois Curfman McInnes 14385511cfe3SLois Curfman McInnes By default, this format uses inodes (identical nodes) when possible. 14395511cfe3SLois Curfman McInnes We search for consecutive rows with the same nonzero structure, thereby 14405511cfe3SLois Curfman McInnes reusing matrix information to achieve increased efficiency. 14415511cfe3SLois Curfman McInnes 14425511cfe3SLois Curfman McInnes Options Database Keys: 14435511cfe3SLois Curfman McInnes $ -mat_aij_no_inode - Do not use inodes 14446e62573dSLois Curfman McInnes $ -mat_aij_inode_limit <limit> - Set inode limit. 14456e62573dSLois Curfman McInnes $ (max limit=5) 14466e62573dSLois Curfman McInnes $ -mat_aij_oneindex - Internally use indexing starting at 1 14476e62573dSLois Curfman McInnes $ rather than 0. Note: When calling MatSetValues(), 14486e62573dSLois Curfman McInnes $ the user still MUST index entries starting at 0! 14495511cfe3SLois Curfman McInnes 1450e0245417SLois Curfman McInnes Storage Information: 1451e0245417SLois Curfman McInnes For a square global matrix we define each processor's diagonal portion 1452e0245417SLois Curfman McInnes to be its local rows and the corresponding columns (a square submatrix); 1453e0245417SLois Curfman McInnes each processor's off-diagonal portion encompasses the remainder of the 1454e0245417SLois Curfman McInnes local matrix (a rectangular submatrix). 1455e0245417SLois Curfman McInnes 1456e0245417SLois Curfman McInnes The user can specify preallocated storage for the diagonal part of 14575ace5be8SLois Curfman McInnes the local submatrix with either d_nz or d_nnz (not both). Set 14585ace5be8SLois Curfman McInnes d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 14595ace5be8SLois Curfman McInnes memory allocation. Likewise, specify preallocated storage for the 14605ace5be8SLois Curfman McInnes off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1461ff756334SLois Curfman McInnes 14625511cfe3SLois Curfman McInnes Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 14635511cfe3SLois Curfman McInnes the figure below we depict these three local rows and all columns (0-11). 14642191d07cSBarry Smith 1465b810aeb4SBarry Smith $ 0 1 2 3 4 5 6 7 8 9 10 11 1466b810aeb4SBarry Smith $ ------------------- 14675511cfe3SLois Curfman McInnes $ row 3 | o o o d d d o o o o o o 14685511cfe3SLois Curfman McInnes $ row 4 | o o o d d d o o o o o o 14695511cfe3SLois Curfman McInnes $ row 5 | o o o d d d o o o o o o 14705511cfe3SLois Curfman McInnes $ ------------------- 1471b810aeb4SBarry Smith $ 1472b810aeb4SBarry Smith 14735511cfe3SLois Curfman McInnes Thus, any entries in the d locations are stored in the d (diagonal) 14745511cfe3SLois Curfman McInnes submatrix, and any entries in the o locations are stored in the 14755511cfe3SLois Curfman McInnes o (off-diagonal) submatrix. Note that the d and the o submatrices are 14765511cfe3SLois Curfman McInnes stored simply in the MATSEQAIJ format for compressed row storage. 14775511cfe3SLois Curfman McInnes 14785511cfe3SLois Curfman McInnes Now d_nz should indicate the number of nonzeros per row in the d matrix, 14795511cfe3SLois Curfman McInnes and o_nz should indicate the number of nonzeros per row in the o matrix. 14805511cfe3SLois Curfman McInnes In general, for PDE problems in which most nonzeros are near the diagonal, 14813d323bbdSBarry Smith one expects d_nz >> o_nz. For large problems you MUST preallocate memory 148292e8d321SLois Curfman McInnes or you will get TERRIBLE performance; see the users' manual chapter on 14836da5968aSLois Curfman McInnes matrices. 14843a511b96SLois Curfman McInnes 1485dbd7a890SLois Curfman McInnes .keywords: matrix, aij, compressed row, sparse, parallel 1486ff756334SLois Curfman McInnes 1487fafbff53SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 14888a729477SBarry Smith @*/ 14891eb62cbbSBarry Smith int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N, 149044cd7ae7SLois Curfman McInnes int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 14918a729477SBarry Smith { 149244cd7ae7SLois Curfman McInnes Mat B; 149344cd7ae7SLois Curfman McInnes Mat_MPIAIJ *b; 149436ce4990SBarry Smith int ierr, i,sum[2],work[2],size; 1495416022c9SBarry Smith 149644cd7ae7SLois Curfman McInnes *A = 0; 149744cd7ae7SLois Curfman McInnes PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIAIJ,comm); 149844cd7ae7SLois Curfman McInnes PLogObjectCreate(B); 149944cd7ae7SLois Curfman McInnes B->data = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b); 150044cd7ae7SLois Curfman McInnes PetscMemzero(b,sizeof(Mat_MPIAIJ)); 150144cd7ae7SLois Curfman McInnes PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 150236ce4990SBarry Smith MPI_Comm_size(comm,&size); 150336ce4990SBarry Smith if (size == 1) { 150436ce4990SBarry Smith B->ops.getrowij = MatGetRowIJ_MPIAIJ; 150536ce4990SBarry Smith B->ops.restorerowij = MatRestoreRowIJ_MPIAIJ; 150636ce4990SBarry Smith B->ops.lufactorsymbolic = MatLUFactorSymbolic_MPIAIJ; 150736ce4990SBarry Smith B->ops.lufactornumeric = MatLUFactorNumeric_MPIAIJ; 150836ce4990SBarry Smith B->ops.lufactor = MatLUFactor_MPIAIJ; 150936ce4990SBarry Smith B->ops.solve = MatSolve_MPIAIJ; 151036ce4990SBarry Smith B->ops.solveadd = MatSolveAdd_MPIAIJ; 151136ce4990SBarry Smith B->ops.solvetrans = MatSolveTrans_MPIAIJ; 151236ce4990SBarry Smith B->ops.solvetransadd = MatSolveTransAdd_MPIAIJ; 151336ce4990SBarry Smith B->ops.ilufactorsymbolic = MatILUFactorSymbolic_MPIAIJ; 151436ce4990SBarry Smith } 151544cd7ae7SLois Curfman McInnes B->destroy = MatDestroy_MPIAIJ; 151644cd7ae7SLois Curfman McInnes B->view = MatView_MPIAIJ; 151744cd7ae7SLois Curfman McInnes B->factor = 0; 151844cd7ae7SLois Curfman McInnes B->assembled = PETSC_FALSE; 151990f02eecSBarry Smith B->mapping = 0; 1520d6dfbf8fSBarry Smith 152144cd7ae7SLois Curfman McInnes b->insertmode = NOT_SET_VALUES; 152244cd7ae7SLois Curfman McInnes MPI_Comm_rank(comm,&b->rank); 152344cd7ae7SLois Curfman McInnes MPI_Comm_size(comm,&b->size); 15241eb62cbbSBarry Smith 1525b4fd4287SBarry Smith if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) 1526e3372554SBarry Smith SETERRQ(1,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 15271987afe7SBarry Smith 1528dbd7a890SLois Curfman McInnes if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 15291eb62cbbSBarry Smith work[0] = m; work[1] = n; 1530d65a2f8fSBarry Smith MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm ); 1531dbd7a890SLois Curfman McInnes if (M == PETSC_DECIDE) M = sum[0]; 1532dbd7a890SLois Curfman McInnes if (N == PETSC_DECIDE) N = sum[1]; 15331eb62cbbSBarry Smith } 153444cd7ae7SLois Curfman McInnes if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);} 153544cd7ae7SLois Curfman McInnes if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);} 153644cd7ae7SLois Curfman McInnes b->m = m; B->m = m; 153744cd7ae7SLois Curfman McInnes b->n = n; B->n = n; 153844cd7ae7SLois Curfman McInnes b->N = N; B->N = N; 153944cd7ae7SLois Curfman McInnes b->M = M; B->M = M; 15401eb62cbbSBarry Smith 15411eb62cbbSBarry Smith /* build local table of row and column ownerships */ 154244cd7ae7SLois Curfman McInnes b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners); 154344cd7ae7SLois Curfman McInnes PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ)); 1544603f58a4SSatish Balay b->cowners = b->rowners + b->size + 2; 154544cd7ae7SLois Curfman McInnes MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm); 154644cd7ae7SLois Curfman McInnes b->rowners[0] = 0; 154744cd7ae7SLois Curfman McInnes for ( i=2; i<=b->size; i++ ) { 154844cd7ae7SLois Curfman McInnes b->rowners[i] += b->rowners[i-1]; 15498a729477SBarry Smith } 155044cd7ae7SLois Curfman McInnes b->rstart = b->rowners[b->rank]; 155144cd7ae7SLois Curfman McInnes b->rend = b->rowners[b->rank+1]; 155244cd7ae7SLois Curfman McInnes MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm); 155344cd7ae7SLois Curfman McInnes b->cowners[0] = 0; 155444cd7ae7SLois Curfman McInnes for ( i=2; i<=b->size; i++ ) { 155544cd7ae7SLois Curfman McInnes b->cowners[i] += b->cowners[i-1]; 15568a729477SBarry Smith } 155744cd7ae7SLois Curfman McInnes b->cstart = b->cowners[b->rank]; 155844cd7ae7SLois Curfman McInnes b->cend = b->cowners[b->rank+1]; 15598a729477SBarry Smith 15605ace5be8SLois Curfman McInnes if (d_nz == PETSC_DEFAULT) d_nz = 5; 156144cd7ae7SLois Curfman McInnes ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr); 156244cd7ae7SLois Curfman McInnes PLogObjectParent(B,b->A); 15637b8455f0SLois Curfman McInnes if (o_nz == PETSC_DEFAULT) o_nz = 0; 156444cd7ae7SLois Curfman McInnes ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr); 156544cd7ae7SLois Curfman McInnes PLogObjectParent(B,b->B); 15668a729477SBarry Smith 15671eb62cbbSBarry Smith /* build cache for off array entries formed */ 156844cd7ae7SLois Curfman McInnes ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr); 156990f02eecSBarry Smith b->donotstash = 0; 157044cd7ae7SLois Curfman McInnes b->colmap = 0; 157144cd7ae7SLois Curfman McInnes b->garray = 0; 157244cd7ae7SLois Curfman McInnes b->roworiented = 1; 15738a729477SBarry Smith 15741eb62cbbSBarry Smith /* stuff used for matrix vector multiply */ 157544cd7ae7SLois Curfman McInnes b->lvec = 0; 157644cd7ae7SLois Curfman McInnes b->Mvctx = 0; 15778a729477SBarry Smith 15787a0afa10SBarry Smith /* stuff for MatGetRow() */ 157944cd7ae7SLois Curfman McInnes b->rowindices = 0; 158044cd7ae7SLois Curfman McInnes b->rowvalues = 0; 158144cd7ae7SLois Curfman McInnes b->getrowactive = PETSC_FALSE; 15827a0afa10SBarry Smith 158344cd7ae7SLois Curfman McInnes *A = B; 1584d6dfbf8fSBarry Smith return 0; 1585d6dfbf8fSBarry Smith } 1586c74985f6SBarry Smith 15875615d1e5SSatish Balay #undef __FUNC__ 15885615d1e5SSatish Balay #define __FUNC__ "MatConvertSameType_MPIAIJ" 15893d1612f7SBarry Smith static int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues) 1590d6dfbf8fSBarry Smith { 1591d6dfbf8fSBarry Smith Mat mat; 1592416022c9SBarry Smith Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data; 1593a1b97e82SLois Curfman McInnes int ierr, len=0, flg; 1594d6dfbf8fSBarry Smith 1595416022c9SBarry Smith *newmat = 0; 15960452661fSBarry Smith PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm); 1597a5a9c739SBarry Smith PLogObjectCreate(mat); 15980452661fSBarry Smith mat->data = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a); 1599416022c9SBarry Smith PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 160044a69424SLois Curfman McInnes mat->destroy = MatDestroy_MPIAIJ; 160144a69424SLois Curfman McInnes mat->view = MatView_MPIAIJ; 1602d6dfbf8fSBarry Smith mat->factor = matin->factor; 1603c456f294SBarry Smith mat->assembled = PETSC_TRUE; 1604d6dfbf8fSBarry Smith 160544cd7ae7SLois Curfman McInnes a->m = mat->m = oldmat->m; 160644cd7ae7SLois Curfman McInnes a->n = mat->n = oldmat->n; 160744cd7ae7SLois Curfman McInnes a->M = mat->M = oldmat->M; 160844cd7ae7SLois Curfman McInnes a->N = mat->N = oldmat->N; 1609d6dfbf8fSBarry Smith 1610416022c9SBarry Smith a->rstart = oldmat->rstart; 1611416022c9SBarry Smith a->rend = oldmat->rend; 1612416022c9SBarry Smith a->cstart = oldmat->cstart; 1613416022c9SBarry Smith a->cend = oldmat->cend; 161417699dbbSLois Curfman McInnes a->size = oldmat->size; 161517699dbbSLois Curfman McInnes a->rank = oldmat->rank; 161664119d58SLois Curfman McInnes a->insertmode = NOT_SET_VALUES; 1617bcd2baecSBarry Smith a->rowvalues = 0; 1618bcd2baecSBarry Smith a->getrowactive = PETSC_FALSE; 1619d6dfbf8fSBarry Smith 1620603f58a4SSatish Balay a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 1621603f58a4SSatish Balay PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ)); 1622603f58a4SSatish Balay a->cowners = a->rowners + a->size + 2; 1623603f58a4SSatish Balay PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int)); 1624416022c9SBarry Smith ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 16252ee70a88SLois Curfman McInnes if (oldmat->colmap) { 16260452661fSBarry Smith a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap); 1627416022c9SBarry Smith PLogObjectMemory(mat,(a->N)*sizeof(int)); 1628416022c9SBarry Smith PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int)); 1629416022c9SBarry Smith } else a->colmap = 0; 16303f41c07dSBarry Smith if (oldmat->garray) { 16313f41c07dSBarry Smith len = ((Mat_SeqAIJ *) (oldmat->B->data))->n; 16323f41c07dSBarry Smith a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray); 1633464493b3SBarry Smith PLogObjectMemory(mat,len*sizeof(int)); 16343f41c07dSBarry Smith if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 1635416022c9SBarry Smith } else a->garray = 0; 1636d6dfbf8fSBarry Smith 1637416022c9SBarry Smith ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 1638416022c9SBarry Smith PLogObjectParent(mat,a->lvec); 1639a56f8943SBarry Smith ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 1640416022c9SBarry Smith PLogObjectParent(mat,a->Mvctx); 1641416022c9SBarry Smith ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr); 1642416022c9SBarry Smith PLogObjectParent(mat,a->A); 1643416022c9SBarry Smith ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr); 1644416022c9SBarry Smith PLogObjectParent(mat,a->B); 16455dd7a6c7SBarry Smith ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 164625cdf11fSBarry Smith if (flg) { 1647682d7d0cSBarry Smith ierr = MatPrintHelp(mat); CHKERRQ(ierr); 1648682d7d0cSBarry Smith } 16498a729477SBarry Smith *newmat = mat; 16508a729477SBarry Smith return 0; 16518a729477SBarry Smith } 1652416022c9SBarry Smith 165377c4ece6SBarry Smith #include "sys.h" 1654416022c9SBarry Smith 16555615d1e5SSatish Balay #undef __FUNC__ 16565615d1e5SSatish Balay #define __FUNC__ "MatLoad_MPIAIJ" 165719bcc07fSBarry Smith int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat) 1658416022c9SBarry Smith { 1659d65a2f8fSBarry Smith Mat A; 1660416022c9SBarry Smith int i, nz, ierr, j,rstart, rend, fd; 1661d65a2f8fSBarry Smith Scalar *vals,*svals; 166219bcc07fSBarry Smith MPI_Comm comm = ((PetscObject)viewer)->comm; 1663416022c9SBarry Smith MPI_Status status; 166417699dbbSLois Curfman McInnes int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1665d65a2f8fSBarry Smith int *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols; 166619bcc07fSBarry Smith int tag = ((PetscObject)viewer)->tag; 1667416022c9SBarry Smith 166817699dbbSLois Curfman McInnes MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 166917699dbbSLois Curfman McInnes if (!rank) { 167090ace30eSBarry Smith ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 167177c4ece6SBarry Smith ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr); 1672e3372554SBarry Smith if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not matrix object"); 1673416022c9SBarry Smith } 1674416022c9SBarry Smith 1675416022c9SBarry Smith MPI_Bcast(header+1,3,MPI_INT,0,comm); 1676416022c9SBarry Smith M = header[1]; N = header[2]; 1677416022c9SBarry Smith /* determine ownership of all rows */ 167817699dbbSLois Curfman McInnes m = M/size + ((M % size) > rank); 16790452661fSBarry Smith rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners); 1680416022c9SBarry Smith MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm); 1681416022c9SBarry Smith rowners[0] = 0; 168217699dbbSLois Curfman McInnes for ( i=2; i<=size; i++ ) { 1683416022c9SBarry Smith rowners[i] += rowners[i-1]; 1684416022c9SBarry Smith } 168517699dbbSLois Curfman McInnes rstart = rowners[rank]; 168617699dbbSLois Curfman McInnes rend = rowners[rank+1]; 1687416022c9SBarry Smith 1688416022c9SBarry Smith /* distribute row lengths to all processors */ 16890452661fSBarry Smith ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens); 1690416022c9SBarry Smith offlens = ourlens + (rend-rstart); 169117699dbbSLois Curfman McInnes if (!rank) { 16920452661fSBarry Smith rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths); 169377c4ece6SBarry Smith ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr); 16940452661fSBarry Smith sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 169517699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i]; 1696d65a2f8fSBarry Smith MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm); 16970452661fSBarry Smith PetscFree(sndcounts); 1698416022c9SBarry Smith } 1699416022c9SBarry Smith else { 1700416022c9SBarry Smith MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm); 1701416022c9SBarry Smith } 1702416022c9SBarry Smith 170317699dbbSLois Curfman McInnes if (!rank) { 1704416022c9SBarry Smith /* calculate the number of nonzeros on each processor */ 17050452661fSBarry Smith procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 1706cddf8d76SBarry Smith PetscMemzero(procsnz,size*sizeof(int)); 170717699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 1708416022c9SBarry Smith for ( j=rowners[i]; j< rowners[i+1]; j++ ) { 1709416022c9SBarry Smith procsnz[i] += rowlengths[j]; 1710416022c9SBarry Smith } 1711416022c9SBarry Smith } 17120452661fSBarry Smith PetscFree(rowlengths); 1713416022c9SBarry Smith 1714416022c9SBarry Smith /* determine max buffer needed and allocate it */ 1715416022c9SBarry Smith maxnz = 0; 171617699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 17170452661fSBarry Smith maxnz = PetscMax(maxnz,procsnz[i]); 1718416022c9SBarry Smith } 17190452661fSBarry Smith cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 1720416022c9SBarry Smith 1721416022c9SBarry Smith /* read in my part of the matrix column indices */ 1722416022c9SBarry Smith nz = procsnz[0]; 17230452661fSBarry Smith mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 172477c4ece6SBarry Smith ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr); 1725d65a2f8fSBarry Smith 1726d65a2f8fSBarry Smith /* read in every one elses and ship off */ 172717699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { 1728d65a2f8fSBarry Smith nz = procsnz[i]; 172977c4ece6SBarry Smith ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr); 1730d65a2f8fSBarry Smith MPI_Send(cols,nz,MPI_INT,i,tag,comm); 1731d65a2f8fSBarry Smith } 17320452661fSBarry Smith PetscFree(cols); 1733416022c9SBarry Smith } 1734416022c9SBarry Smith else { 1735416022c9SBarry Smith /* determine buffer space needed for message */ 1736416022c9SBarry Smith nz = 0; 1737416022c9SBarry Smith for ( i=0; i<m; i++ ) { 1738416022c9SBarry Smith nz += ourlens[i]; 1739416022c9SBarry Smith } 17400452661fSBarry Smith mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 1741416022c9SBarry Smith 1742416022c9SBarry Smith /* receive message of column indices*/ 1743d65a2f8fSBarry Smith MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status); 1744416022c9SBarry Smith MPI_Get_count(&status,MPI_INT,&maxnz); 1745e3372554SBarry Smith if (maxnz != nz) SETERRQ(1,0,"something is wrong with file"); 1746416022c9SBarry Smith } 1747416022c9SBarry Smith 1748416022c9SBarry Smith /* loop over local rows, determining number of off diagonal entries */ 1749cddf8d76SBarry Smith PetscMemzero(offlens,m*sizeof(int)); 1750416022c9SBarry Smith jj = 0; 1751416022c9SBarry Smith for ( i=0; i<m; i++ ) { 1752416022c9SBarry Smith for ( j=0; j<ourlens[i]; j++ ) { 1753d65a2f8fSBarry Smith if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++; 1754416022c9SBarry Smith jj++; 1755416022c9SBarry Smith } 1756416022c9SBarry Smith } 1757d65a2f8fSBarry Smith 1758d65a2f8fSBarry Smith /* create our matrix */ 1759416022c9SBarry Smith for ( i=0; i<m; i++ ) { 1760416022c9SBarry Smith ourlens[i] -= offlens[i]; 1761416022c9SBarry Smith } 1762d65a2f8fSBarry Smith ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 1763d65a2f8fSBarry Smith A = *newmat; 17646d4a8577SBarry Smith MatSetOption(A,MAT_COLUMNS_SORTED); 1765d65a2f8fSBarry Smith for ( i=0; i<m; i++ ) { 1766d65a2f8fSBarry Smith ourlens[i] += offlens[i]; 1767d65a2f8fSBarry Smith } 1768416022c9SBarry Smith 176917699dbbSLois Curfman McInnes if (!rank) { 17700452661fSBarry Smith vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals); 1771416022c9SBarry Smith 1772416022c9SBarry Smith /* read in my part of the matrix numerical values */ 1773416022c9SBarry Smith nz = procsnz[0]; 177477c4ece6SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 1775d65a2f8fSBarry Smith 1776d65a2f8fSBarry Smith /* insert into matrix */ 1777d65a2f8fSBarry Smith jj = rstart; 1778d65a2f8fSBarry Smith smycols = mycols; 1779d65a2f8fSBarry Smith svals = vals; 1780d65a2f8fSBarry Smith for ( i=0; i<m; i++ ) { 1781d65a2f8fSBarry Smith ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1782d65a2f8fSBarry Smith smycols += ourlens[i]; 1783d65a2f8fSBarry Smith svals += ourlens[i]; 1784d65a2f8fSBarry Smith jj++; 1785416022c9SBarry Smith } 1786416022c9SBarry Smith 1787d65a2f8fSBarry Smith /* read in other processors and ship out */ 178817699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { 1789416022c9SBarry Smith nz = procsnz[i]; 179077c4ece6SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 1791d65a2f8fSBarry Smith MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm); 1792416022c9SBarry Smith } 17930452661fSBarry Smith PetscFree(procsnz); 1794416022c9SBarry Smith } 1795d65a2f8fSBarry Smith else { 1796d65a2f8fSBarry Smith /* receive numeric values */ 17970452661fSBarry Smith vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals); 1798416022c9SBarry Smith 1799d65a2f8fSBarry Smith /* receive message of values*/ 1800d65a2f8fSBarry Smith MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status); 1801d65a2f8fSBarry Smith MPI_Get_count(&status,MPIU_SCALAR,&maxnz); 1802e3372554SBarry Smith if (maxnz != nz) SETERRQ(1,0,"something is wrong with file"); 1803d65a2f8fSBarry Smith 1804d65a2f8fSBarry Smith /* insert into matrix */ 1805d65a2f8fSBarry Smith jj = rstart; 1806d65a2f8fSBarry Smith smycols = mycols; 1807d65a2f8fSBarry Smith svals = vals; 1808d65a2f8fSBarry Smith for ( i=0; i<m; i++ ) { 1809d65a2f8fSBarry Smith ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1810d65a2f8fSBarry Smith smycols += ourlens[i]; 1811d65a2f8fSBarry Smith svals += ourlens[i]; 1812d65a2f8fSBarry Smith jj++; 1813d65a2f8fSBarry Smith } 1814d65a2f8fSBarry Smith } 18150452661fSBarry Smith PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners); 1816d65a2f8fSBarry Smith 18176d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 18186d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1819416022c9SBarry Smith return 0; 1820416022c9SBarry Smith } 1821