xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision c2653b3d308917c4b5698e5f59fd76add26d7f41)
1cb512458SBarry Smith #ifndef lint
2*c2653b3dSLois Curfman McInnes static char vcid[] = "$Id: mpiaij.c,v 1.192 1997/03/09 17:58:24 bsmith Exp bsmith $";
3cb512458SBarry Smith #endif
48a729477SBarry Smith 
570f55243SBarry Smith #include "src/mat/impls/aij/mpi/mpiaij.h"
6f5eb4b81SSatish Balay #include "src/vec/vecimpl.h"
7d9942c19SSatish Balay #include "src/inline/spops.h"
88a729477SBarry Smith 
99e25ed09SBarry Smith /* local utility routine that creates a mapping from the global column
109e25ed09SBarry Smith number to the local number in the off-diagonal part of the local
119e25ed09SBarry Smith storage of the matrix.  This is done in a non scable way since the
129e25ed09SBarry Smith length of colmap equals the global matrix length.
139e25ed09SBarry Smith */
145615d1e5SSatish Balay #undef __FUNC__
155eea60f9SBarry Smith #define __FUNC__ "CreateColmap_MPIAIJ_Private" /* ADIC Ignore */
160a198c4cSBarry Smith int CreateColmap_MPIAIJ_Private(Mat mat)
179e25ed09SBarry Smith {
1844a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
19ec8511deSBarry Smith   Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
20905e6a2fSBarry Smith   int        n = B->n,i;
21dbb450caSBarry Smith 
220452661fSBarry Smith   aij->colmap = (int *) PetscMalloc(aij->N*sizeof(int));CHKPTRQ(aij->colmap);
23464493b3SBarry Smith   PLogObjectMemory(mat,aij->N*sizeof(int));
24cddf8d76SBarry Smith   PetscMemzero(aij->colmap,aij->N*sizeof(int));
25905e6a2fSBarry Smith   for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
269e25ed09SBarry Smith   return 0;
279e25ed09SBarry Smith }
289e25ed09SBarry Smith 
292493cbb0SBarry Smith extern int DisAssemble_MPIAIJ(Mat);
302493cbb0SBarry Smith 
315615d1e5SSatish Balay #undef __FUNC__
325615d1e5SSatish Balay #define __FUNC__ "MatGetRowIJ_MPIAIJ"
333b2fbd54SBarry Smith static int MatGetRowIJ_MPIAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
343b2fbd54SBarry Smith                            PetscTruth *done)
35299609e3SLois Curfman McInnes {
36299609e3SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
37299609e3SLois Curfman McInnes   int        ierr;
3817699dbbSLois Curfman McInnes   if (aij->size == 1) {
393b2fbd54SBarry Smith     ierr = MatGetRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
40e3372554SBarry Smith   } else SETERRQ(1,0,"not supported in parallel");
413b2fbd54SBarry Smith   return 0;
423b2fbd54SBarry Smith }
433b2fbd54SBarry Smith 
445615d1e5SSatish Balay #undef __FUNC__
455eea60f9SBarry Smith #define __FUNC__ "MatRestoreRowIJ_MPIAIJ" /* ADIC Ignore */
463b2fbd54SBarry Smith static int MatRestoreRowIJ_MPIAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
473b2fbd54SBarry Smith                                PetscTruth *done)
483b2fbd54SBarry Smith {
493b2fbd54SBarry Smith   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
503b2fbd54SBarry Smith   int        ierr;
513b2fbd54SBarry Smith   if (aij->size == 1) {
523b2fbd54SBarry Smith     ierr = MatRestoreRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
53e3372554SBarry Smith   } else SETERRQ(1,0,"not supported in parallel");
54299609e3SLois Curfman McInnes   return 0;
55299609e3SLois Curfman McInnes }
56299609e3SLois Curfman McInnes 
570520107fSSatish Balay #define CHUNKSIZE   15
58f5e9677aSSatish Balay #define MatSetValues_SeqAIJ_A_Private(A,row,col,value,addv) \
590520107fSSatish Balay { \
600520107fSSatish Balay  \
610520107fSSatish Balay     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
620520107fSSatish Balay     rmax = imax[row]; nrow = ailen[row];  \
63f5e9677aSSatish Balay     col1 = col - shift; \
64f5e9677aSSatish Balay      \
650520107fSSatish Balay       for ( _i=0; _i<nrow; _i++ ) { \
66f5e9677aSSatish Balay         if (rp[_i] > col1) break; \
67f5e9677aSSatish Balay         if (rp[_i] == col1) { \
680520107fSSatish Balay           if (addv == ADD_VALUES) ap[_i] += value;   \
690520107fSSatish Balay           else                  ap[_i] = value; \
700520107fSSatish Balay           goto _noinsert; \
710520107fSSatish Balay         } \
720520107fSSatish Balay       }  \
73f5e9677aSSatish Balay       if (nonew)  goto _noinsert; \
740520107fSSatish Balay       if (nrow >= rmax) { \
750520107fSSatish Balay         /* there is no extra room in row, therefore enlarge */ \
760520107fSSatish Balay         int    new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \
770520107fSSatish Balay         Scalar *new_a; \
780520107fSSatish Balay  \
790520107fSSatish Balay         /* malloc new storage space */ \
800520107fSSatish Balay         len     = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \
810520107fSSatish Balay         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
820520107fSSatish Balay         new_j   = (int *) (new_a + new_nz); \
830520107fSSatish Balay         new_i   = new_j + new_nz; \
840520107fSSatish Balay  \
850520107fSSatish Balay         /* copy over old data into new slots */ \
860520107fSSatish Balay         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \
870520107fSSatish Balay         for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
880520107fSSatish Balay         PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int)); \
890520107fSSatish Balay         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
900520107fSSatish Balay         PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
910520107fSSatish Balay                                                            len*sizeof(int)); \
920520107fSSatish Balay         PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar)); \
930520107fSSatish Balay         PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
940520107fSSatish Balay                                                            len*sizeof(Scalar));  \
950520107fSSatish Balay         /* free up old matrix storage */ \
96f5e9677aSSatish Balay  \
970520107fSSatish Balay         PetscFree(a->a);  \
980520107fSSatish Balay         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
990520107fSSatish Balay         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
1000520107fSSatish Balay         a->singlemalloc = 1; \
1010520107fSSatish Balay  \
1020520107fSSatish Balay         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
1030520107fSSatish Balay         rmax = imax[row] = imax[row] + CHUNKSIZE; \
1040520107fSSatish Balay         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
1050520107fSSatish Balay         a->maxnz += CHUNKSIZE; \
1060520107fSSatish Balay         a->reallocs++; \
1070520107fSSatish Balay       } \
1080520107fSSatish Balay       N = nrow++ - 1; a->nz++; \
1090520107fSSatish Balay       /* shift up all the later entries in this row */ \
1100520107fSSatish Balay       for ( ii=N; ii>=_i; ii-- ) { \
1110520107fSSatish Balay         rp[ii+1] = rp[ii]; \
1120520107fSSatish Balay         ap[ii+1] = ap[ii]; \
1130520107fSSatish Balay       } \
114f5e9677aSSatish Balay       rp[_i] = col1;  \
1150520107fSSatish Balay       ap[_i] = value;  \
1160520107fSSatish Balay       _noinsert: ; \
1170520107fSSatish Balay       ailen[row] = nrow; \
1180520107fSSatish Balay }
1190a198c4cSBarry Smith 
1200520107fSSatish Balay extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
1215615d1e5SSatish Balay #undef __FUNC__
1225615d1e5SSatish Balay #define __FUNC__ "MatSetValues_MPIAIJ"
1234b0e389bSBarry Smith static int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
1248a729477SBarry Smith {
12544a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1264b0e389bSBarry Smith   Scalar     value;
1271eb62cbbSBarry Smith   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
1281eb62cbbSBarry Smith   int        cstart = aij->cstart, cend = aij->cend,row,col;
129905e6a2fSBarry Smith   int        roworiented = aij->roworiented;
1308a729477SBarry Smith 
1310520107fSSatish Balay   /* Some Variables required in the macro */
1324ee7247eSSatish Balay   Mat        A = aij->A;
1334ee7247eSSatish Balay   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
1344ee7247eSSatish Balay   int        *rp,ii,nrow,_i,rmax, N, col1;
1350520107fSSatish Balay   int        *imax = a->imax, *ai = a->i, *ailen = a->ilen;
1364ee7247eSSatish Balay   int        *aj = a->j, nonew = a->nonew,shift = a->indexshift;
1370520107fSSatish Balay   Scalar     *ap, *aa = a->a;
1384ee7247eSSatish Balay 
1398a729477SBarry Smith   for ( i=0; i<m; i++ ) {
1400a198c4cSBarry Smith #if defined(PETSC_BOPT_g)
141e3372554SBarry Smith     if (im[i] < 0) SETERRQ(1,0,"Negative row");
142e3372554SBarry Smith     if (im[i] >= aij->M) SETERRQ(1,0,"Row too large");
1430a198c4cSBarry Smith #endif
1444b0e389bSBarry Smith     if (im[i] >= rstart && im[i] < rend) {
1454b0e389bSBarry Smith       row = im[i] - rstart;
1461eb62cbbSBarry Smith       for ( j=0; j<n; j++ ) {
1474b0e389bSBarry Smith         if (in[j] >= cstart && in[j] < cend){
1484b0e389bSBarry Smith           col = in[j] - cstart;
1494b0e389bSBarry Smith           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
150f5e9677aSSatish Balay           MatSetValues_SeqAIJ_A_Private(aij->A,row,col,value,addv);
1510520107fSSatish Balay           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
1521eb62cbbSBarry Smith         }
1530a198c4cSBarry Smith #if defined(PETSC_BOPT_g)
154e3372554SBarry Smith         else if (in[j] < 0) {SETERRQ(1,0,"Negative column");}
155e3372554SBarry Smith         else if (in[j] >= aij->N) {SETERRQ(1,0,"Col too large");}
1560a198c4cSBarry Smith #endif
1571eb62cbbSBarry Smith         else {
158227d817aSBarry Smith           if (mat->was_assembled) {
159905e6a2fSBarry Smith             if (!aij->colmap) {
160905e6a2fSBarry Smith               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
161905e6a2fSBarry Smith             }
162905e6a2fSBarry Smith             col = aij->colmap[in[j]] - 1;
163ec8511deSBarry Smith             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
1642493cbb0SBarry Smith               ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
1654b0e389bSBarry Smith               col =  in[j];
166d6dfbf8fSBarry Smith             }
1679e25ed09SBarry Smith           }
1684b0e389bSBarry Smith           else col = in[j];
1694b0e389bSBarry Smith           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
1700a198c4cSBarry Smith           ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
1711eb62cbbSBarry Smith         }
1721eb62cbbSBarry Smith       }
1731eb62cbbSBarry Smith     }
1741eb62cbbSBarry Smith     else {
17590f02eecSBarry Smith       if (roworiented && !aij->donotstash) {
1764b0e389bSBarry Smith         ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
1774b0e389bSBarry Smith       }
1784b0e389bSBarry Smith       else {
17990f02eecSBarry Smith         if (!aij->donotstash) {
1804b0e389bSBarry Smith           row = im[i];
1814b0e389bSBarry Smith           for ( j=0; j<n; j++ ) {
1824b0e389bSBarry Smith             ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
1834b0e389bSBarry Smith           }
1844b0e389bSBarry Smith         }
1851eb62cbbSBarry Smith       }
1868a729477SBarry Smith     }
18790f02eecSBarry Smith   }
1888a729477SBarry Smith   return 0;
1898a729477SBarry Smith }
1908a729477SBarry Smith 
1915615d1e5SSatish Balay #undef __FUNC__
1925615d1e5SSatish Balay #define __FUNC__ "MatGetValues_MPIAIJ"
193b49de8d1SLois Curfman McInnes static int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
194b49de8d1SLois Curfman McInnes {
195b49de8d1SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
196b49de8d1SLois Curfman McInnes   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
197b49de8d1SLois Curfman McInnes   int        cstart = aij->cstart, cend = aij->cend,row,col;
198b49de8d1SLois Curfman McInnes 
199b49de8d1SLois Curfman McInnes   for ( i=0; i<m; i++ ) {
200e3372554SBarry Smith     if (idxm[i] < 0) SETERRQ(1,0,"Negative row");
201e3372554SBarry Smith     if (idxm[i] >= aij->M) SETERRQ(1,0,"Row too large");
202b49de8d1SLois Curfman McInnes     if (idxm[i] >= rstart && idxm[i] < rend) {
203b49de8d1SLois Curfman McInnes       row = idxm[i] - rstart;
204b49de8d1SLois Curfman McInnes       for ( j=0; j<n; j++ ) {
205e3372554SBarry Smith         if (idxn[j] < 0) SETERRQ(1,0,"Negative column");
206e3372554SBarry Smith         if (idxn[j] >= aij->N) SETERRQ(1,0,"Col too large");
207b49de8d1SLois Curfman McInnes         if (idxn[j] >= cstart && idxn[j] < cend){
208b49de8d1SLois Curfman McInnes           col = idxn[j] - cstart;
209b49de8d1SLois Curfman McInnes           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
210b49de8d1SLois Curfman McInnes         }
211b49de8d1SLois Curfman McInnes         else {
212905e6a2fSBarry Smith           if (!aij->colmap) {
213905e6a2fSBarry Smith             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
214905e6a2fSBarry Smith           }
215905e6a2fSBarry Smith           col = aij->colmap[idxn[j]] - 1;
216e60e1c95SSatish Balay           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
217d9d09a02SSatish Balay           else {
218b49de8d1SLois Curfman McInnes             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
219b49de8d1SLois Curfman McInnes           }
220b49de8d1SLois Curfman McInnes         }
221b49de8d1SLois Curfman McInnes       }
222d9d09a02SSatish Balay     }
223b49de8d1SLois Curfman McInnes     else {
224e3372554SBarry Smith       SETERRQ(1,0,"Only local values currently supported");
225b49de8d1SLois Curfman McInnes     }
226b49de8d1SLois Curfman McInnes   }
227b49de8d1SLois Curfman McInnes   return 0;
228b49de8d1SLois Curfman McInnes }
229b49de8d1SLois Curfman McInnes 
2305615d1e5SSatish Balay #undef __FUNC__
2315615d1e5SSatish Balay #define __FUNC__ "MatAssemblyBegin_MPIAIJ"
232fc76ce87SLois Curfman McInnes static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
2338a729477SBarry Smith {
23444a69424SLois Curfman McInnes   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
235d6dfbf8fSBarry Smith   MPI_Comm    comm = mat->comm;
23617699dbbSLois Curfman McInnes   int         size = aij->size, *owners = aij->rowners;
23717699dbbSLois Curfman McInnes   int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
2381eb62cbbSBarry Smith   MPI_Request *send_waits,*recv_waits;
2396abc6512SBarry Smith   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
2401eb62cbbSBarry Smith   InsertMode  addv;
2411eb62cbbSBarry Smith   Scalar      *rvalues,*svalues;
2421eb62cbbSBarry Smith 
2431eb62cbbSBarry Smith   /* make sure all processors are either in INSERTMODE or ADDMODE */
24447794344SBarry Smith   MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
245dbb450caSBarry Smith   if (addv == (ADD_VALUES|INSERT_VALUES)) {
246e3372554SBarry Smith     SETERRQ(1,0,"Some processors inserted others added");
2471eb62cbbSBarry Smith   }
24847794344SBarry Smith   mat->insertmode = addv; /* in case this processor had no cache */
2491eb62cbbSBarry Smith 
2501eb62cbbSBarry Smith   /*  first count number of contributors to each processor */
2510452661fSBarry Smith   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
252cddf8d76SBarry Smith   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
2530452661fSBarry Smith   owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
2541eb62cbbSBarry Smith   for ( i=0; i<aij->stash.n; i++ ) {
2551eb62cbbSBarry Smith     idx = aij->stash.idx[i];
25617699dbbSLois Curfman McInnes     for ( j=0; j<size; j++ ) {
2571eb62cbbSBarry Smith       if (idx >= owners[j] && idx < owners[j+1]) {
2581eb62cbbSBarry Smith         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
2598a729477SBarry Smith       }
2608a729477SBarry Smith     }
2618a729477SBarry Smith   }
26217699dbbSLois Curfman McInnes   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
2631eb62cbbSBarry Smith 
2641eb62cbbSBarry Smith   /* inform other processors of number of messages and max length*/
2650452661fSBarry Smith   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
26617699dbbSLois Curfman McInnes   MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);
26717699dbbSLois Curfman McInnes   nreceives = work[rank];
26817699dbbSLois Curfman McInnes   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
26917699dbbSLois Curfman McInnes   nmax = work[rank];
2700452661fSBarry Smith   PetscFree(work);
2711eb62cbbSBarry Smith 
2721eb62cbbSBarry Smith   /* post receives:
2731eb62cbbSBarry Smith        1) each message will consist of ordered pairs
2741eb62cbbSBarry Smith      (global index,value) we store the global index as a double
275d6dfbf8fSBarry Smith      to simplify the message passing.
2761eb62cbbSBarry Smith        2) since we don't know how long each individual message is we
2771eb62cbbSBarry Smith      allocate the largest needed buffer for each receive. Potentially
2781eb62cbbSBarry Smith      this is a lot of wasted space.
2791eb62cbbSBarry Smith 
2801eb62cbbSBarry Smith 
2811eb62cbbSBarry Smith        This could be done better.
2821eb62cbbSBarry Smith   */
2830452661fSBarry Smith   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
28478b31e54SBarry Smith   CHKPTRQ(rvalues);
2850452661fSBarry Smith   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
28678b31e54SBarry Smith   CHKPTRQ(recv_waits);
2871eb62cbbSBarry Smith   for ( i=0; i<nreceives; i++ ) {
288416022c9SBarry Smith     MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
2891eb62cbbSBarry Smith               comm,recv_waits+i);
2901eb62cbbSBarry Smith   }
2911eb62cbbSBarry Smith 
2921eb62cbbSBarry Smith   /* do sends:
2931eb62cbbSBarry Smith       1) starts[i] gives the starting index in svalues for stuff going to
2941eb62cbbSBarry Smith          the ith processor
2951eb62cbbSBarry Smith   */
2960452661fSBarry Smith   svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
2970452661fSBarry Smith   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
29878b31e54SBarry Smith   CHKPTRQ(send_waits);
2990452661fSBarry Smith   starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
3001eb62cbbSBarry Smith   starts[0] = 0;
30117699dbbSLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
3021eb62cbbSBarry Smith   for ( i=0; i<aij->stash.n; i++ ) {
3031eb62cbbSBarry Smith     svalues[3*starts[owner[i]]]       = (Scalar)  aij->stash.idx[i];
3041eb62cbbSBarry Smith     svalues[3*starts[owner[i]]+1]     = (Scalar)  aij->stash.idy[i];
3051eb62cbbSBarry Smith     svalues[3*(starts[owner[i]]++)+2] =  aij->stash.array[i];
3061eb62cbbSBarry Smith   }
3070452661fSBarry Smith   PetscFree(owner);
3081eb62cbbSBarry Smith   starts[0] = 0;
30917699dbbSLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
3101eb62cbbSBarry Smith   count = 0;
31117699dbbSLois Curfman McInnes   for ( i=0; i<size; i++ ) {
3121eb62cbbSBarry Smith     if (procs[i]) {
313416022c9SBarry Smith       MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
3141eb62cbbSBarry Smith                 comm,send_waits+count++);
3151eb62cbbSBarry Smith     }
3161eb62cbbSBarry Smith   }
3170452661fSBarry Smith   PetscFree(starts); PetscFree(nprocs);
3181eb62cbbSBarry Smith 
3191eb62cbbSBarry Smith   /* Free cache space */
32055908cccSLois Curfman McInnes   PLogInfo(mat,"MatAssemblyBegin_MPIAIJ:Number of off-processor values %d\n",aij->stash.n);
32178b31e54SBarry Smith   ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
3221eb62cbbSBarry Smith 
3231eb62cbbSBarry Smith   aij->svalues    = svalues;    aij->rvalues    = rvalues;
3241eb62cbbSBarry Smith   aij->nsends     = nsends;     aij->nrecvs     = nreceives;
3251eb62cbbSBarry Smith   aij->send_waits = send_waits; aij->recv_waits = recv_waits;
3261eb62cbbSBarry Smith   aij->rmax       = nmax;
3271eb62cbbSBarry Smith 
3281eb62cbbSBarry Smith   return 0;
3291eb62cbbSBarry Smith }
33044a69424SLois Curfman McInnes extern int MatSetUpMultiply_MPIAIJ(Mat);
3311eb62cbbSBarry Smith 
3325615d1e5SSatish Balay #undef __FUNC__
3335615d1e5SSatish Balay #define __FUNC__ "MatAssemblyEnd_MPIAIJ"
334fc76ce87SLois Curfman McInnes static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
3351eb62cbbSBarry Smith {
33644a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
3371eb62cbbSBarry Smith   MPI_Status  *send_status,recv_status;
338416022c9SBarry Smith   int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
339905e6a2fSBarry Smith   int         row,col,other_disassembled;
3401eb62cbbSBarry Smith   Scalar      *values,val;
34147794344SBarry Smith   InsertMode  addv = mat->insertmode;
3421eb62cbbSBarry Smith 
3431eb62cbbSBarry Smith   /*  wait on receives */
3441eb62cbbSBarry Smith   while (count) {
345d6dfbf8fSBarry Smith     MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
3461eb62cbbSBarry Smith     /* unpack receives into our local space */
347d6dfbf8fSBarry Smith     values = aij->rvalues + 3*imdex*aij->rmax;
348c60448a5SBarry Smith     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
3491eb62cbbSBarry Smith     n = n/3;
3501eb62cbbSBarry Smith     for ( i=0; i<n; i++ ) {
351227d817aSBarry Smith       row = (int) PetscReal(values[3*i]) - aij->rstart;
352227d817aSBarry Smith       col = (int) PetscReal(values[3*i+1]);
3531eb62cbbSBarry Smith       val = values[3*i+2];
3541eb62cbbSBarry Smith       if (col >= aij->cstart && col < aij->cend) {
3551eb62cbbSBarry Smith         col -= aij->cstart;
3561eb62cbbSBarry Smith         MatSetValues(aij->A,1,&row,1,&col,&val,addv);
3571eb62cbbSBarry Smith       }
3581eb62cbbSBarry Smith       else {
35955a1b374SBarry Smith         if (mat->was_assembled || mat->assembled) {
360905e6a2fSBarry Smith           if (!aij->colmap) {
361905e6a2fSBarry Smith             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
362905e6a2fSBarry Smith           }
363905e6a2fSBarry Smith           col = aij->colmap[col] - 1;
364ec8511deSBarry Smith           if (col < 0  && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
3652493cbb0SBarry Smith             ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
366227d817aSBarry Smith             col = (int) PetscReal(values[3*i+1]);
367d6dfbf8fSBarry Smith           }
3689e25ed09SBarry Smith         }
3691eb62cbbSBarry Smith         MatSetValues(aij->B,1,&row,1,&col,&val,addv);
3701eb62cbbSBarry Smith       }
3711eb62cbbSBarry Smith     }
3721eb62cbbSBarry Smith     count--;
3731eb62cbbSBarry Smith   }
3740452661fSBarry Smith   PetscFree(aij->recv_waits); PetscFree(aij->rvalues);
3751eb62cbbSBarry Smith 
3761eb62cbbSBarry Smith   /* wait on sends */
3771eb62cbbSBarry Smith   if (aij->nsends) {
3780a198c4cSBarry Smith     send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
3791eb62cbbSBarry Smith     MPI_Waitall(aij->nsends,aij->send_waits,send_status);
3800452661fSBarry Smith     PetscFree(send_status);
3811eb62cbbSBarry Smith   }
3820452661fSBarry Smith   PetscFree(aij->send_waits); PetscFree(aij->svalues);
3831eb62cbbSBarry Smith 
38478b31e54SBarry Smith   ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
38578b31e54SBarry Smith   ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);
3861eb62cbbSBarry Smith 
3872493cbb0SBarry Smith   /* determine if any processor has disassembled, if so we must
3882493cbb0SBarry Smith      also disassemble ourselfs, in order that we may reassemble. */
389227d817aSBarry Smith   MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
390227d817aSBarry Smith   if (mat->was_assembled && !other_disassembled) {
3912493cbb0SBarry Smith     ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
3922493cbb0SBarry Smith   }
3932493cbb0SBarry Smith 
3946d4a8577SBarry Smith   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
39578b31e54SBarry Smith     ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
3965e42470aSBarry Smith   }
39778b31e54SBarry Smith   ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
39878b31e54SBarry Smith   ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);
3995e42470aSBarry Smith 
4007a0afa10SBarry Smith   if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;}
4018a729477SBarry Smith   return 0;
4028a729477SBarry Smith }
4038a729477SBarry Smith 
4045615d1e5SSatish Balay #undef __FUNC__
4055615d1e5SSatish Balay #define __FUNC__ "MatZeroEntries_MPIAIJ"
4062ee70a88SLois Curfman McInnes static int MatZeroEntries_MPIAIJ(Mat A)
4071eb62cbbSBarry Smith {
40844a69424SLois Curfman McInnes   Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
409dbd7a890SLois Curfman McInnes   int        ierr;
41078b31e54SBarry Smith   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
41178b31e54SBarry Smith   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
4121eb62cbbSBarry Smith   return 0;
4131eb62cbbSBarry Smith }
4141eb62cbbSBarry Smith 
4151eb62cbbSBarry Smith /* the code does not do the diagonal entries correctly unless the
4161eb62cbbSBarry Smith    matrix is square and the column and row owerships are identical.
4171eb62cbbSBarry Smith    This is a BUG. The only way to fix it seems to be to access
4181eb62cbbSBarry Smith    aij->A and aij->B directly and not through the MatZeroRows()
4191eb62cbbSBarry Smith    routine.
4201eb62cbbSBarry Smith */
4215615d1e5SSatish Balay #undef __FUNC__
4225615d1e5SSatish Balay #define __FUNC__ "MatZeroRows_MPIAIJ"
42344a69424SLois Curfman McInnes static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
4241eb62cbbSBarry Smith {
42544a69424SLois Curfman McInnes   Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
42617699dbbSLois Curfman McInnes   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
4276abc6512SBarry Smith   int            *procs,*nprocs,j,found,idx,nsends,*work;
42817699dbbSLois Curfman McInnes   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
4295392566eSBarry Smith   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
430d6dfbf8fSBarry Smith   int            *lens,imdex,*lrows,*values;
431d6dfbf8fSBarry Smith   MPI_Comm       comm = A->comm;
4321eb62cbbSBarry Smith   MPI_Request    *send_waits,*recv_waits;
4331eb62cbbSBarry Smith   MPI_Status     recv_status,*send_status;
4341eb62cbbSBarry Smith   IS             istmp;
4351eb62cbbSBarry Smith 
43677c4ece6SBarry Smith   ierr = ISGetSize(is,&N); CHKERRQ(ierr);
43778b31e54SBarry Smith   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
4381eb62cbbSBarry Smith 
4391eb62cbbSBarry Smith   /*  first count number of contributors to each processor */
4400452661fSBarry Smith   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
441cddf8d76SBarry Smith   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
4420452661fSBarry Smith   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
4431eb62cbbSBarry Smith   for ( i=0; i<N; i++ ) {
4441eb62cbbSBarry Smith     idx = rows[i];
4451eb62cbbSBarry Smith     found = 0;
44617699dbbSLois Curfman McInnes     for ( j=0; j<size; j++ ) {
4471eb62cbbSBarry Smith       if (idx >= owners[j] && idx < owners[j+1]) {
4481eb62cbbSBarry Smith         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
4491eb62cbbSBarry Smith       }
4501eb62cbbSBarry Smith     }
451e3372554SBarry Smith     if (!found) SETERRQ(1,0,"Index out of range");
4521eb62cbbSBarry Smith   }
45317699dbbSLois Curfman McInnes   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
4541eb62cbbSBarry Smith 
4551eb62cbbSBarry Smith   /* inform other processors of number of messages and max length*/
4560452661fSBarry Smith   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
45717699dbbSLois Curfman McInnes   MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
45817699dbbSLois Curfman McInnes   nrecvs = work[rank];
45917699dbbSLois Curfman McInnes   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
46017699dbbSLois Curfman McInnes   nmax = work[rank];
4610452661fSBarry Smith   PetscFree(work);
4621eb62cbbSBarry Smith 
4631eb62cbbSBarry Smith   /* post receives:   */
4640452661fSBarry Smith   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
46578b31e54SBarry Smith   CHKPTRQ(rvalues);
4660452661fSBarry Smith   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
46778b31e54SBarry Smith   CHKPTRQ(recv_waits);
4681eb62cbbSBarry Smith   for ( i=0; i<nrecvs; i++ ) {
469416022c9SBarry Smith     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
4701eb62cbbSBarry Smith   }
4711eb62cbbSBarry Smith 
4721eb62cbbSBarry Smith   /* do sends:
4731eb62cbbSBarry Smith       1) starts[i] gives the starting index in svalues for stuff going to
4741eb62cbbSBarry Smith          the ith processor
4751eb62cbbSBarry Smith   */
4760452661fSBarry Smith   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
4770452661fSBarry Smith   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
47878b31e54SBarry Smith   CHKPTRQ(send_waits);
4790452661fSBarry Smith   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
4801eb62cbbSBarry Smith   starts[0] = 0;
48117699dbbSLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
4821eb62cbbSBarry Smith   for ( i=0; i<N; i++ ) {
4831eb62cbbSBarry Smith     svalues[starts[owner[i]]++] = rows[i];
4841eb62cbbSBarry Smith   }
4851eb62cbbSBarry Smith   ISRestoreIndices(is,&rows);
4861eb62cbbSBarry Smith 
4871eb62cbbSBarry Smith   starts[0] = 0;
48817699dbbSLois Curfman McInnes   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
4891eb62cbbSBarry Smith   count = 0;
49017699dbbSLois Curfman McInnes   for ( i=0; i<size; i++ ) {
4911eb62cbbSBarry Smith     if (procs[i]) {
492416022c9SBarry Smith       MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
4931eb62cbbSBarry Smith     }
4941eb62cbbSBarry Smith   }
4950452661fSBarry Smith   PetscFree(starts);
4961eb62cbbSBarry Smith 
49717699dbbSLois Curfman McInnes   base = owners[rank];
4981eb62cbbSBarry Smith 
4991eb62cbbSBarry Smith   /*  wait on receives */
5000452661fSBarry Smith   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
5011eb62cbbSBarry Smith   source = lens + nrecvs;
5021eb62cbbSBarry Smith   count  = nrecvs; slen = 0;
5031eb62cbbSBarry Smith   while (count) {
504d6dfbf8fSBarry Smith     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
5051eb62cbbSBarry Smith     /* unpack receives into our local space */
5061eb62cbbSBarry Smith     MPI_Get_count(&recv_status,MPI_INT,&n);
507d6dfbf8fSBarry Smith     source[imdex]  = recv_status.MPI_SOURCE;
508d6dfbf8fSBarry Smith     lens[imdex]  = n;
5091eb62cbbSBarry Smith     slen += n;
5101eb62cbbSBarry Smith     count--;
5111eb62cbbSBarry Smith   }
5120452661fSBarry Smith   PetscFree(recv_waits);
5131eb62cbbSBarry Smith 
5141eb62cbbSBarry Smith   /* move the data into the send scatter */
5150452661fSBarry Smith   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
5161eb62cbbSBarry Smith   count = 0;
5171eb62cbbSBarry Smith   for ( i=0; i<nrecvs; i++ ) {
5181eb62cbbSBarry Smith     values = rvalues + i*nmax;
5191eb62cbbSBarry Smith     for ( j=0; j<lens[i]; j++ ) {
5201eb62cbbSBarry Smith       lrows[count++] = values[j] - base;
5211eb62cbbSBarry Smith     }
5221eb62cbbSBarry Smith   }
5230452661fSBarry Smith   PetscFree(rvalues); PetscFree(lens);
5240452661fSBarry Smith   PetscFree(owner); PetscFree(nprocs);
5251eb62cbbSBarry Smith 
5261eb62cbbSBarry Smith   /* actually zap the local rows */
527537820f0SBarry Smith   ierr = ISCreateGeneral(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
528464493b3SBarry Smith   PLogObjectParent(A,istmp);
5290452661fSBarry Smith   PetscFree(lrows);
53078b31e54SBarry Smith   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
53178b31e54SBarry Smith   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
53278b31e54SBarry Smith   ierr = ISDestroy(istmp); CHKERRQ(ierr);
5331eb62cbbSBarry Smith 
5341eb62cbbSBarry Smith   /* wait on sends */
5351eb62cbbSBarry Smith   if (nsends) {
5360452661fSBarry Smith     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
53778b31e54SBarry Smith     CHKPTRQ(send_status);
5381eb62cbbSBarry Smith     MPI_Waitall(nsends,send_waits,send_status);
5390452661fSBarry Smith     PetscFree(send_status);
5401eb62cbbSBarry Smith   }
5410452661fSBarry Smith   PetscFree(send_waits); PetscFree(svalues);
5421eb62cbbSBarry Smith 
5431eb62cbbSBarry Smith   return 0;
5441eb62cbbSBarry Smith }
5451eb62cbbSBarry Smith 
5465615d1e5SSatish Balay #undef __FUNC__
5475615d1e5SSatish Balay #define __FUNC__ "MatMult_MPIAIJ"
548416022c9SBarry Smith static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
5491eb62cbbSBarry Smith {
550416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
551fbd6ef76SBarry Smith   int        ierr,nt;
552416022c9SBarry Smith 
553a2ce50c7SBarry Smith   ierr = VecGetLocalSize(xx,&nt);  CHKERRQ(ierr);
554fbd6ef76SBarry Smith   if (nt != a->n) {
555f40265daSLois Curfman McInnes     SETERRQ(1,0,"Incompatible partition of A and xx");
556fbd6ef76SBarry Smith   }
55743a90d84SBarry Smith   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
55838597bd4SSatish Balay   ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr);
55943a90d84SBarry Smith   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
56038597bd4SSatish Balay   ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
5611eb62cbbSBarry Smith   return 0;
5621eb62cbbSBarry Smith }
5631eb62cbbSBarry Smith 
5645615d1e5SSatish Balay #undef __FUNC__
5655615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_MPIAIJ"
566416022c9SBarry Smith static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
567da3a660dSBarry Smith {
568416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
569da3a660dSBarry Smith   int        ierr;
57043a90d84SBarry Smith   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
57138597bd4SSatish Balay   ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
57243a90d84SBarry Smith   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
57338597bd4SSatish Balay   ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
574da3a660dSBarry Smith   return 0;
575da3a660dSBarry Smith }
576da3a660dSBarry Smith 
5775615d1e5SSatish Balay #undef __FUNC__
5785615d1e5SSatish Balay #define __FUNC__ "MatMultTrans_MPIAIJ"
579416022c9SBarry Smith static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
580da3a660dSBarry Smith {
581416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
582da3a660dSBarry Smith   int        ierr;
583da3a660dSBarry Smith 
584da3a660dSBarry Smith   /* do nondiagonal part */
58538597bd4SSatish Balay   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
586da3a660dSBarry Smith   /* send it on its way */
587537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
588da3a660dSBarry Smith   /* do local part */
58938597bd4SSatish Balay   ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr);
590da3a660dSBarry Smith   /* receive remote parts: note this assumes the values are not actually */
591da3a660dSBarry Smith   /* inserted in yy until the next line, which is true for my implementation*/
592da3a660dSBarry Smith   /* but is not perhaps always true. */
593537820f0SBarry Smith   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
594da3a660dSBarry Smith   return 0;
595da3a660dSBarry Smith }
596da3a660dSBarry Smith 
5975615d1e5SSatish Balay #undef __FUNC__
5985615d1e5SSatish Balay #define __FUNC__ "MatMultTransAdd_MPIAIJ"
599416022c9SBarry Smith static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
600da3a660dSBarry Smith {
601416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
602da3a660dSBarry Smith   int        ierr;
603da3a660dSBarry Smith 
604da3a660dSBarry Smith   /* do nondiagonal part */
60538597bd4SSatish Balay   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
606da3a660dSBarry Smith   /* send it on its way */
607537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
608da3a660dSBarry Smith   /* do local part */
60938597bd4SSatish Balay   ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
610da3a660dSBarry Smith   /* receive remote parts: note this assumes the values are not actually */
611da3a660dSBarry Smith   /* inserted in yy until the next line, which is true for my implementation*/
612da3a660dSBarry Smith   /* but is not perhaps always true. */
6130a198c4cSBarry Smith   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
614da3a660dSBarry Smith   return 0;
615da3a660dSBarry Smith }
616da3a660dSBarry Smith 
6171eb62cbbSBarry Smith /*
6181eb62cbbSBarry Smith   This only works correctly for square matrices where the subblock A->A is the
6191eb62cbbSBarry Smith    diagonal block
6201eb62cbbSBarry Smith */
6215615d1e5SSatish Balay #undef __FUNC__
6225615d1e5SSatish Balay #define __FUNC__ "MatGetDiagonal_MPIAIJ"
623416022c9SBarry Smith static int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
6241eb62cbbSBarry Smith {
625416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
62644cd7ae7SLois Curfman McInnes   if (a->M != a->N)
627e3372554SBarry Smith     SETERRQ(1,0,"Supports only square matrix where A->A is diag block");
6285baf8537SBarry Smith   if (a->rstart != a->cstart || a->rend != a->cend) {
629e3372554SBarry Smith     SETERRQ(1,0,"row partition must equal col partition");  }
630416022c9SBarry Smith   return MatGetDiagonal(a->A,v);
6311eb62cbbSBarry Smith }
6321eb62cbbSBarry Smith 
6335615d1e5SSatish Balay #undef __FUNC__
6345615d1e5SSatish Balay #define __FUNC__ "MatScale_MPIAIJ"
635052efed2SBarry Smith static int MatScale_MPIAIJ(Scalar *aa,Mat A)
636052efed2SBarry Smith {
637052efed2SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
638052efed2SBarry Smith   int        ierr;
639052efed2SBarry Smith   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
640052efed2SBarry Smith   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
641052efed2SBarry Smith   return 0;
642052efed2SBarry Smith }
643052efed2SBarry Smith 
6445615d1e5SSatish Balay #undef __FUNC__
6455eea60f9SBarry Smith #define __FUNC__ "MatDestroy_MPIAIJ" /* ADIC Ignore */
64644a69424SLois Curfman McInnes static int MatDestroy_MPIAIJ(PetscObject obj)
6471eb62cbbSBarry Smith {
6481eb62cbbSBarry Smith   Mat        mat = (Mat) obj;
64944a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
6501eb62cbbSBarry Smith   int        ierr;
651a5a9c739SBarry Smith #if defined(PETSC_LOG)
6526e35fa57SLois Curfman McInnes   PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N);
653a5a9c739SBarry Smith #endif
6540452661fSBarry Smith   PetscFree(aij->rowners);
65578b31e54SBarry Smith   ierr = MatDestroy(aij->A); CHKERRQ(ierr);
65678b31e54SBarry Smith   ierr = MatDestroy(aij->B); CHKERRQ(ierr);
6570452661fSBarry Smith   if (aij->colmap) PetscFree(aij->colmap);
6580452661fSBarry Smith   if (aij->garray) PetscFree(aij->garray);
6591eb62cbbSBarry Smith   if (aij->lvec)   VecDestroy(aij->lvec);
660a56f8943SBarry Smith   if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
6617a0afa10SBarry Smith   if (aij->rowvalues) PetscFree(aij->rowvalues);
6620452661fSBarry Smith   PetscFree(aij);
66390f02eecSBarry Smith   if (mat->mapping) {
66490f02eecSBarry Smith     ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr);
66590f02eecSBarry Smith   }
666a5a9c739SBarry Smith   PLogObjectDestroy(mat);
6670452661fSBarry Smith   PetscHeaderDestroy(mat);
6681eb62cbbSBarry Smith   return 0;
6691eb62cbbSBarry Smith }
6707857610eSBarry Smith #include "draw.h"
671b16a3bb1SBarry Smith #include "pinclude/pviewer.h"
672ee50ffe9SBarry Smith 
6735615d1e5SSatish Balay #undef __FUNC__
6745eea60f9SBarry Smith #define __FUNC__ "MatView_MPIAIJ_Binary" /* ADIC Ignore */
675416022c9SBarry Smith static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
6761eb62cbbSBarry Smith {
677416022c9SBarry Smith   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
678416022c9SBarry Smith   int         ierr;
679416022c9SBarry Smith 
68017699dbbSLois Curfman McInnes   if (aij->size == 1) {
681416022c9SBarry Smith     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
682416022c9SBarry Smith   }
683e3372554SBarry Smith   else SETERRQ(1,0,"Only uniprocessor output supported");
684416022c9SBarry Smith   return 0;
685416022c9SBarry Smith }
686416022c9SBarry Smith 
6875615d1e5SSatish Balay #undef __FUNC__
6885eea60f9SBarry Smith #define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworMatlab" /* ADIC Ignore */
689d7e8b826SBarry Smith static int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
690416022c9SBarry Smith {
69144a69424SLois Curfman McInnes   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
692dbb450caSBarry Smith   Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
693a56f8943SBarry Smith   int         ierr, format,shift = C->indexshift,rank;
694d636dbe3SBarry Smith   FILE        *fd;
69519bcc07fSBarry Smith   ViewerType  vtype;
696416022c9SBarry Smith 
69719bcc07fSBarry Smith   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
69819bcc07fSBarry Smith   if (vtype  == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
69990ace30eSBarry Smith     ierr = ViewerGetFormat(viewer,&format);
7000a198c4cSBarry Smith     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
7014e220ebcSLois Curfman McInnes       MatInfo info;
7024e220ebcSLois Curfman McInnes       int     flg;
703a56f8943SBarry Smith       MPI_Comm_rank(mat->comm,&rank);
70490ace30eSBarry Smith       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
7054e220ebcSLois Curfman McInnes       ierr = MatGetInfo(mat,MAT_LOCAL,&info);
70695e01e2fSLois Curfman McInnes       ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr);
70777c4ece6SBarry Smith       PetscSequentialPhaseBegin(mat->comm,1);
70895e01e2fSLois Curfman McInnes       if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
7094e220ebcSLois Curfman McInnes          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
71095e01e2fSLois Curfman McInnes       else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
7114e220ebcSLois Curfman McInnes          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
7124e220ebcSLois Curfman McInnes       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);
7134e220ebcSLois Curfman McInnes       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);
7144e220ebcSLois Curfman McInnes       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);
7154e220ebcSLois Curfman McInnes       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);
716a56f8943SBarry Smith       fflush(fd);
71777c4ece6SBarry Smith       PetscSequentialPhaseEnd(mat->comm,1);
718a40aa06bSLois Curfman McInnes       ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr);
719416022c9SBarry Smith       return 0;
720416022c9SBarry Smith     }
7210a198c4cSBarry Smith     else if (format == VIEWER_FORMAT_ASCII_INFO) {
72208480c60SBarry Smith       return 0;
72308480c60SBarry Smith     }
724416022c9SBarry Smith   }
725416022c9SBarry Smith 
72619bcc07fSBarry Smith   if (vtype == DRAW_VIEWER) {
72719bcc07fSBarry Smith     Draw       draw;
72819bcc07fSBarry Smith     PetscTruth isnull;
72919bcc07fSBarry Smith     ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
73019bcc07fSBarry Smith     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
73119bcc07fSBarry Smith   }
73219bcc07fSBarry Smith 
73319bcc07fSBarry Smith   if (vtype == ASCII_FILE_VIEWER) {
73490ace30eSBarry Smith     ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
73577c4ece6SBarry Smith     PetscSequentialPhaseBegin(mat->comm,1);
736d13ab20cSBarry Smith     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
73717699dbbSLois Curfman McInnes            aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
7381eb62cbbSBarry Smith            aij->cend);
73978b31e54SBarry Smith     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
74078b31e54SBarry Smith     ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
741d13ab20cSBarry Smith     fflush(fd);
74277c4ece6SBarry Smith     PetscSequentialPhaseEnd(mat->comm,1);
743d13ab20cSBarry Smith   }
744416022c9SBarry Smith   else {
745a56f8943SBarry Smith     int size = aij->size;
746a56f8943SBarry Smith     rank = aij->rank;
74717699dbbSLois Curfman McInnes     if (size == 1) {
74878b31e54SBarry Smith       ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
74995373324SBarry Smith     }
75095373324SBarry Smith     else {
75195373324SBarry Smith       /* assemble the entire matrix onto first processor. */
75295373324SBarry Smith       Mat         A;
753ec8511deSBarry Smith       Mat_SeqAIJ *Aloc;
7542eb8c8abSBarry Smith       int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
75595373324SBarry Smith       Scalar      *a;
7562ee70a88SLois Curfman McInnes 
75717699dbbSLois Curfman McInnes       if (!rank) {
758b4fd4287SBarry Smith         ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
759c451ab25SLois Curfman McInnes                CHKERRQ(ierr);
76095373324SBarry Smith       }
76195373324SBarry Smith       else {
762b4fd4287SBarry Smith         ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
763c451ab25SLois Curfman McInnes                CHKERRQ(ierr);
76495373324SBarry Smith       }
765464493b3SBarry Smith       PLogObjectParent(mat,A);
766416022c9SBarry Smith 
76795373324SBarry Smith       /* copy over the A part */
768ec8511deSBarry Smith       Aloc = (Mat_SeqAIJ*) aij->A->data;
7692ee70a88SLois Curfman McInnes       m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
77095373324SBarry Smith       row = aij->rstart;
771dbb450caSBarry Smith       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
77295373324SBarry Smith       for ( i=0; i<m; i++ ) {
773416022c9SBarry Smith         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
77495373324SBarry Smith         row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
77595373324SBarry Smith       }
7762ee70a88SLois Curfman McInnes       aj = Aloc->j;
777dbb450caSBarry Smith       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}
77895373324SBarry Smith 
77995373324SBarry Smith       /* copy over the B part */
780ec8511deSBarry Smith       Aloc = (Mat_SeqAIJ*) aij->B->data;
7812ee70a88SLois Curfman McInnes       m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
78295373324SBarry Smith       row = aij->rstart;
7830452661fSBarry Smith       ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
784dbb450caSBarry Smith       for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
78595373324SBarry Smith       for ( i=0; i<m; i++ ) {
786416022c9SBarry Smith         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
78795373324SBarry Smith         row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
78895373324SBarry Smith       }
7890452661fSBarry Smith       PetscFree(ct);
7906d4a8577SBarry Smith       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
7916d4a8577SBarry Smith       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
79217699dbbSLois Curfman McInnes       if (!rank) {
79378b31e54SBarry Smith         ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
79495373324SBarry Smith       }
79578b31e54SBarry Smith       ierr = MatDestroy(A); CHKERRQ(ierr);
79695373324SBarry Smith     }
79795373324SBarry Smith   }
7981eb62cbbSBarry Smith   return 0;
7991eb62cbbSBarry Smith }
8001eb62cbbSBarry Smith 
8015615d1e5SSatish Balay #undef __FUNC__
8025eea60f9SBarry Smith #define __FUNC__ "MatView_MPIAIJ" /* ADIC Ignore */
803416022c9SBarry Smith static int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
804416022c9SBarry Smith {
805416022c9SBarry Smith   Mat         mat = (Mat) obj;
806416022c9SBarry Smith   int         ierr;
80719bcc07fSBarry Smith   ViewerType  vtype;
808416022c9SBarry Smith 
80919bcc07fSBarry Smith   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
81019bcc07fSBarry Smith   if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
81119bcc07fSBarry Smith       vtype == DRAW_VIEWER       || vtype == MATLAB_VIEWER) {
812d7e8b826SBarry Smith     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
813416022c9SBarry Smith   }
81419bcc07fSBarry Smith   else if (vtype == BINARY_FILE_VIEWER) {
815416022c9SBarry Smith     return MatView_MPIAIJ_Binary(mat,viewer);
816416022c9SBarry Smith   }
817416022c9SBarry Smith   return 0;
818416022c9SBarry Smith }
819416022c9SBarry Smith 
8201eb62cbbSBarry Smith /*
8211eb62cbbSBarry Smith     This has to provide several versions.
8221eb62cbbSBarry Smith 
8231eb62cbbSBarry Smith      2) a) use only local smoothing updating outer values only once.
8241eb62cbbSBarry Smith         b) local smoothing updating outer values each inner iteration
825d6dfbf8fSBarry Smith      3) color updating out values betwen colors.
8261eb62cbbSBarry Smith */
8275615d1e5SSatish Balay #undef __FUNC__
8285615d1e5SSatish Balay #define __FUNC__ "MatRelax_MPIAIJ"
829fc76ce87SLois Curfman McInnes static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
830dbb450caSBarry Smith                            double fshift,int its,Vec xx)
8318a729477SBarry Smith {
83244a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
833d6dfbf8fSBarry Smith   Mat        AA = mat->A, BB = mat->B;
834ec8511deSBarry Smith   Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
835c16cb8f2SBarry Smith   Scalar     *b,*x,*xs,*ls,d,*v,sum;
8366abc6512SBarry Smith   int        ierr,*idx, *diag;
837416022c9SBarry Smith   int        n = mat->n, m = mat->m, i,shift = A->indexshift;
8388a729477SBarry Smith 
839d6dfbf8fSBarry Smith   VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
840dbb450caSBarry Smith   xs = x + shift; /* shift by one for index start of 1 */
841dbb450caSBarry Smith   ls = ls + shift;
842ec8511deSBarry Smith   if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;}
843d6dfbf8fSBarry Smith   diag = A->diag;
844c16cb8f2SBarry Smith   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
845da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
84638597bd4SSatish Balay       return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
847da3a660dSBarry Smith     }
84843a90d84SBarry Smith     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
84978b31e54SBarry Smith     CHKERRQ(ierr);
85043a90d84SBarry Smith     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
85178b31e54SBarry Smith     CHKERRQ(ierr);
852d6dfbf8fSBarry Smith     while (its--) {
853d6dfbf8fSBarry Smith       /* go down through the rows */
854d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
855d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
856dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
857dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
858d6dfbf8fSBarry Smith         sum  = b[i];
859d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
860dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
861d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
862dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
863dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
864d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
86555a1b374SBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
866d6dfbf8fSBarry Smith       }
867d6dfbf8fSBarry Smith       /* come up through the rows */
868d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
869d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
870dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
871dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
872d6dfbf8fSBarry Smith         sum  = b[i];
873d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
874dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
875d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
876dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
877dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
878d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
87955a1b374SBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
880d6dfbf8fSBarry Smith       }
881d6dfbf8fSBarry Smith     }
882d6dfbf8fSBarry Smith   }
883d6dfbf8fSBarry Smith   else if (flag & SOR_LOCAL_FORWARD_SWEEP){
884da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
88538597bd4SSatish Balay       return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
886da3a660dSBarry Smith     }
88743a90d84SBarry Smith     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
88878b31e54SBarry Smith     CHKERRQ(ierr);
88943a90d84SBarry Smith     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
89078b31e54SBarry Smith     CHKERRQ(ierr);
891d6dfbf8fSBarry Smith     while (its--) {
892d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
893d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
894dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
895dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
896d6dfbf8fSBarry Smith         sum  = b[i];
897d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
898dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
899d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
900dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
901dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
902d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
90355a1b374SBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
904d6dfbf8fSBarry Smith       }
905d6dfbf8fSBarry Smith     }
906d6dfbf8fSBarry Smith   }
907d6dfbf8fSBarry Smith   else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
908da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
90938597bd4SSatish Balay       return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
910da3a660dSBarry Smith     }
91143a90d84SBarry Smith     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,
91278b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
91343a90d84SBarry Smith     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,
91478b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
915d6dfbf8fSBarry Smith     while (its--) {
916d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
917d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
918dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
919dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
920d6dfbf8fSBarry Smith         sum  = b[i];
921d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
922dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
923d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
924dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
925dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
926d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
92755a1b374SBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
928d6dfbf8fSBarry Smith       }
929d6dfbf8fSBarry Smith     }
930d6dfbf8fSBarry Smith   }
931c16cb8f2SBarry Smith   else {
932e3372554SBarry Smith     SETERRQ(1,0,"Option not supported");
933c16cb8f2SBarry Smith   }
9348a729477SBarry Smith   return 0;
9358a729477SBarry Smith }
936a66be287SLois Curfman McInnes 
9375615d1e5SSatish Balay #undef __FUNC__
9385eea60f9SBarry Smith #define __FUNC__ "MatGetInfo_MPIAIJ" /* ADIC Ignore */
9394e220ebcSLois Curfman McInnes static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
940a66be287SLois Curfman McInnes {
941a66be287SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
942a66be287SLois Curfman McInnes   Mat        A = mat->A, B = mat->B;
9434e220ebcSLois Curfman McInnes   int        ierr;
9444e220ebcSLois Curfman McInnes   double     isend[5], irecv[5];
945a66be287SLois Curfman McInnes 
9464e220ebcSLois Curfman McInnes   info->rows_global    = (double)mat->M;
9474e220ebcSLois Curfman McInnes   info->columns_global = (double)mat->N;
9484e220ebcSLois Curfman McInnes   info->rows_local     = (double)mat->m;
9494e220ebcSLois Curfman McInnes   info->columns_local  = (double)mat->N;
9504e220ebcSLois Curfman McInnes   info->block_size     = 1.0;
9514e220ebcSLois Curfman McInnes   ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
9524e220ebcSLois Curfman McInnes   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
9534e220ebcSLois Curfman McInnes   isend[3] = info->memory;  isend[4] = info->mallocs;
9544e220ebcSLois Curfman McInnes   ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
9554e220ebcSLois Curfman McInnes   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
9564e220ebcSLois Curfman McInnes   isend[3] += info->memory;  isend[4] += info->mallocs;
957a66be287SLois Curfman McInnes   if (flag == MAT_LOCAL) {
9584e220ebcSLois Curfman McInnes     info->nz_used      = isend[0];
9594e220ebcSLois Curfman McInnes     info->nz_allocated = isend[1];
9604e220ebcSLois Curfman McInnes     info->nz_unneeded  = isend[2];
9614e220ebcSLois Curfman McInnes     info->memory       = isend[3];
9624e220ebcSLois Curfman McInnes     info->mallocs      = isend[4];
963a66be287SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_MAX) {
9644e220ebcSLois Curfman McInnes     MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);
9654e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
9664e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
9674e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
9684e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
9694e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
970a66be287SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_SUM) {
9714e220ebcSLois Curfman McInnes     MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);
9724e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
9734e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
9744e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
9754e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
9764e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
977a66be287SLois Curfman McInnes   }
9784e220ebcSLois Curfman McInnes   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
9794e220ebcSLois Curfman McInnes   info->fill_ratio_needed = 0;
9804e220ebcSLois Curfman McInnes   info->factor_mallocs    = 0;
9814e220ebcSLois Curfman McInnes 
982a66be287SLois Curfman McInnes   return 0;
983a66be287SLois Curfman McInnes }
984a66be287SLois Curfman McInnes 
985299609e3SLois Curfman McInnes extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*);
986299609e3SLois Curfman McInnes extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*);
987299609e3SLois Curfman McInnes extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double);
988299609e3SLois Curfman McInnes extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *);
989299609e3SLois Curfman McInnes extern int MatSolve_MPIAIJ(Mat,Vec,Vec);
990299609e3SLois Curfman McInnes extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec);
991299609e3SLois Curfman McInnes extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec);
992299609e3SLois Curfman McInnes extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec);
993299609e3SLois Curfman McInnes 
9945615d1e5SSatish Balay #undef __FUNC__
9955eea60f9SBarry Smith #define __FUNC__ "MatSetOption_MPIAIJ" /* ADIC Ignore */
996416022c9SBarry Smith static int MatSetOption_MPIAIJ(Mat A,MatOption op)
997c74985f6SBarry Smith {
998c0bbcb79SLois Curfman McInnes   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
999c74985f6SBarry Smith 
10006d4a8577SBarry Smith   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
10016d4a8577SBarry Smith       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
10026da5968aSLois Curfman McInnes       op == MAT_COLUMNS_UNSORTED ||
1003*c2653b3dSLois Curfman McInnes       op == MAT_COLUMNS_SORTED ||
1004*c2653b3dSLois Curfman McInnes       op == MAT_NEW_NONZERO_LOCATION_ERROR) {
1005b1fbbac0SLois Curfman McInnes         MatSetOption(a->A,op);
1006b1fbbac0SLois Curfman McInnes         MatSetOption(a->B,op);
1007b1fbbac0SLois Curfman McInnes   } else if (op == MAT_ROW_ORIENTED) {
1008aeafbbfcSLois Curfman McInnes     a->roworiented = 1;
1009c0bbcb79SLois Curfman McInnes     MatSetOption(a->A,op);
1010c0bbcb79SLois Curfman McInnes     MatSetOption(a->B,op);
1011b1fbbac0SLois Curfman McInnes   } else if (op == MAT_ROWS_SORTED ||
10126da5968aSLois Curfman McInnes              op == MAT_ROWS_UNSORTED ||
10136d4a8577SBarry Smith              op == MAT_SYMMETRIC ||
10146d4a8577SBarry Smith              op == MAT_STRUCTURALLY_SYMMETRIC ||
10156d4a8577SBarry Smith              op == MAT_YES_NEW_DIAGONALS)
101694a424c1SBarry Smith     PLogInfo(A,"Info:MatSetOption_MPIAIJ:Option ignored\n");
10176d4a8577SBarry Smith   else if (op == MAT_COLUMN_ORIENTED) {
10184b0e389bSBarry Smith     a->roworiented = 0;
10194b0e389bSBarry Smith     MatSetOption(a->A,op);
10204b0e389bSBarry Smith     MatSetOption(a->B,op);
102190f02eecSBarry Smith   } else if (op == MAT_IGNORE_OFF_PROCESSOR_ENTRIES) {
102290f02eecSBarry Smith     a->donotstash = 1;
102390f02eecSBarry Smith   } else if (op == MAT_NO_NEW_DIAGONALS)
1024e3372554SBarry Smith     {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");}
1025c0bbcb79SLois Curfman McInnes   else
1026e3372554SBarry Smith     {SETERRQ(PETSC_ERR_SUP,0,"unknown option");}
1027c74985f6SBarry Smith   return 0;
1028c74985f6SBarry Smith }
1029c74985f6SBarry Smith 
10305615d1e5SSatish Balay #undef __FUNC__
10315eea60f9SBarry Smith #define __FUNC__ "MatGetSize_MPIAIJ" /* ADIC Ignore */
1032d1710a03SLois Curfman McInnes static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
1033c74985f6SBarry Smith {
103444a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1035c74985f6SBarry Smith   *m = mat->M; *n = mat->N;
1036c74985f6SBarry Smith   return 0;
1037c74985f6SBarry Smith }
1038c74985f6SBarry Smith 
10395615d1e5SSatish Balay #undef __FUNC__
10405eea60f9SBarry Smith #define __FUNC__ "MatGetLocalSize_MPIAIJ" /* ADIC Ignore */
1041d1710a03SLois Curfman McInnes static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
1042c74985f6SBarry Smith {
104344a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1044b7c46309SBarry Smith   *m = mat->m; *n = mat->N;
1045c74985f6SBarry Smith   return 0;
1046c74985f6SBarry Smith }
1047c74985f6SBarry Smith 
10485615d1e5SSatish Balay #undef __FUNC__
10495eea60f9SBarry Smith #define __FUNC__ "MatGetOwnershipRange_MPIAIJ" /* ADIC Ignore */
1050d1710a03SLois Curfman McInnes static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
1051c74985f6SBarry Smith {
105244a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1053c74985f6SBarry Smith   *m = mat->rstart; *n = mat->rend;
1054c74985f6SBarry Smith   return 0;
1055c74985f6SBarry Smith }
1056c74985f6SBarry Smith 
10576d84be18SBarry Smith extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
10586d84be18SBarry Smith extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
10596d84be18SBarry Smith 
10605615d1e5SSatish Balay #undef __FUNC__
10615615d1e5SSatish Balay #define __FUNC__ "MatGetRow_MPIAIJ"
10626d84be18SBarry Smith int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
106339e00950SLois Curfman McInnes {
1064154123eaSLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
106570f0671dSBarry Smith   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
1066154123eaSLois Curfman McInnes   int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
1067154123eaSLois Curfman McInnes   int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
106870f0671dSBarry Smith   int        *cmap, *idx_p;
106939e00950SLois Curfman McInnes 
1070e3372554SBarry Smith   if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,0,"Already active");
10717a0afa10SBarry Smith   mat->getrowactive = PETSC_TRUE;
10727a0afa10SBarry Smith 
107370f0671dSBarry Smith   if (!mat->rowvalues && (idx || v)) {
10747a0afa10SBarry Smith     /*
10757a0afa10SBarry Smith         allocate enough space to hold information from the longest row.
10767a0afa10SBarry Smith     */
10777a0afa10SBarry Smith     Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data;
1078c16cb8f2SBarry Smith     int     max = 1,m = mat->m,tmp;
1079c16cb8f2SBarry Smith     for ( i=0; i<m; i++ ) {
10807a0afa10SBarry Smith       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
10817a0afa10SBarry Smith       if (max < tmp) { max = tmp; }
10827a0afa10SBarry Smith     }
10837a0afa10SBarry Smith     mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar)));
10847a0afa10SBarry Smith     CHKPTRQ(mat->rowvalues);
10857a0afa10SBarry Smith     mat->rowindices = (int *) (mat->rowvalues + max);
10867a0afa10SBarry Smith   }
10877a0afa10SBarry Smith 
1088e3372554SBarry Smith   if (row < rstart || row >= rend) SETERRQ(1,0,"Only local rows")
1089abc0e9e4SLois Curfman McInnes   lrow = row - rstart;
109039e00950SLois Curfman McInnes 
1091154123eaSLois Curfman McInnes   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1092154123eaSLois Curfman McInnes   if (!v)   {pvA = 0; pvB = 0;}
1093154123eaSLois Curfman McInnes   if (!idx) {pcA = 0; if (!v) pcB = 0;}
109438597bd4SSatish Balay   ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
109538597bd4SSatish Balay   ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1096154123eaSLois Curfman McInnes   nztot = nzA + nzB;
1097154123eaSLois Curfman McInnes 
109870f0671dSBarry Smith   cmap  = mat->garray;
1099154123eaSLois Curfman McInnes   if (v  || idx) {
1100154123eaSLois Curfman McInnes     if (nztot) {
1101154123eaSLois Curfman McInnes       /* Sort by increasing column numbers, assuming A and B already sorted */
110270f0671dSBarry Smith       int imark = -1;
1103154123eaSLois Curfman McInnes       if (v) {
110470f0671dSBarry Smith         *v = v_p = mat->rowvalues;
110539e00950SLois Curfman McInnes         for ( i=0; i<nzB; i++ ) {
110670f0671dSBarry Smith           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1107154123eaSLois Curfman McInnes           else break;
1108154123eaSLois Curfman McInnes         }
1109154123eaSLois Curfman McInnes         imark = i;
111070f0671dSBarry Smith         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
111170f0671dSBarry Smith         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
1112154123eaSLois Curfman McInnes       }
1113154123eaSLois Curfman McInnes       if (idx) {
111470f0671dSBarry Smith         *idx = idx_p = mat->rowindices;
111570f0671dSBarry Smith         if (imark > -1) {
111670f0671dSBarry Smith           for ( i=0; i<imark; i++ ) {
111770f0671dSBarry Smith             idx_p[i] = cmap[cworkB[i]];
111870f0671dSBarry Smith           }
111970f0671dSBarry Smith         } else {
1120154123eaSLois Curfman McInnes           for ( i=0; i<nzB; i++ ) {
112170f0671dSBarry Smith             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1122154123eaSLois Curfman McInnes             else break;
1123154123eaSLois Curfman McInnes           }
1124154123eaSLois Curfman McInnes           imark = i;
112570f0671dSBarry Smith         }
112670f0671dSBarry Smith         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart + cworkA[i];
112770f0671dSBarry Smith         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]];
112839e00950SLois Curfman McInnes       }
112939e00950SLois Curfman McInnes     }
11301ca473b0SSatish Balay     else {
11311ca473b0SSatish Balay       if (idx) *idx = 0;
11321ca473b0SSatish Balay       if (v)   *v   = 0;
11331ca473b0SSatish Balay     }
1134154123eaSLois Curfman McInnes   }
113539e00950SLois Curfman McInnes   *nz = nztot;
113638597bd4SSatish Balay   ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
113738597bd4SSatish Balay   ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
113839e00950SLois Curfman McInnes   return 0;
113939e00950SLois Curfman McInnes }
114039e00950SLois Curfman McInnes 
11415615d1e5SSatish Balay #undef __FUNC__
11425eea60f9SBarry Smith #define __FUNC__ "MatRestoreRow_MPIAIJ" /* ADIC Ignore */
11436d84be18SBarry Smith int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
114439e00950SLois Curfman McInnes {
11457a0afa10SBarry Smith   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
11467a0afa10SBarry Smith   if (aij->getrowactive == PETSC_FALSE) {
1147e3372554SBarry Smith     SETERRQ(1,0,"MatGetRow not called");
11487a0afa10SBarry Smith   }
11497a0afa10SBarry Smith   aij->getrowactive = PETSC_FALSE;
115039e00950SLois Curfman McInnes   return 0;
115139e00950SLois Curfman McInnes }
115239e00950SLois Curfman McInnes 
11535615d1e5SSatish Balay #undef __FUNC__
11545615d1e5SSatish Balay #define __FUNC__ "MatNorm_MPIAIJ"
1155cddf8d76SBarry Smith static int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
1156855ac2c5SLois Curfman McInnes {
1157855ac2c5SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1158ec8511deSBarry Smith   Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
1159416022c9SBarry Smith   int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
1160416022c9SBarry Smith   double     sum = 0.0;
116104ca555eSLois Curfman McInnes   Scalar     *v;
116204ca555eSLois Curfman McInnes 
116317699dbbSLois Curfman McInnes   if (aij->size == 1) {
116414183eadSLois Curfman McInnes     ierr =  MatNorm(aij->A,type,norm); CHKERRQ(ierr);
116537fa93a5SLois Curfman McInnes   } else {
116604ca555eSLois Curfman McInnes     if (type == NORM_FROBENIUS) {
116704ca555eSLois Curfman McInnes       v = amat->a;
116804ca555eSLois Curfman McInnes       for (i=0; i<amat->nz; i++ ) {
116904ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX)
117004ca555eSLois Curfman McInnes         sum += real(conj(*v)*(*v)); v++;
117104ca555eSLois Curfman McInnes #else
117204ca555eSLois Curfman McInnes         sum += (*v)*(*v); v++;
117304ca555eSLois Curfman McInnes #endif
117404ca555eSLois Curfman McInnes       }
117504ca555eSLois Curfman McInnes       v = bmat->a;
117604ca555eSLois Curfman McInnes       for (i=0; i<bmat->nz; i++ ) {
117704ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX)
117804ca555eSLois Curfman McInnes         sum += real(conj(*v)*(*v)); v++;
117904ca555eSLois Curfman McInnes #else
118004ca555eSLois Curfman McInnes         sum += (*v)*(*v); v++;
118104ca555eSLois Curfman McInnes #endif
118204ca555eSLois Curfman McInnes       }
11836d84be18SBarry Smith       MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
118404ca555eSLois Curfman McInnes       *norm = sqrt(*norm);
118504ca555eSLois Curfman McInnes     }
118604ca555eSLois Curfman McInnes     else if (type == NORM_1) { /* max column norm */
118704ca555eSLois Curfman McInnes       double *tmp, *tmp2;
118804ca555eSLois Curfman McInnes       int    *jj, *garray = aij->garray;
11890452661fSBarry Smith       tmp  = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp);
11900452661fSBarry Smith       tmp2 = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp2);
1191cddf8d76SBarry Smith       PetscMemzero(tmp,aij->N*sizeof(double));
119204ca555eSLois Curfman McInnes       *norm = 0.0;
119304ca555eSLois Curfman McInnes       v = amat->a; jj = amat->j;
119404ca555eSLois Curfman McInnes       for ( j=0; j<amat->nz; j++ ) {
1195579c6b6fSBarry Smith         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
119604ca555eSLois Curfman McInnes       }
119704ca555eSLois Curfman McInnes       v = bmat->a; jj = bmat->j;
119804ca555eSLois Curfman McInnes       for ( j=0; j<bmat->nz; j++ ) {
1199579c6b6fSBarry Smith         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
120004ca555eSLois Curfman McInnes       }
12016d84be18SBarry Smith       MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);
120204ca555eSLois Curfman McInnes       for ( j=0; j<aij->N; j++ ) {
120304ca555eSLois Curfman McInnes         if (tmp2[j] > *norm) *norm = tmp2[j];
120404ca555eSLois Curfman McInnes       }
12050452661fSBarry Smith       PetscFree(tmp); PetscFree(tmp2);
120604ca555eSLois Curfman McInnes     }
120704ca555eSLois Curfman McInnes     else if (type == NORM_INFINITY) { /* max row norm */
1208515d9167SLois Curfman McInnes       double ntemp = 0.0;
120904ca555eSLois Curfman McInnes       for ( j=0; j<amat->m; j++ ) {
1210dbb450caSBarry Smith         v = amat->a + amat->i[j] + shift;
121104ca555eSLois Curfman McInnes         sum = 0.0;
121204ca555eSLois Curfman McInnes         for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
1213cddf8d76SBarry Smith           sum += PetscAbsScalar(*v); v++;
121404ca555eSLois Curfman McInnes         }
1215dbb450caSBarry Smith         v = bmat->a + bmat->i[j] + shift;
121604ca555eSLois Curfman McInnes         for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
1217cddf8d76SBarry Smith           sum += PetscAbsScalar(*v); v++;
121804ca555eSLois Curfman McInnes         }
1219515d9167SLois Curfman McInnes         if (sum > ntemp) ntemp = sum;
122004ca555eSLois Curfman McInnes       }
12216d84be18SBarry Smith       MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);
122204ca555eSLois Curfman McInnes     }
122304ca555eSLois Curfman McInnes     else {
1224e3372554SBarry Smith       SETERRQ(1,0,"No support for two norm");
122504ca555eSLois Curfman McInnes     }
122637fa93a5SLois Curfman McInnes   }
1227855ac2c5SLois Curfman McInnes   return 0;
1228855ac2c5SLois Curfman McInnes }
1229855ac2c5SLois Curfman McInnes 
12305615d1e5SSatish Balay #undef __FUNC__
12315615d1e5SSatish Balay #define __FUNC__ "MatTranspose_MPIAIJ"
12320de55854SLois Curfman McInnes static int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1233b7c46309SBarry Smith {
1234b7c46309SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1235dbb450caSBarry Smith   Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
1236416022c9SBarry Smith   int        ierr,shift = Aloc->indexshift;
1237416022c9SBarry Smith   Mat        B;
1238b7c46309SBarry Smith   int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
1239b7c46309SBarry Smith   Scalar     *array;
1240b7c46309SBarry Smith 
12413638b69dSLois Curfman McInnes   if (matout == PETSC_NULL && M != N)
1242e3372554SBarry Smith     SETERRQ(1,0,"Square matrix only for in-place");
1243b4fd4287SBarry Smith   ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,
1244b4fd4287SBarry Smith          PETSC_NULL,&B); CHKERRQ(ierr);
1245b7c46309SBarry Smith 
1246b7c46309SBarry Smith   /* copy over the A part */
1247ec8511deSBarry Smith   Aloc = (Mat_SeqAIJ*) a->A->data;
1248b7c46309SBarry Smith   m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1249b7c46309SBarry Smith   row = a->rstart;
1250dbb450caSBarry Smith   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
1251b7c46309SBarry Smith   for ( i=0; i<m; i++ ) {
1252416022c9SBarry Smith     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1253b7c46309SBarry Smith     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1254b7c46309SBarry Smith   }
1255b7c46309SBarry Smith   aj = Aloc->j;
12564af08d9eSBarry Smith   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}
1257b7c46309SBarry Smith 
1258b7c46309SBarry Smith   /* copy over the B part */
1259ec8511deSBarry Smith   Aloc = (Mat_SeqAIJ*) a->B->data;
1260b7c46309SBarry Smith   m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1261b7c46309SBarry Smith   row = a->rstart;
12620452661fSBarry Smith   ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
1263dbb450caSBarry Smith   for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
1264b7c46309SBarry Smith   for ( i=0; i<m; i++ ) {
1265416022c9SBarry Smith     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1266b7c46309SBarry Smith     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1267b7c46309SBarry Smith   }
12680452661fSBarry Smith   PetscFree(ct);
12696d4a8577SBarry Smith   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
12706d4a8577SBarry Smith   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
12713638b69dSLois Curfman McInnes   if (matout != PETSC_NULL) {
12720de55854SLois Curfman McInnes     *matout = B;
12730de55854SLois Curfman McInnes   } else {
12740de55854SLois Curfman McInnes     /* This isn't really an in-place transpose .... but free data structures from a */
12750452661fSBarry Smith     PetscFree(a->rowners);
12760de55854SLois Curfman McInnes     ierr = MatDestroy(a->A); CHKERRQ(ierr);
12770de55854SLois Curfman McInnes     ierr = MatDestroy(a->B); CHKERRQ(ierr);
12780452661fSBarry Smith     if (a->colmap) PetscFree(a->colmap);
12790452661fSBarry Smith     if (a->garray) PetscFree(a->garray);
12800de55854SLois Curfman McInnes     if (a->lvec) VecDestroy(a->lvec);
1281a56f8943SBarry Smith     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
12820452661fSBarry Smith     PetscFree(a);
1283416022c9SBarry Smith     PetscMemcpy(A,B,sizeof(struct _Mat));
12840452661fSBarry Smith     PetscHeaderDestroy(B);
12850de55854SLois Curfman McInnes   }
1286b7c46309SBarry Smith   return 0;
1287b7c46309SBarry Smith }
1288b7c46309SBarry Smith 
12895615d1e5SSatish Balay #undef __FUNC__
12905615d1e5SSatish Balay #define __FUNC__ "MatDiagonalScale_MPIAIJ"
12914b967eb1SSatish Balay int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1292a008b906SSatish Balay {
12934b967eb1SSatish Balay   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
12944b967eb1SSatish Balay   Mat a = aij->A, b = aij->B;
1295a008b906SSatish Balay   int ierr,s1,s2,s3;
1296a008b906SSatish Balay 
12974b967eb1SSatish Balay   ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr);
12984b967eb1SSatish Balay   if (rr) {
12994b967eb1SSatish Balay     s3 = aij->n;
13004b967eb1SSatish Balay     VecGetLocalSize_Fast(rr,s1);
1301e3372554SBarry Smith     if (s1!=s3) SETERRQ(1,0,"right vector non-conforming local size");
13024b967eb1SSatish Balay     /* Overlap communication with computation. */
130343a90d84SBarry Smith     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
1304a008b906SSatish Balay   }
13054b967eb1SSatish Balay   if (ll) {
13064b967eb1SSatish Balay     VecGetLocalSize_Fast(ll,s1);
1307e3372554SBarry Smith     if (s1!=s2) SETERRQ(1,0,"left vector non-conforming local size");
1308c351683dSSatish Balay     ierr = (*b->ops.diagonalscale)(b,ll,0); CHKERRQ(ierr);
13094b967eb1SSatish Balay   }
13104b967eb1SSatish Balay   /* scale  the diagonal block */
1311c351683dSSatish Balay   ierr = (*a->ops.diagonalscale)(a,ll,rr); CHKERRQ(ierr);
13124b967eb1SSatish Balay 
13134b967eb1SSatish Balay   if (rr) {
13144b967eb1SSatish Balay     /* Do a scatter end and then right scale the off-diagonal block */
131543a90d84SBarry Smith     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
1316c351683dSSatish Balay     ierr = (*b->ops.diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr);
13174b967eb1SSatish Balay   }
13184b967eb1SSatish Balay 
1319a008b906SSatish Balay   return 0;
1320a008b906SSatish Balay }
1321a008b906SSatish Balay 
1322a008b906SSatish Balay 
1323682d7d0cSBarry Smith extern int MatPrintHelp_SeqAIJ(Mat);
13245615d1e5SSatish Balay #undef __FUNC__
13255eea60f9SBarry Smith #define __FUNC__ "MatPrintHelp_MPIAIJ" /* ADIC Ignore */
1326682d7d0cSBarry Smith static int MatPrintHelp_MPIAIJ(Mat A)
1327682d7d0cSBarry Smith {
1328682d7d0cSBarry Smith   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1329682d7d0cSBarry Smith 
1330682d7d0cSBarry Smith   if (!a->rank) return MatPrintHelp_SeqAIJ(a->A);
1331682d7d0cSBarry Smith   else return 0;
1332682d7d0cSBarry Smith }
1333682d7d0cSBarry Smith 
13345615d1e5SSatish Balay #undef __FUNC__
13355eea60f9SBarry Smith #define __FUNC__ "MatGetBlockSize_MPIAIJ" /* ADIC Ignore */
13365a838052SSatish Balay static int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
13375a838052SSatish Balay {
13385a838052SSatish Balay   *bs = 1;
13395a838052SSatish Balay   return 0;
13405a838052SSatish Balay }
13415615d1e5SSatish Balay #undef __FUNC__
13425eea60f9SBarry Smith #define __FUNC__ "MatSetUnfactored_MPIAIJ" /* ADIC Ignore */
1343bb5a7306SBarry Smith static int MatSetUnfactored_MPIAIJ(Mat A)
1344bb5a7306SBarry Smith {
1345bb5a7306SBarry Smith   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1346bb5a7306SBarry Smith   int        ierr;
1347bb5a7306SBarry Smith   ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
1348bb5a7306SBarry Smith   return 0;
1349bb5a7306SBarry Smith }
1350bb5a7306SBarry Smith 
13513d1612f7SBarry Smith static int MatConvertSameType_MPIAIJ(Mat,Mat *,int);
13522f86bd48SSatish Balay extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int);
13530a198c4cSBarry Smith extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
13540a198c4cSBarry Smith extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **);
13558a729477SBarry Smith /* -------------------------------------------------------------------*/
13562ee70a88SLois Curfman McInnes static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
135739e00950SLois Curfman McInnes        MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
135844a69424SLois Curfman McInnes        MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
135944a69424SLois Curfman McInnes        MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
136036ce4990SBarry Smith        0,0,
136136ce4990SBarry Smith        0,0,
136236ce4990SBarry Smith        0,0,
136344a69424SLois Curfman McInnes        MatRelax_MPIAIJ,
1364b7c46309SBarry Smith        MatTranspose_MPIAIJ,
1365a66be287SLois Curfman McInnes        MatGetInfo_MPIAIJ,0,
1366a008b906SSatish Balay        MatGetDiagonal_MPIAIJ,MatDiagonalScale_MPIAIJ,MatNorm_MPIAIJ,
1367ee50ffe9SBarry Smith        MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
13681eb62cbbSBarry Smith        0,
1369299609e3SLois Curfman McInnes        MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
137036ce4990SBarry Smith        0,0,0,0,
1371d1710a03SLois Curfman McInnes        MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
137236ce4990SBarry Smith        0,0,
137394a9d846SBarry Smith        0,0,MatConvertSameType_MPIAIJ,0,0,
1374b49de8d1SLois Curfman McInnes        0,0,0,
1375598137ffSSatish Balay        MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0,
1376052efed2SBarry Smith        MatPrintHelp_MPIAIJ,
13773b2fbd54SBarry Smith        MatScale_MPIAIJ,0,0,0,
13780a198c4cSBarry Smith        MatGetBlockSize_MPIAIJ,0,0,0,0,
1379bb5a7306SBarry Smith        MatFDColoringCreate_MPIAIJ,0,MatSetUnfactored_MPIAIJ};
138036ce4990SBarry Smith 
13818a729477SBarry Smith 
13825615d1e5SSatish Balay #undef __FUNC__
13835615d1e5SSatish Balay #define __FUNC__ "MatCreateMPIAIJ"
13841987afe7SBarry Smith /*@C
1385ff756334SLois Curfman McInnes    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
13863a511b96SLois Curfman McInnes    (the default parallel PETSc format).  For good matrix assembly performance
13873a511b96SLois Curfman McInnes    the user should preallocate the matrix storage by setting the parameters
13883a511b96SLois Curfman McInnes    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
13893a511b96SLois Curfman McInnes    performance can be increased by more than a factor of 50.
13908a729477SBarry Smith 
13918a729477SBarry Smith    Input Parameters:
13921eb62cbbSBarry Smith .  comm - MPI communicator
13937d3e4905SLois Curfman McInnes .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
139492e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
139592e8d321SLois Curfman McInnes            y vector for the matrix-vector product y = Ax.
139692e8d321SLois Curfman McInnes .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
139792e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
139892e8d321SLois Curfman McInnes            x vector for the matrix-vector product y = Ax.
13997d3e4905SLois Curfman McInnes .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
140092e8d321SLois Curfman McInnes .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
1401ab693e5aSLois Curfman McInnes .  d_nz - number of nonzeros per row in diagonal portion of local submatrix
1402ff756334SLois Curfman McInnes            (same for all local rows)
14032bd5e0b2SLois Curfman McInnes .  d_nzz - array containing the number of nonzeros in the various rows of the
140492e8d321SLois Curfman McInnes            diagonal portion of the local submatrix (possibly different for each row)
14052bd5e0b2SLois Curfman McInnes            or PETSC_NULL. You must leave room for the diagonal entry even if
14062bd5e0b2SLois Curfman McInnes            it is zero.
14072bd5e0b2SLois Curfman McInnes .  o_nz - number of nonzeros per row in the off-diagonal portion of local
1408ab693e5aSLois Curfman McInnes            submatrix (same for all local rows).
14092bd5e0b2SLois Curfman McInnes .  o_nzz - array containing the number of nonzeros in the various rows of the
14102bd5e0b2SLois Curfman McInnes            off-diagonal portion of the local submatrix (possibly different for
14112bd5e0b2SLois Curfman McInnes            each row) or PETSC_NULL.
14128a729477SBarry Smith 
1413ff756334SLois Curfman McInnes    Output Parameter:
141444cd7ae7SLois Curfman McInnes .  A - the matrix
14158a729477SBarry Smith 
1416ff756334SLois Curfman McInnes    Notes:
1417ff756334SLois Curfman McInnes    The AIJ format (also called the Yale sparse matrix format or
1418ff756334SLois Curfman McInnes    compressed row storage), is fully compatible with standard Fortran 77
14190002213bSLois Curfman McInnes    storage.  That is, the stored row and column indices can begin at
14200002213bSLois Curfman McInnes    either one (as in Fortran) or zero.  See the users manual for details.
1421ff756334SLois Curfman McInnes 
1422ff756334SLois Curfman McInnes    The user MUST specify either the local or global matrix dimensions
1423ff756334SLois Curfman McInnes    (possibly both).
1424ff756334SLois Curfman McInnes 
14255511cfe3SLois Curfman McInnes    By default, this format uses inodes (identical nodes) when possible.
14265511cfe3SLois Curfman McInnes    We search for consecutive rows with the same nonzero structure, thereby
14275511cfe3SLois Curfman McInnes    reusing matrix information to achieve increased efficiency.
14285511cfe3SLois Curfman McInnes 
14295511cfe3SLois Curfman McInnes    Options Database Keys:
14305511cfe3SLois Curfman McInnes $    -mat_aij_no_inode  - Do not use inodes
14316e62573dSLois Curfman McInnes $    -mat_aij_inode_limit <limit> - Set inode limit.
14326e62573dSLois Curfman McInnes $        (max limit=5)
14336e62573dSLois Curfman McInnes $    -mat_aij_oneindex - Internally use indexing starting at 1
14346e62573dSLois Curfman McInnes $        rather than 0.  Note: When calling MatSetValues(),
14356e62573dSLois Curfman McInnes $        the user still MUST index entries starting at 0!
14365511cfe3SLois Curfman McInnes 
1437e0245417SLois Curfman McInnes    Storage Information:
1438e0245417SLois Curfman McInnes    For a square global matrix we define each processor's diagonal portion
1439e0245417SLois Curfman McInnes    to be its local rows and the corresponding columns (a square submatrix);
1440e0245417SLois Curfman McInnes    each processor's off-diagonal portion encompasses the remainder of the
1441e0245417SLois Curfman McInnes    local matrix (a rectangular submatrix).
1442e0245417SLois Curfman McInnes 
1443e0245417SLois Curfman McInnes    The user can specify preallocated storage for the diagonal part of
14445ace5be8SLois Curfman McInnes    the local submatrix with either d_nz or d_nnz (not both).  Set
14455ace5be8SLois Curfman McInnes    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
14465ace5be8SLois Curfman McInnes    memory allocation.  Likewise, specify preallocated storage for the
14475ace5be8SLois Curfman McInnes    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1448ff756334SLois Curfman McInnes 
14495511cfe3SLois Curfman McInnes    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
14505511cfe3SLois Curfman McInnes    the figure below we depict these three local rows and all columns (0-11).
14512191d07cSBarry Smith 
1452b810aeb4SBarry Smith $          0 1 2 3 4 5 6 7 8 9 10 11
1453b810aeb4SBarry Smith $         -------------------
14545511cfe3SLois Curfman McInnes $  row 3  |  o o o d d d o o o o o o
14555511cfe3SLois Curfman McInnes $  row 4  |  o o o d d d o o o o o o
14565511cfe3SLois Curfman McInnes $  row 5  |  o o o d d d o o o o o o
14575511cfe3SLois Curfman McInnes $         -------------------
1458b810aeb4SBarry Smith $
1459b810aeb4SBarry Smith 
14605511cfe3SLois Curfman McInnes    Thus, any entries in the d locations are stored in the d (diagonal)
14615511cfe3SLois Curfman McInnes    submatrix, and any entries in the o locations are stored in the
14625511cfe3SLois Curfman McInnes    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
14635511cfe3SLois Curfman McInnes    stored simply in the MATSEQAIJ format for compressed row storage.
14645511cfe3SLois Curfman McInnes 
14655511cfe3SLois Curfman McInnes    Now d_nz should indicate the number of nonzeros per row in the d matrix,
14665511cfe3SLois Curfman McInnes    and o_nz should indicate the number of nonzeros per row in the o matrix.
14675511cfe3SLois Curfman McInnes    In general, for PDE problems in which most nonzeros are near the diagonal,
14683d323bbdSBarry Smith    one expects d_nz >> o_nz. For large problems you MUST preallocate memory
146992e8d321SLois Curfman McInnes    or you will get TERRIBLE performance; see the users' manual chapter on
14706da5968aSLois Curfman McInnes    matrices.
14713a511b96SLois Curfman McInnes 
1472dbd7a890SLois Curfman McInnes .keywords: matrix, aij, compressed row, sparse, parallel
1473ff756334SLois Curfman McInnes 
1474fafbff53SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
14758a729477SBarry Smith @*/
14761eb62cbbSBarry Smith int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
147744cd7ae7SLois Curfman McInnes                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
14788a729477SBarry Smith {
147944cd7ae7SLois Curfman McInnes   Mat          B;
148044cd7ae7SLois Curfman McInnes   Mat_MPIAIJ   *b;
148136ce4990SBarry Smith   int          ierr, i,sum[2],work[2],size;
1482416022c9SBarry Smith 
148344cd7ae7SLois Curfman McInnes   *A = 0;
148444cd7ae7SLois Curfman McInnes   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIAIJ,comm);
148544cd7ae7SLois Curfman McInnes   PLogObjectCreate(B);
148644cd7ae7SLois Curfman McInnes   B->data       = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b);
148744cd7ae7SLois Curfman McInnes   PetscMemzero(b,sizeof(Mat_MPIAIJ));
148844cd7ae7SLois Curfman McInnes   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
148936ce4990SBarry Smith   MPI_Comm_size(comm,&size);
149036ce4990SBarry Smith   if (size == 1) {
149136ce4990SBarry Smith     B->ops.getrowij          = MatGetRowIJ_MPIAIJ;
149236ce4990SBarry Smith     B->ops.restorerowij      = MatRestoreRowIJ_MPIAIJ;
149336ce4990SBarry Smith     B->ops.lufactorsymbolic  = MatLUFactorSymbolic_MPIAIJ;
149436ce4990SBarry Smith     B->ops.lufactornumeric   = MatLUFactorNumeric_MPIAIJ;
149536ce4990SBarry Smith     B->ops.lufactor          = MatLUFactor_MPIAIJ;
149636ce4990SBarry Smith     B->ops.solve             = MatSolve_MPIAIJ;
149736ce4990SBarry Smith     B->ops.solveadd          = MatSolveAdd_MPIAIJ;
149836ce4990SBarry Smith     B->ops.solvetrans        = MatSolveTrans_MPIAIJ;
149936ce4990SBarry Smith     B->ops.solvetransadd     = MatSolveTransAdd_MPIAIJ;
150036ce4990SBarry Smith     B->ops.ilufactorsymbolic = MatILUFactorSymbolic_MPIAIJ;
150136ce4990SBarry Smith   }
150244cd7ae7SLois Curfman McInnes   B->destroy    = MatDestroy_MPIAIJ;
150344cd7ae7SLois Curfman McInnes   B->view       = MatView_MPIAIJ;
150444cd7ae7SLois Curfman McInnes   B->factor     = 0;
150544cd7ae7SLois Curfman McInnes   B->assembled  = PETSC_FALSE;
150690f02eecSBarry Smith   B->mapping    = 0;
1507d6dfbf8fSBarry Smith 
150847794344SBarry Smith   B->insertmode = NOT_SET_VALUES;
150944cd7ae7SLois Curfman McInnes   MPI_Comm_rank(comm,&b->rank);
151044cd7ae7SLois Curfman McInnes   MPI_Comm_size(comm,&b->size);
15111eb62cbbSBarry Smith 
1512b4fd4287SBarry Smith   if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
1513e3372554SBarry Smith     SETERRQ(1,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
15141987afe7SBarry Smith 
1515dbd7a890SLois Curfman McInnes   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
15161eb62cbbSBarry Smith     work[0] = m; work[1] = n;
1517d65a2f8fSBarry Smith     MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
1518dbd7a890SLois Curfman McInnes     if (M == PETSC_DECIDE) M = sum[0];
1519dbd7a890SLois Curfman McInnes     if (N == PETSC_DECIDE) N = sum[1];
15201eb62cbbSBarry Smith   }
152144cd7ae7SLois Curfman McInnes   if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);}
152244cd7ae7SLois Curfman McInnes   if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);}
152344cd7ae7SLois Curfman McInnes   b->m = m; B->m = m;
152444cd7ae7SLois Curfman McInnes   b->n = n; B->n = n;
152544cd7ae7SLois Curfman McInnes   b->N = N; B->N = N;
152644cd7ae7SLois Curfman McInnes   b->M = M; B->M = M;
15271eb62cbbSBarry Smith 
15281eb62cbbSBarry Smith   /* build local table of row and column ownerships */
152944cd7ae7SLois Curfman McInnes   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
153044cd7ae7SLois Curfman McInnes   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
1531603f58a4SSatish Balay   b->cowners = b->rowners + b->size + 2;
153244cd7ae7SLois Curfman McInnes   MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);
153344cd7ae7SLois Curfman McInnes   b->rowners[0] = 0;
153444cd7ae7SLois Curfman McInnes   for ( i=2; i<=b->size; i++ ) {
153544cd7ae7SLois Curfman McInnes     b->rowners[i] += b->rowners[i-1];
15368a729477SBarry Smith   }
153744cd7ae7SLois Curfman McInnes   b->rstart = b->rowners[b->rank];
153844cd7ae7SLois Curfman McInnes   b->rend   = b->rowners[b->rank+1];
153944cd7ae7SLois Curfman McInnes   MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);
154044cd7ae7SLois Curfman McInnes   b->cowners[0] = 0;
154144cd7ae7SLois Curfman McInnes   for ( i=2; i<=b->size; i++ ) {
154244cd7ae7SLois Curfman McInnes     b->cowners[i] += b->cowners[i-1];
15438a729477SBarry Smith   }
154444cd7ae7SLois Curfman McInnes   b->cstart = b->cowners[b->rank];
154544cd7ae7SLois Curfman McInnes   b->cend   = b->cowners[b->rank+1];
15468a729477SBarry Smith 
15475ace5be8SLois Curfman McInnes   if (d_nz == PETSC_DEFAULT) d_nz = 5;
154844cd7ae7SLois Curfman McInnes   ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
154944cd7ae7SLois Curfman McInnes   PLogObjectParent(B,b->A);
15507b8455f0SLois Curfman McInnes   if (o_nz == PETSC_DEFAULT) o_nz = 0;
155144cd7ae7SLois Curfman McInnes   ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
155244cd7ae7SLois Curfman McInnes   PLogObjectParent(B,b->B);
15538a729477SBarry Smith 
15541eb62cbbSBarry Smith   /* build cache for off array entries formed */
155544cd7ae7SLois Curfman McInnes   ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
155690f02eecSBarry Smith   b->donotstash  = 0;
155744cd7ae7SLois Curfman McInnes   b->colmap      = 0;
155844cd7ae7SLois Curfman McInnes   b->garray      = 0;
155944cd7ae7SLois Curfman McInnes   b->roworiented = 1;
15608a729477SBarry Smith 
15611eb62cbbSBarry Smith   /* stuff used for matrix vector multiply */
156244cd7ae7SLois Curfman McInnes   b->lvec      = 0;
156344cd7ae7SLois Curfman McInnes   b->Mvctx     = 0;
15648a729477SBarry Smith 
15657a0afa10SBarry Smith   /* stuff for MatGetRow() */
156644cd7ae7SLois Curfman McInnes   b->rowindices   = 0;
156744cd7ae7SLois Curfman McInnes   b->rowvalues    = 0;
156844cd7ae7SLois Curfman McInnes   b->getrowactive = PETSC_FALSE;
15697a0afa10SBarry Smith 
157044cd7ae7SLois Curfman McInnes   *A = B;
1571d6dfbf8fSBarry Smith   return 0;
1572d6dfbf8fSBarry Smith }
1573c74985f6SBarry Smith 
15745615d1e5SSatish Balay #undef __FUNC__
15755615d1e5SSatish Balay #define __FUNC__ "MatConvertSameType_MPIAIJ"
15763d1612f7SBarry Smith static int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues)
1577d6dfbf8fSBarry Smith {
1578d6dfbf8fSBarry Smith   Mat        mat;
1579416022c9SBarry Smith   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
1580a1b97e82SLois Curfman McInnes   int        ierr, len=0, flg;
1581d6dfbf8fSBarry Smith 
1582416022c9SBarry Smith   *newmat       = 0;
15830452661fSBarry Smith   PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm);
1584a5a9c739SBarry Smith   PLogObjectCreate(mat);
15850452661fSBarry Smith   mat->data       = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
1586416022c9SBarry Smith   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
158744a69424SLois Curfman McInnes   mat->destroy    = MatDestroy_MPIAIJ;
158844a69424SLois Curfman McInnes   mat->view       = MatView_MPIAIJ;
1589d6dfbf8fSBarry Smith   mat->factor     = matin->factor;
1590c456f294SBarry Smith   mat->assembled  = PETSC_TRUE;
1591d6dfbf8fSBarry Smith 
159244cd7ae7SLois Curfman McInnes   a->m = mat->m   = oldmat->m;
159344cd7ae7SLois Curfman McInnes   a->n = mat->n   = oldmat->n;
159444cd7ae7SLois Curfman McInnes   a->M = mat->M   = oldmat->M;
159544cd7ae7SLois Curfman McInnes   a->N = mat->N   = oldmat->N;
1596d6dfbf8fSBarry Smith 
1597416022c9SBarry Smith   a->rstart       = oldmat->rstart;
1598416022c9SBarry Smith   a->rend         = oldmat->rend;
1599416022c9SBarry Smith   a->cstart       = oldmat->cstart;
1600416022c9SBarry Smith   a->cend         = oldmat->cend;
160117699dbbSLois Curfman McInnes   a->size         = oldmat->size;
160217699dbbSLois Curfman McInnes   a->rank         = oldmat->rank;
160347794344SBarry Smith   mat->insertmode = NOT_SET_VALUES;
1604bcd2baecSBarry Smith   a->rowvalues    = 0;
1605bcd2baecSBarry Smith   a->getrowactive = PETSC_FALSE;
1606d6dfbf8fSBarry Smith 
1607603f58a4SSatish Balay   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
1608603f58a4SSatish Balay   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
1609603f58a4SSatish Balay   a->cowners = a->rowners + a->size + 2;
1610603f58a4SSatish Balay   PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
1611416022c9SBarry Smith   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
16122ee70a88SLois Curfman McInnes   if (oldmat->colmap) {
16130452661fSBarry Smith     a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
1614416022c9SBarry Smith     PLogObjectMemory(mat,(a->N)*sizeof(int));
1615416022c9SBarry Smith     PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
1616416022c9SBarry Smith   } else a->colmap = 0;
16173f41c07dSBarry Smith   if (oldmat->garray) {
16183f41c07dSBarry Smith     len = ((Mat_SeqAIJ *) (oldmat->B->data))->n;
16193f41c07dSBarry Smith     a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray);
1620464493b3SBarry Smith     PLogObjectMemory(mat,len*sizeof(int));
16213f41c07dSBarry Smith     if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
1622416022c9SBarry Smith   } else a->garray = 0;
1623d6dfbf8fSBarry Smith 
1624416022c9SBarry Smith   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
1625416022c9SBarry Smith   PLogObjectParent(mat,a->lvec);
1626a56f8943SBarry Smith   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
1627416022c9SBarry Smith   PLogObjectParent(mat,a->Mvctx);
1628416022c9SBarry Smith   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
1629416022c9SBarry Smith   PLogObjectParent(mat,a->A);
1630416022c9SBarry Smith   ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
1631416022c9SBarry Smith   PLogObjectParent(mat,a->B);
16325dd7a6c7SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
163325cdf11fSBarry Smith   if (flg) {
1634682d7d0cSBarry Smith     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
1635682d7d0cSBarry Smith   }
16368a729477SBarry Smith   *newmat = mat;
16378a729477SBarry Smith   return 0;
16388a729477SBarry Smith }
1639416022c9SBarry Smith 
164077c4ece6SBarry Smith #include "sys.h"
1641416022c9SBarry Smith 
16425615d1e5SSatish Balay #undef __FUNC__
16435615d1e5SSatish Balay #define __FUNC__ "MatLoad_MPIAIJ"
164419bcc07fSBarry Smith int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
1645416022c9SBarry Smith {
1646d65a2f8fSBarry Smith   Mat          A;
1647416022c9SBarry Smith   int          i, nz, ierr, j,rstart, rend, fd;
1648d65a2f8fSBarry Smith   Scalar       *vals,*svals;
164919bcc07fSBarry Smith   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1650416022c9SBarry Smith   MPI_Status   status;
165117699dbbSLois Curfman McInnes   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1652d65a2f8fSBarry Smith   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
165319bcc07fSBarry Smith   int          tag = ((PetscObject)viewer)->tag;
1654416022c9SBarry Smith 
165517699dbbSLois Curfman McInnes   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
165617699dbbSLois Curfman McInnes   if (!rank) {
165790ace30eSBarry Smith     ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
165877c4ece6SBarry Smith     ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr);
1659e3372554SBarry Smith     if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not matrix object");
1660416022c9SBarry Smith   }
1661416022c9SBarry Smith 
1662416022c9SBarry Smith   MPI_Bcast(header+1,3,MPI_INT,0,comm);
1663416022c9SBarry Smith   M = header[1]; N = header[2];
1664416022c9SBarry Smith   /* determine ownership of all rows */
166517699dbbSLois Curfman McInnes   m = M/size + ((M % size) > rank);
16660452661fSBarry Smith   rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
1667416022c9SBarry Smith   MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1668416022c9SBarry Smith   rowners[0] = 0;
166917699dbbSLois Curfman McInnes   for ( i=2; i<=size; i++ ) {
1670416022c9SBarry Smith     rowners[i] += rowners[i-1];
1671416022c9SBarry Smith   }
167217699dbbSLois Curfman McInnes   rstart = rowners[rank];
167317699dbbSLois Curfman McInnes   rend   = rowners[rank+1];
1674416022c9SBarry Smith 
1675416022c9SBarry Smith   /* distribute row lengths to all processors */
16760452661fSBarry Smith   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
1677416022c9SBarry Smith   offlens = ourlens + (rend-rstart);
167817699dbbSLois Curfman McInnes   if (!rank) {
16790452661fSBarry Smith     rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
168077c4ece6SBarry Smith     ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
16810452661fSBarry Smith     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
168217699dbbSLois Curfman McInnes     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
1683d65a2f8fSBarry Smith     MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
16840452661fSBarry Smith     PetscFree(sndcounts);
1685416022c9SBarry Smith   }
1686416022c9SBarry Smith   else {
1687416022c9SBarry Smith     MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
1688416022c9SBarry Smith   }
1689416022c9SBarry Smith 
169017699dbbSLois Curfman McInnes   if (!rank) {
1691416022c9SBarry Smith     /* calculate the number of nonzeros on each processor */
16920452661fSBarry Smith     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
1693cddf8d76SBarry Smith     PetscMemzero(procsnz,size*sizeof(int));
169417699dbbSLois Curfman McInnes     for ( i=0; i<size; i++ ) {
1695416022c9SBarry Smith       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
1696416022c9SBarry Smith         procsnz[i] += rowlengths[j];
1697416022c9SBarry Smith       }
1698416022c9SBarry Smith     }
16990452661fSBarry Smith     PetscFree(rowlengths);
1700416022c9SBarry Smith 
1701416022c9SBarry Smith     /* determine max buffer needed and allocate it */
1702416022c9SBarry Smith     maxnz = 0;
170317699dbbSLois Curfman McInnes     for ( i=0; i<size; i++ ) {
17040452661fSBarry Smith       maxnz = PetscMax(maxnz,procsnz[i]);
1705416022c9SBarry Smith     }
17060452661fSBarry Smith     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
1707416022c9SBarry Smith 
1708416022c9SBarry Smith     /* read in my part of the matrix column indices  */
1709416022c9SBarry Smith     nz = procsnz[0];
17100452661fSBarry Smith     mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
171177c4ece6SBarry Smith     ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr);
1712d65a2f8fSBarry Smith 
1713d65a2f8fSBarry Smith     /* read in every one elses and ship off */
171417699dbbSLois Curfman McInnes     for ( i=1; i<size; i++ ) {
1715d65a2f8fSBarry Smith       nz = procsnz[i];
171677c4ece6SBarry Smith       ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
1717d65a2f8fSBarry Smith       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
1718d65a2f8fSBarry Smith     }
17190452661fSBarry Smith     PetscFree(cols);
1720416022c9SBarry Smith   }
1721416022c9SBarry Smith   else {
1722416022c9SBarry Smith     /* determine buffer space needed for message */
1723416022c9SBarry Smith     nz = 0;
1724416022c9SBarry Smith     for ( i=0; i<m; i++ ) {
1725416022c9SBarry Smith       nz += ourlens[i];
1726416022c9SBarry Smith     }
17270452661fSBarry Smith     mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1728416022c9SBarry Smith 
1729416022c9SBarry Smith     /* receive message of column indices*/
1730d65a2f8fSBarry Smith     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
1731416022c9SBarry Smith     MPI_Get_count(&status,MPI_INT,&maxnz);
1732e3372554SBarry Smith     if (maxnz != nz) SETERRQ(1,0,"something is wrong with file");
1733416022c9SBarry Smith   }
1734416022c9SBarry Smith 
1735416022c9SBarry Smith   /* loop over local rows, determining number of off diagonal entries */
1736cddf8d76SBarry Smith   PetscMemzero(offlens,m*sizeof(int));
1737416022c9SBarry Smith   jj = 0;
1738416022c9SBarry Smith   for ( i=0; i<m; i++ ) {
1739416022c9SBarry Smith     for ( j=0; j<ourlens[i]; j++ ) {
1740d65a2f8fSBarry Smith       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1741416022c9SBarry Smith       jj++;
1742416022c9SBarry Smith     }
1743416022c9SBarry Smith   }
1744d65a2f8fSBarry Smith 
1745d65a2f8fSBarry Smith   /* create our matrix */
1746416022c9SBarry Smith   for ( i=0; i<m; i++ ) {
1747416022c9SBarry Smith     ourlens[i] -= offlens[i];
1748416022c9SBarry Smith   }
1749d65a2f8fSBarry Smith   ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1750d65a2f8fSBarry Smith   A = *newmat;
17516d4a8577SBarry Smith   MatSetOption(A,MAT_COLUMNS_SORTED);
1752d65a2f8fSBarry Smith   for ( i=0; i<m; i++ ) {
1753d65a2f8fSBarry Smith     ourlens[i] += offlens[i];
1754d65a2f8fSBarry Smith   }
1755416022c9SBarry Smith 
175617699dbbSLois Curfman McInnes   if (!rank) {
17570452661fSBarry Smith     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);
1758416022c9SBarry Smith 
1759416022c9SBarry Smith     /* read in my part of the matrix numerical values  */
1760416022c9SBarry Smith     nz = procsnz[0];
176177c4ece6SBarry Smith     ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
1762d65a2f8fSBarry Smith 
1763d65a2f8fSBarry Smith     /* insert into matrix */
1764d65a2f8fSBarry Smith     jj      = rstart;
1765d65a2f8fSBarry Smith     smycols = mycols;
1766d65a2f8fSBarry Smith     svals   = vals;
1767d65a2f8fSBarry Smith     for ( i=0; i<m; i++ ) {
1768d65a2f8fSBarry Smith       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1769d65a2f8fSBarry Smith       smycols += ourlens[i];
1770d65a2f8fSBarry Smith       svals   += ourlens[i];
1771d65a2f8fSBarry Smith       jj++;
1772416022c9SBarry Smith     }
1773416022c9SBarry Smith 
1774d65a2f8fSBarry Smith     /* read in other processors and ship out */
177517699dbbSLois Curfman McInnes     for ( i=1; i<size; i++ ) {
1776416022c9SBarry Smith       nz = procsnz[i];
177777c4ece6SBarry Smith       ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
1778d65a2f8fSBarry Smith       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
1779416022c9SBarry Smith     }
17800452661fSBarry Smith     PetscFree(procsnz);
1781416022c9SBarry Smith   }
1782d65a2f8fSBarry Smith   else {
1783d65a2f8fSBarry Smith     /* receive numeric values */
17840452661fSBarry Smith     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);
1785416022c9SBarry Smith 
1786d65a2f8fSBarry Smith     /* receive message of values*/
1787d65a2f8fSBarry Smith     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
1788d65a2f8fSBarry Smith     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1789e3372554SBarry Smith     if (maxnz != nz) SETERRQ(1,0,"something is wrong with file");
1790d65a2f8fSBarry Smith 
1791d65a2f8fSBarry Smith     /* insert into matrix */
1792d65a2f8fSBarry Smith     jj      = rstart;
1793d65a2f8fSBarry Smith     smycols = mycols;
1794d65a2f8fSBarry Smith     svals   = vals;
1795d65a2f8fSBarry Smith     for ( i=0; i<m; i++ ) {
1796d65a2f8fSBarry Smith       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1797d65a2f8fSBarry Smith       smycols += ourlens[i];
1798d65a2f8fSBarry Smith       svals   += ourlens[i];
1799d65a2f8fSBarry Smith       jj++;
1800d65a2f8fSBarry Smith     }
1801d65a2f8fSBarry Smith   }
18020452661fSBarry Smith   PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);
1803d65a2f8fSBarry Smith 
18046d4a8577SBarry Smith   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
18056d4a8577SBarry Smith   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1806416022c9SBarry Smith   return 0;
1807416022c9SBarry Smith }
1808