xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 0520107fe0fb6fb2de36e5dc2c15a82b6ccd3e44)
1cb512458SBarry Smith #ifndef lint
2*0520107fSSatish Balay static char vcid[] = "$Id: mpiaij.c,v 1.185 1997/01/06 20:24:32 balay Exp balay $";
3cb512458SBarry Smith #endif
48a729477SBarry Smith 
570f55243SBarry Smith #include "src/mat/impls/aij/mpi/mpiaij.h"
6f5eb4b81SSatish Balay #include "src/vec/vecimpl.h"
7d9942c19SSatish Balay #include "src/inline/spops.h"
88a729477SBarry Smith 
99e25ed09SBarry Smith /* local utility routine that creates a mapping from the global column
109e25ed09SBarry Smith number to the local number in the off-diagonal part of the local
119e25ed09SBarry Smith storage of the matrix.  This is done in a non scable way since the
129e25ed09SBarry Smith length of colmap equals the global matrix length.
139e25ed09SBarry Smith */
145615d1e5SSatish Balay #undef __FUNC__
155615d1e5SSatish Balay #define __FUNC__ "CreateColmap_MPIAIJ_Private"
160a198c4cSBarry Smith int CreateColmap_MPIAIJ_Private(Mat mat)
179e25ed09SBarry Smith {
1844a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
19ec8511deSBarry Smith   Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
20905e6a2fSBarry Smith   int        n = B->n,i;
21dbb450caSBarry Smith 
220452661fSBarry Smith   aij->colmap = (int *) PetscMalloc(aij->N*sizeof(int));CHKPTRQ(aij->colmap);
23464493b3SBarry Smith   PLogObjectMemory(mat,aij->N*sizeof(int));
24cddf8d76SBarry Smith   PetscMemzero(aij->colmap,aij->N*sizeof(int));
25905e6a2fSBarry Smith   for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
269e25ed09SBarry Smith   return 0;
279e25ed09SBarry Smith }
289e25ed09SBarry Smith 
292493cbb0SBarry Smith extern int DisAssemble_MPIAIJ(Mat);
302493cbb0SBarry Smith 
315615d1e5SSatish Balay #undef __FUNC__
325615d1e5SSatish Balay #define __FUNC__ "MatGetRowIJ_MPIAIJ"
333b2fbd54SBarry Smith static int MatGetRowIJ_MPIAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
343b2fbd54SBarry Smith                            PetscTruth *done)
35299609e3SLois Curfman McInnes {
36299609e3SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
37299609e3SLois Curfman McInnes   int        ierr;
3817699dbbSLois Curfman McInnes   if (aij->size == 1) {
393b2fbd54SBarry Smith     ierr = MatGetRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
40e3372554SBarry Smith   } else SETERRQ(1,0,"not supported in parallel");
413b2fbd54SBarry Smith   return 0;
423b2fbd54SBarry Smith }
433b2fbd54SBarry Smith 
445615d1e5SSatish Balay #undef __FUNC__
455615d1e5SSatish Balay #define __FUNC__ "MatRestoreRowIJ_MPIAIJ"
463b2fbd54SBarry Smith static int MatRestoreRowIJ_MPIAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
473b2fbd54SBarry Smith                                PetscTruth *done)
483b2fbd54SBarry Smith {
493b2fbd54SBarry Smith   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
503b2fbd54SBarry Smith   int        ierr;
513b2fbd54SBarry Smith   if (aij->size == 1) {
523b2fbd54SBarry Smith     ierr = MatRestoreRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
53e3372554SBarry Smith   } else SETERRQ(1,0,"not supported in parallel");
54299609e3SLois Curfman McInnes   return 0;
55299609e3SLois Curfman McInnes }
56299609e3SLois Curfman McInnes 
57*0520107fSSatish Balay #define CHUNKSIZE   15
58*0520107fSSatish Balay #define MatSetValues_SeqAIJ_A_Private(row,col,value) \
59*0520107fSSatish Balay { \
60*0520107fSSatish Balay  \
61*0520107fSSatish Balay     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
62*0520107fSSatish Balay     rmax = imax[row]; nrow = ailen[row];  \
63*0520107fSSatish Balay       for ( _i=0; _i<nrow; _i++ ) { \
64*0520107fSSatish Balay         if (rp[_i] > col) break; \
65*0520107fSSatish Balay         if (rp[_i] == col) { \
66*0520107fSSatish Balay           if (addv == ADD_VALUES) ap[_i] += value;   \
67*0520107fSSatish Balay           else                  ap[_i] = value; \
68*0520107fSSatish Balay           goto _noinsert; \
69*0520107fSSatish Balay         } \
70*0520107fSSatish Balay       }  \
71*0520107fSSatish Balay       if (a->nonew) goto _noinsert; \
72*0520107fSSatish Balay       if (nrow >= rmax) { \
73*0520107fSSatish Balay         /* there is no extra room in row, therefore enlarge */ \
74*0520107fSSatish Balay         int    new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \
75*0520107fSSatish Balay         Scalar *new_a; \
76*0520107fSSatish Balay  \
77*0520107fSSatish Balay         /* malloc new storage space */ \
78*0520107fSSatish Balay         len     = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \
79*0520107fSSatish Balay         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
80*0520107fSSatish Balay         new_j   = (int *) (new_a + new_nz); \
81*0520107fSSatish Balay         new_i   = new_j + new_nz; \
82*0520107fSSatish Balay  \
83*0520107fSSatish Balay         /* copy over old data into new slots */ \
84*0520107fSSatish Balay         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \
85*0520107fSSatish Balay         for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
86*0520107fSSatish Balay         PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int)); \
87*0520107fSSatish Balay         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
88*0520107fSSatish Balay         PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
89*0520107fSSatish Balay                                                            len*sizeof(int)); \
90*0520107fSSatish Balay         PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar)); \
91*0520107fSSatish Balay         PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
92*0520107fSSatish Balay                                                            len*sizeof(Scalar));  \
93*0520107fSSatish Balay         /* free up old matrix storage */ \
94*0520107fSSatish Balay         PetscFree(a->a);  \
95*0520107fSSatish Balay         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
96*0520107fSSatish Balay         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
97*0520107fSSatish Balay         a->singlemalloc = 1; \
98*0520107fSSatish Balay  \
99*0520107fSSatish Balay         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
100*0520107fSSatish Balay         rmax = imax[row] = imax[row] + CHUNKSIZE; \
101*0520107fSSatish Balay         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
102*0520107fSSatish Balay         a->maxnz += CHUNKSIZE; \
103*0520107fSSatish Balay         a->reallocs++; \
104*0520107fSSatish Balay       } \
105*0520107fSSatish Balay       N = nrow++ - 1; a->nz++; \
106*0520107fSSatish Balay       /* shift up all the later entries in this row */ \
107*0520107fSSatish Balay       for ( ii=N; ii>=_i; ii-- ) { \
108*0520107fSSatish Balay         rp[ii+1] = rp[ii]; \
109*0520107fSSatish Balay         ap[ii+1] = ap[ii]; \
110*0520107fSSatish Balay       } \
111*0520107fSSatish Balay       rp[_i] = col;  \
112*0520107fSSatish Balay       ap[_i] = value;  \
113*0520107fSSatish Balay       _noinsert:; \
114*0520107fSSatish Balay     ailen[row] = nrow; \
115*0520107fSSatish Balay   return 0; \
116*0520107fSSatish Balay }
1170a198c4cSBarry Smith 
118*0520107fSSatish Balay extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
1195615d1e5SSatish Balay #undef __FUNC__
1205615d1e5SSatish Balay #define __FUNC__ "MatSetValues_MPIAIJ"
1214b0e389bSBarry Smith static int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
1228a729477SBarry Smith {
12344a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1244b0e389bSBarry Smith   Scalar     value;
1251eb62cbbSBarry Smith   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
1261eb62cbbSBarry Smith   int        cstart = aij->cstart, cend = aij->cend,row,col;
127905e6a2fSBarry Smith   int        roworiented = aij->roworiented;
1288a729477SBarry Smith 
129*0520107fSSatish Balay   /* Some Variables required in the macro */
130*0520107fSSatish Balay   Mat        A = aij->A;
131*0520107fSSatish Balay   Mat_SeqAIJ *a = (Mat_SeqAIJ *) (A)->data;
132*0520107fSSatish Balay   int        *rp,ii,nrow,_i,rmax,N;
133*0520107fSSatish Balay   int        *imax=a->imax,*ai=a->i,*ailen=a->ilen;
134*0520107fSSatish Balay   int        *aj=a->j,shift=a->indexshift;
135*0520107fSSatish Balay   Scalar     *ap,*aa=a->a;
136*0520107fSSatish Balay 
1370a198c4cSBarry Smith #if defined(PETSC_BOPT_g)
13864119d58SLois Curfman McInnes   if (aij->insertmode != NOT_SET_VALUES && aij->insertmode != addv) {
139e3372554SBarry Smith     SETERRQ(1,0,"Cannot mix inserts and adds");
1408a729477SBarry Smith   }
1410a198c4cSBarry Smith #endif
1421eb62cbbSBarry Smith   aij->insertmode = addv;
1438a729477SBarry Smith   for ( i=0; i<m; i++ ) {
1440a198c4cSBarry Smith #if defined(PETSC_BOPT_g)
145e3372554SBarry Smith     if (im[i] < 0) SETERRQ(1,0,"Negative row");
146e3372554SBarry Smith     if (im[i] >= aij->M) SETERRQ(1,0,"Row too large");
1470a198c4cSBarry Smith #endif
1484b0e389bSBarry Smith     if (im[i] >= rstart && im[i] < rend) {
1494b0e389bSBarry Smith       row = im[i] - rstart;
1501eb62cbbSBarry Smith       for ( j=0; j<n; j++ ) {
1514b0e389bSBarry Smith         if (in[j] >= cstart && in[j] < cend){
1524b0e389bSBarry Smith           col = in[j] - cstart;
1534b0e389bSBarry Smith           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
154*0520107fSSatish Balay           MatSetValues_SeqAIJ_A_Private(row,col,value);
155*0520107fSSatish Balay           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
1561eb62cbbSBarry Smith         }
1570a198c4cSBarry Smith #if defined(PETSC_BOPT_g)
158e3372554SBarry Smith         else if (in[j] < 0) {SETERRQ(1,0,"Negative column");}
159e3372554SBarry Smith         else if (in[j] >= aij->N) {SETERRQ(1,0,"Col too large");}
1600a198c4cSBarry Smith #endif
1611eb62cbbSBarry Smith         else {
162227d817aSBarry Smith           if (mat->was_assembled) {
163905e6a2fSBarry Smith             if (!aij->colmap) {
164905e6a2fSBarry Smith               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
165905e6a2fSBarry Smith             }
166905e6a2fSBarry Smith             col = aij->colmap[in[j]] - 1;
167ec8511deSBarry Smith             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
1682493cbb0SBarry Smith               ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
1694b0e389bSBarry Smith               col =  in[j];
170d6dfbf8fSBarry Smith             }
1719e25ed09SBarry Smith           }
1724b0e389bSBarry Smith           else col = in[j];
1734b0e389bSBarry Smith           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
1740a198c4cSBarry Smith           ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
1751eb62cbbSBarry Smith         }
1761eb62cbbSBarry Smith       }
1771eb62cbbSBarry Smith     }
1781eb62cbbSBarry Smith     else {
17990f02eecSBarry Smith       if (roworiented && !aij->donotstash) {
1804b0e389bSBarry Smith         ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
1814b0e389bSBarry Smith       }
1824b0e389bSBarry Smith       else {
18390f02eecSBarry Smith         if (!aij->donotstash) {
1844b0e389bSBarry Smith           row = im[i];
1854b0e389bSBarry Smith           for ( j=0; j<n; j++ ) {
1864b0e389bSBarry Smith             ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
1874b0e389bSBarry Smith           }
1884b0e389bSBarry Smith         }
1891eb62cbbSBarry Smith       }
1908a729477SBarry Smith     }
19190f02eecSBarry Smith   }
1928a729477SBarry Smith   return 0;
1938a729477SBarry Smith }
1948a729477SBarry Smith 
1955615d1e5SSatish Balay #undef __FUNC__
1965615d1e5SSatish Balay #define __FUNC__ "MatGetValues_MPIAIJ"
197b49de8d1SLois Curfman McInnes static int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
198b49de8d1SLois Curfman McInnes {
199b49de8d1SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
200b49de8d1SLois Curfman McInnes   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
201b49de8d1SLois Curfman McInnes   int        cstart = aij->cstart, cend = aij->cend,row,col;
202b49de8d1SLois Curfman McInnes 
203b49de8d1SLois Curfman McInnes   for ( i=0; i<m; i++ ) {
204e3372554SBarry Smith     if (idxm[i] < 0) SETERRQ(1,0,"Negative row");
205e3372554SBarry Smith     if (idxm[i] >= aij->M) SETERRQ(1,0,"Row too large");
206b49de8d1SLois Curfman McInnes     if (idxm[i] >= rstart && idxm[i] < rend) {
207b49de8d1SLois Curfman McInnes       row = idxm[i] - rstart;
208b49de8d1SLois Curfman McInnes       for ( j=0; j<n; j++ ) {
209e3372554SBarry Smith         if (idxn[j] < 0) SETERRQ(1,0,"Negative column");
210e3372554SBarry Smith         if (idxn[j] >= aij->N) SETERRQ(1,0,"Col too large");
211b49de8d1SLois Curfman McInnes         if (idxn[j] >= cstart && idxn[j] < cend){
212b49de8d1SLois Curfman McInnes           col = idxn[j] - cstart;
213b49de8d1SLois Curfman McInnes           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
214b49de8d1SLois Curfman McInnes         }
215b49de8d1SLois Curfman McInnes         else {
216905e6a2fSBarry Smith           if (!aij->colmap) {
217905e6a2fSBarry Smith             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
218905e6a2fSBarry Smith           }
219905e6a2fSBarry Smith           col = aij->colmap[idxn[j]] - 1;
220e60e1c95SSatish Balay           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
221d9d09a02SSatish Balay           else {
222b49de8d1SLois Curfman McInnes             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
223b49de8d1SLois Curfman McInnes           }
224b49de8d1SLois Curfman McInnes         }
225b49de8d1SLois Curfman McInnes       }
226d9d09a02SSatish Balay     }
227b49de8d1SLois Curfman McInnes     else {
228e3372554SBarry Smith       SETERRQ(1,0,"Only local values currently supported");
229b49de8d1SLois Curfman McInnes     }
230b49de8d1SLois Curfman McInnes   }
231b49de8d1SLois Curfman McInnes   return 0;
232b49de8d1SLois Curfman McInnes }
233b49de8d1SLois Curfman McInnes 
2345615d1e5SSatish Balay #undef __FUNC__
2355615d1e5SSatish Balay #define __FUNC__ "MatAssemblyBegin_MPIAIJ"
236fc76ce87SLois Curfman McInnes static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
2378a729477SBarry Smith {
23844a69424SLois Curfman McInnes   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
239d6dfbf8fSBarry Smith   MPI_Comm    comm = mat->comm;
24017699dbbSLois Curfman McInnes   int         size = aij->size, *owners = aij->rowners;
24117699dbbSLois Curfman McInnes   int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
2421eb62cbbSBarry Smith   MPI_Request *send_waits,*recv_waits;
2436abc6512SBarry Smith   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
2441eb62cbbSBarry Smith   InsertMode  addv;
2451eb62cbbSBarry Smith   Scalar      *rvalues,*svalues;
2461eb62cbbSBarry Smith 
2471eb62cbbSBarry Smith   /* make sure all processors are either in INSERTMODE or ADDMODE */
248d65a2f8fSBarry Smith   MPI_Allreduce(&aij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
249dbb450caSBarry Smith   if (addv == (ADD_VALUES|INSERT_VALUES)) {
250e3372554SBarry Smith     SETERRQ(1,0,"Some processors inserted others added");
2511eb62cbbSBarry Smith   }
2521eb62cbbSBarry Smith   aij->insertmode = addv; /* in case this processor had no cache */
2531eb62cbbSBarry Smith 
2541eb62cbbSBarry Smith   /*  first count number of contributors to each processor */
2550452661fSBarry Smith   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
256cddf8d76SBarry Smith   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
2570452661fSBarry Smith   owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
2581eb62cbbSBarry Smith   for ( i=0; i<aij->stash.n; i++ ) {
2591eb62cbbSBarry Smith     idx = aij->stash.idx[i];
26017699dbbSLois Curfman McInnes     for ( j=0; j<size; j++ ) {
2611eb62cbbSBarry Smith       if (idx >= owners[j] && idx < owners[j+1]) {
2621eb62cbbSBarry Smith         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
2638a729477SBarry Smith       }
2648a729477SBarry Smith     }
2658a729477SBarry Smith   }
26617699dbbSLois Curfman McInnes   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
2671eb62cbbSBarry Smith 
2681eb62cbbSBarry Smith   /* inform other processors of number of messages and max length*/
2690452661fSBarry Smith   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
27017699dbbSLois Curfman McInnes   MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);
27117699dbbSLois Curfman McInnes   nreceives = work[rank];
27217699dbbSLois Curfman McInnes   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
27317699dbbSLois Curfman McInnes   nmax = work[rank];
2740452661fSBarry Smith   PetscFree(work);
2751eb62cbbSBarry Smith 
2761eb62cbbSBarry Smith   /* post receives:
2771eb62cbbSBarry Smith        1) each message will consist of ordered pairs
2781eb62cbbSBarry Smith      (global index,value) we store the global index as a double
279d6dfbf8fSBarry Smith      to simplify the message passing.
2801eb62cbbSBarry Smith        2) since we don't know how long each individual message is we
2811eb62cbbSBarry Smith      allocate the largest needed buffer for each receive. Potentially
2821eb62cbbSBarry Smith      this is a lot of wasted space.
2831eb62cbbSBarry Smith 
2841eb62cbbSBarry Smith 
2851eb62cbbSBarry Smith        This could be done better.
2861eb62cbbSBarry Smith   */
2870452661fSBarry Smith   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
28878b31e54SBarry Smith   CHKPTRQ(rvalues);
2890452661fSBarry Smith   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
29078b31e54SBarry Smith   CHKPTRQ(recv_waits);
2911eb62cbbSBarry Smith   for ( i=0; i<nreceives; i++ ) {
292416022c9SBarry Smith     MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
2931eb62cbbSBarry Smith               comm,recv_waits+i);
2941eb62cbbSBarry Smith   }
2951eb62cbbSBarry Smith 
2961eb62cbbSBarry Smith   /* do sends:
2971eb62cbbSBarry Smith       1) starts[i] gives the starting index in svalues for stuff going to
2981eb62cbbSBarry Smith          the ith processor
2991eb62cbbSBarry Smith   */
3000452661fSBarry Smith   svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
3010452661fSBarry Smith   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
30278b31e54SBarry Smith   CHKPTRQ(send_waits);
3030452661fSBarry Smith   starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
3041eb62cbbSBarry Smith   starts[0] = 0;
30517699dbbSLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
3061eb62cbbSBarry Smith   for ( i=0; i<aij->stash.n; i++ ) {
3071eb62cbbSBarry Smith     svalues[3*starts[owner[i]]]       = (Scalar)  aij->stash.idx[i];
3081eb62cbbSBarry Smith     svalues[3*starts[owner[i]]+1]     = (Scalar)  aij->stash.idy[i];
3091eb62cbbSBarry Smith     svalues[3*(starts[owner[i]]++)+2] =  aij->stash.array[i];
3101eb62cbbSBarry Smith   }
3110452661fSBarry Smith   PetscFree(owner);
3121eb62cbbSBarry Smith   starts[0] = 0;
31317699dbbSLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
3141eb62cbbSBarry Smith   count = 0;
31517699dbbSLois Curfman McInnes   for ( i=0; i<size; i++ ) {
3161eb62cbbSBarry Smith     if (procs[i]) {
317416022c9SBarry Smith       MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
3181eb62cbbSBarry Smith                 comm,send_waits+count++);
3191eb62cbbSBarry Smith     }
3201eb62cbbSBarry Smith   }
3210452661fSBarry Smith   PetscFree(starts); PetscFree(nprocs);
3221eb62cbbSBarry Smith 
3231eb62cbbSBarry Smith   /* Free cache space */
32455908cccSLois Curfman McInnes   PLogInfo(mat,"MatAssemblyBegin_MPIAIJ:Number of off-processor values %d\n",aij->stash.n);
32578b31e54SBarry Smith   ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
3261eb62cbbSBarry Smith 
3271eb62cbbSBarry Smith   aij->svalues    = svalues;    aij->rvalues    = rvalues;
3281eb62cbbSBarry Smith   aij->nsends     = nsends;     aij->nrecvs     = nreceives;
3291eb62cbbSBarry Smith   aij->send_waits = send_waits; aij->recv_waits = recv_waits;
3301eb62cbbSBarry Smith   aij->rmax       = nmax;
3311eb62cbbSBarry Smith 
3321eb62cbbSBarry Smith   return 0;
3331eb62cbbSBarry Smith }
33444a69424SLois Curfman McInnes extern int MatSetUpMultiply_MPIAIJ(Mat);
3351eb62cbbSBarry Smith 
3365615d1e5SSatish Balay #undef __FUNC__
3375615d1e5SSatish Balay #define __FUNC__ "MatAssemblyEnd_MPIAIJ"
338fc76ce87SLois Curfman McInnes static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
3391eb62cbbSBarry Smith {
34044a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
3411eb62cbbSBarry Smith   MPI_Status  *send_status,recv_status;
342416022c9SBarry Smith   int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
343905e6a2fSBarry Smith   int         row,col,other_disassembled;
3441eb62cbbSBarry Smith   Scalar      *values,val;
3451eb62cbbSBarry Smith   InsertMode  addv = aij->insertmode;
3461eb62cbbSBarry Smith 
3471eb62cbbSBarry Smith   /*  wait on receives */
3481eb62cbbSBarry Smith   while (count) {
349d6dfbf8fSBarry Smith     MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
3501eb62cbbSBarry Smith     /* unpack receives into our local space */
351d6dfbf8fSBarry Smith     values = aij->rvalues + 3*imdex*aij->rmax;
352c60448a5SBarry Smith     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
3531eb62cbbSBarry Smith     n = n/3;
3541eb62cbbSBarry Smith     for ( i=0; i<n; i++ ) {
355227d817aSBarry Smith       row = (int) PetscReal(values[3*i]) - aij->rstart;
356227d817aSBarry Smith       col = (int) PetscReal(values[3*i+1]);
3571eb62cbbSBarry Smith       val = values[3*i+2];
3581eb62cbbSBarry Smith       if (col >= aij->cstart && col < aij->cend) {
3591eb62cbbSBarry Smith         col -= aij->cstart;
3601eb62cbbSBarry Smith         MatSetValues(aij->A,1,&row,1,&col,&val,addv);
3611eb62cbbSBarry Smith       }
3621eb62cbbSBarry Smith       else {
36355a1b374SBarry Smith         if (mat->was_assembled || mat->assembled) {
364905e6a2fSBarry Smith           if (!aij->colmap) {
365905e6a2fSBarry Smith             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
366905e6a2fSBarry Smith           }
367905e6a2fSBarry Smith           col = aij->colmap[col] - 1;
368ec8511deSBarry Smith           if (col < 0  && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
3692493cbb0SBarry Smith             ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
370227d817aSBarry Smith             col = (int) PetscReal(values[3*i+1]);
371d6dfbf8fSBarry Smith           }
3729e25ed09SBarry Smith         }
3731eb62cbbSBarry Smith         MatSetValues(aij->B,1,&row,1,&col,&val,addv);
3741eb62cbbSBarry Smith       }
3751eb62cbbSBarry Smith     }
3761eb62cbbSBarry Smith     count--;
3771eb62cbbSBarry Smith   }
3780452661fSBarry Smith   PetscFree(aij->recv_waits); PetscFree(aij->rvalues);
3791eb62cbbSBarry Smith 
3801eb62cbbSBarry Smith   /* wait on sends */
3811eb62cbbSBarry Smith   if (aij->nsends) {
3820a198c4cSBarry Smith     send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
3831eb62cbbSBarry Smith     MPI_Waitall(aij->nsends,aij->send_waits,send_status);
3840452661fSBarry Smith     PetscFree(send_status);
3851eb62cbbSBarry Smith   }
3860452661fSBarry Smith   PetscFree(aij->send_waits); PetscFree(aij->svalues);
3871eb62cbbSBarry Smith 
38864119d58SLois Curfman McInnes   aij->insertmode = NOT_SET_VALUES;
38978b31e54SBarry Smith   ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
39078b31e54SBarry Smith   ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);
3911eb62cbbSBarry Smith 
3922493cbb0SBarry Smith   /* determine if any processor has disassembled, if so we must
3932493cbb0SBarry Smith      also disassemble ourselfs, in order that we may reassemble. */
394227d817aSBarry Smith   MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
395227d817aSBarry Smith   if (mat->was_assembled && !other_disassembled) {
3962493cbb0SBarry Smith     ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
3972493cbb0SBarry Smith   }
3982493cbb0SBarry Smith 
3996d4a8577SBarry Smith   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
40078b31e54SBarry Smith     ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
4015e42470aSBarry Smith   }
40278b31e54SBarry Smith   ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
40378b31e54SBarry Smith   ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);
4045e42470aSBarry Smith 
4057a0afa10SBarry Smith   if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;}
4068a729477SBarry Smith   return 0;
4078a729477SBarry Smith }
4088a729477SBarry Smith 
4095615d1e5SSatish Balay #undef __FUNC__
4105615d1e5SSatish Balay #define __FUNC__ "MatZeroEntries_MPIAIJ"
4112ee70a88SLois Curfman McInnes static int MatZeroEntries_MPIAIJ(Mat A)
4121eb62cbbSBarry Smith {
41344a69424SLois Curfman McInnes   Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
414dbd7a890SLois Curfman McInnes   int        ierr;
41578b31e54SBarry Smith   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
41678b31e54SBarry Smith   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
4171eb62cbbSBarry Smith   return 0;
4181eb62cbbSBarry Smith }
4191eb62cbbSBarry Smith 
4201eb62cbbSBarry Smith /* the code does not do the diagonal entries correctly unless the
4211eb62cbbSBarry Smith    matrix is square and the column and row owerships are identical.
4221eb62cbbSBarry Smith    This is a BUG. The only way to fix it seems to be to access
4231eb62cbbSBarry Smith    aij->A and aij->B directly and not through the MatZeroRows()
4241eb62cbbSBarry Smith    routine.
4251eb62cbbSBarry Smith */
4265615d1e5SSatish Balay #undef __FUNC__
4275615d1e5SSatish Balay #define __FUNC__ "MatZeroRows_MPIAIJ"
42844a69424SLois Curfman McInnes static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
4291eb62cbbSBarry Smith {
43044a69424SLois Curfman McInnes   Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
43117699dbbSLois Curfman McInnes   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
4326abc6512SBarry Smith   int            *procs,*nprocs,j,found,idx,nsends,*work;
43317699dbbSLois Curfman McInnes   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
4345392566eSBarry Smith   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
435d6dfbf8fSBarry Smith   int            *lens,imdex,*lrows,*values;
436d6dfbf8fSBarry Smith   MPI_Comm       comm = A->comm;
4371eb62cbbSBarry Smith   MPI_Request    *send_waits,*recv_waits;
4381eb62cbbSBarry Smith   MPI_Status     recv_status,*send_status;
4391eb62cbbSBarry Smith   IS             istmp;
4401eb62cbbSBarry Smith 
44177c4ece6SBarry Smith   ierr = ISGetSize(is,&N); CHKERRQ(ierr);
44278b31e54SBarry Smith   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
4431eb62cbbSBarry Smith 
4441eb62cbbSBarry Smith   /*  first count number of contributors to each processor */
4450452661fSBarry Smith   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
446cddf8d76SBarry Smith   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
4470452661fSBarry Smith   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
4481eb62cbbSBarry Smith   for ( i=0; i<N; i++ ) {
4491eb62cbbSBarry Smith     idx = rows[i];
4501eb62cbbSBarry Smith     found = 0;
45117699dbbSLois Curfman McInnes     for ( j=0; j<size; j++ ) {
4521eb62cbbSBarry Smith       if (idx >= owners[j] && idx < owners[j+1]) {
4531eb62cbbSBarry Smith         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
4541eb62cbbSBarry Smith       }
4551eb62cbbSBarry Smith     }
456e3372554SBarry Smith     if (!found) SETERRQ(1,0,"Index out of range");
4571eb62cbbSBarry Smith   }
45817699dbbSLois Curfman McInnes   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
4591eb62cbbSBarry Smith 
4601eb62cbbSBarry Smith   /* inform other processors of number of messages and max length*/
4610452661fSBarry Smith   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
46217699dbbSLois Curfman McInnes   MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
46317699dbbSLois Curfman McInnes   nrecvs = work[rank];
46417699dbbSLois Curfman McInnes   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
46517699dbbSLois Curfman McInnes   nmax = work[rank];
4660452661fSBarry Smith   PetscFree(work);
4671eb62cbbSBarry Smith 
4681eb62cbbSBarry Smith   /* post receives:   */
4690452661fSBarry Smith   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
47078b31e54SBarry Smith   CHKPTRQ(rvalues);
4710452661fSBarry Smith   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
47278b31e54SBarry Smith   CHKPTRQ(recv_waits);
4731eb62cbbSBarry Smith   for ( i=0; i<nrecvs; i++ ) {
474416022c9SBarry Smith     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
4751eb62cbbSBarry Smith   }
4761eb62cbbSBarry Smith 
4771eb62cbbSBarry Smith   /* do sends:
4781eb62cbbSBarry Smith       1) starts[i] gives the starting index in svalues for stuff going to
4791eb62cbbSBarry Smith          the ith processor
4801eb62cbbSBarry Smith   */
4810452661fSBarry Smith   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
4820452661fSBarry Smith   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
48378b31e54SBarry Smith   CHKPTRQ(send_waits);
4840452661fSBarry Smith   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
4851eb62cbbSBarry Smith   starts[0] = 0;
48617699dbbSLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
4871eb62cbbSBarry Smith   for ( i=0; i<N; i++ ) {
4881eb62cbbSBarry Smith     svalues[starts[owner[i]]++] = rows[i];
4891eb62cbbSBarry Smith   }
4901eb62cbbSBarry Smith   ISRestoreIndices(is,&rows);
4911eb62cbbSBarry Smith 
4921eb62cbbSBarry Smith   starts[0] = 0;
49317699dbbSLois Curfman McInnes   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
4941eb62cbbSBarry Smith   count = 0;
49517699dbbSLois Curfman McInnes   for ( i=0; i<size; i++ ) {
4961eb62cbbSBarry Smith     if (procs[i]) {
497416022c9SBarry Smith       MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
4981eb62cbbSBarry Smith     }
4991eb62cbbSBarry Smith   }
5000452661fSBarry Smith   PetscFree(starts);
5011eb62cbbSBarry Smith 
50217699dbbSLois Curfman McInnes   base = owners[rank];
5031eb62cbbSBarry Smith 
5041eb62cbbSBarry Smith   /*  wait on receives */
5050452661fSBarry Smith   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
5061eb62cbbSBarry Smith   source = lens + nrecvs;
5071eb62cbbSBarry Smith   count  = nrecvs; slen = 0;
5081eb62cbbSBarry Smith   while (count) {
509d6dfbf8fSBarry Smith     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
5101eb62cbbSBarry Smith     /* unpack receives into our local space */
5111eb62cbbSBarry Smith     MPI_Get_count(&recv_status,MPI_INT,&n);
512d6dfbf8fSBarry Smith     source[imdex]  = recv_status.MPI_SOURCE;
513d6dfbf8fSBarry Smith     lens[imdex]  = n;
5141eb62cbbSBarry Smith     slen += n;
5151eb62cbbSBarry Smith     count--;
5161eb62cbbSBarry Smith   }
5170452661fSBarry Smith   PetscFree(recv_waits);
5181eb62cbbSBarry Smith 
5191eb62cbbSBarry Smith   /* move the data into the send scatter */
5200452661fSBarry Smith   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
5211eb62cbbSBarry Smith   count = 0;
5221eb62cbbSBarry Smith   for ( i=0; i<nrecvs; i++ ) {
5231eb62cbbSBarry Smith     values = rvalues + i*nmax;
5241eb62cbbSBarry Smith     for ( j=0; j<lens[i]; j++ ) {
5251eb62cbbSBarry Smith       lrows[count++] = values[j] - base;
5261eb62cbbSBarry Smith     }
5271eb62cbbSBarry Smith   }
5280452661fSBarry Smith   PetscFree(rvalues); PetscFree(lens);
5290452661fSBarry Smith   PetscFree(owner); PetscFree(nprocs);
5301eb62cbbSBarry Smith 
5311eb62cbbSBarry Smith   /* actually zap the local rows */
532537820f0SBarry Smith   ierr = ISCreateGeneral(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
533464493b3SBarry Smith   PLogObjectParent(A,istmp);
5340452661fSBarry Smith   PetscFree(lrows);
53578b31e54SBarry Smith   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
53678b31e54SBarry Smith   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
53778b31e54SBarry Smith   ierr = ISDestroy(istmp); CHKERRQ(ierr);
5381eb62cbbSBarry Smith 
5391eb62cbbSBarry Smith   /* wait on sends */
5401eb62cbbSBarry Smith   if (nsends) {
5410452661fSBarry Smith     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
54278b31e54SBarry Smith     CHKPTRQ(send_status);
5431eb62cbbSBarry Smith     MPI_Waitall(nsends,send_waits,send_status);
5440452661fSBarry Smith     PetscFree(send_status);
5451eb62cbbSBarry Smith   }
5460452661fSBarry Smith   PetscFree(send_waits); PetscFree(svalues);
5471eb62cbbSBarry Smith 
5481eb62cbbSBarry Smith   return 0;
5491eb62cbbSBarry Smith }
5501eb62cbbSBarry Smith 
5515615d1e5SSatish Balay #undef __FUNC__
5525615d1e5SSatish Balay #define __FUNC__ "MatMult_MPIAIJ"
553416022c9SBarry Smith static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
5541eb62cbbSBarry Smith {
555416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
556fbd6ef76SBarry Smith   int        ierr,nt;
557416022c9SBarry Smith 
558a2ce50c7SBarry Smith   ierr = VecGetLocalSize(xx,&nt);  CHKERRQ(ierr);
559fbd6ef76SBarry Smith   if (nt != a->n) {
560e3372554SBarry Smith     SETERRQ(1,0,"Incompatible parition of A and xx");
561fbd6ef76SBarry Smith   }
56243a90d84SBarry Smith   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
56338597bd4SSatish Balay   ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr);
56443a90d84SBarry Smith   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
56538597bd4SSatish Balay   ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
5661eb62cbbSBarry Smith   return 0;
5671eb62cbbSBarry Smith }
5681eb62cbbSBarry Smith 
5695615d1e5SSatish Balay #undef __FUNC__
5705615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_MPIAIJ"
571416022c9SBarry Smith static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
572da3a660dSBarry Smith {
573416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
574da3a660dSBarry Smith   int        ierr;
57543a90d84SBarry Smith   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
57638597bd4SSatish Balay   ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
57743a90d84SBarry Smith   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
57838597bd4SSatish Balay   ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
579da3a660dSBarry Smith   return 0;
580da3a660dSBarry Smith }
581da3a660dSBarry Smith 
5825615d1e5SSatish Balay #undef __FUNC__
5835615d1e5SSatish Balay #define __FUNC__ "MatMultTrans_MPIAIJ"
584416022c9SBarry Smith static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
585da3a660dSBarry Smith {
586416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
587da3a660dSBarry Smith   int        ierr;
588da3a660dSBarry Smith 
589da3a660dSBarry Smith   /* do nondiagonal part */
59038597bd4SSatish Balay   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
591da3a660dSBarry Smith   /* send it on its way */
592537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
593da3a660dSBarry Smith   /* do local part */
59438597bd4SSatish Balay   ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr);
595da3a660dSBarry Smith   /* receive remote parts: note this assumes the values are not actually */
596da3a660dSBarry Smith   /* inserted in yy until the next line, which is true for my implementation*/
597da3a660dSBarry Smith   /* but is not perhaps always true. */
598537820f0SBarry Smith   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
599da3a660dSBarry Smith   return 0;
600da3a660dSBarry Smith }
601da3a660dSBarry Smith 
6025615d1e5SSatish Balay #undef __FUNC__
6035615d1e5SSatish Balay #define __FUNC__ "MatMultTransAdd_MPIAIJ"
604416022c9SBarry Smith static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
605da3a660dSBarry Smith {
606416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
607da3a660dSBarry Smith   int        ierr;
608da3a660dSBarry Smith 
609da3a660dSBarry Smith   /* do nondiagonal part */
61038597bd4SSatish Balay   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
611da3a660dSBarry Smith   /* send it on its way */
612537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
613da3a660dSBarry Smith   /* do local part */
61438597bd4SSatish Balay   ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
615da3a660dSBarry Smith   /* receive remote parts: note this assumes the values are not actually */
616da3a660dSBarry Smith   /* inserted in yy until the next line, which is true for my implementation*/
617da3a660dSBarry Smith   /* but is not perhaps always true. */
6180a198c4cSBarry Smith   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
619da3a660dSBarry Smith   return 0;
620da3a660dSBarry Smith }
621da3a660dSBarry Smith 
6221eb62cbbSBarry Smith /*
6231eb62cbbSBarry Smith   This only works correctly for square matrices where the subblock A->A is the
6241eb62cbbSBarry Smith    diagonal block
6251eb62cbbSBarry Smith */
6265615d1e5SSatish Balay #undef __FUNC__
6275615d1e5SSatish Balay #define __FUNC__ "MatGetDiagonal_MPIAIJ"
628416022c9SBarry Smith static int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
6291eb62cbbSBarry Smith {
630416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
63144cd7ae7SLois Curfman McInnes   if (a->M != a->N)
632e3372554SBarry Smith     SETERRQ(1,0,"Supports only square matrix where A->A is diag block");
6335baf8537SBarry Smith   if (a->rstart != a->cstart || a->rend != a->cend) {
634e3372554SBarry Smith     SETERRQ(1,0,"row partition must equal col partition");  }
635416022c9SBarry Smith   return MatGetDiagonal(a->A,v);
6361eb62cbbSBarry Smith }
6371eb62cbbSBarry Smith 
6385615d1e5SSatish Balay #undef __FUNC__
6395615d1e5SSatish Balay #define __FUNC__ "MatScale_MPIAIJ"
640052efed2SBarry Smith static int MatScale_MPIAIJ(Scalar *aa,Mat A)
641052efed2SBarry Smith {
642052efed2SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
643052efed2SBarry Smith   int        ierr;
644052efed2SBarry Smith   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
645052efed2SBarry Smith   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
646052efed2SBarry Smith   return 0;
647052efed2SBarry Smith }
648052efed2SBarry Smith 
6495615d1e5SSatish Balay #undef __FUNC__
6505615d1e5SSatish Balay #define __FUNC__ "MatDestroy_MPIAIJ"
65144a69424SLois Curfman McInnes static int MatDestroy_MPIAIJ(PetscObject obj)
6521eb62cbbSBarry Smith {
6531eb62cbbSBarry Smith   Mat        mat = (Mat) obj;
65444a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
6551eb62cbbSBarry Smith   int        ierr;
656a5a9c739SBarry Smith #if defined(PETSC_LOG)
6576e35fa57SLois Curfman McInnes   PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N);
658a5a9c739SBarry Smith #endif
6590452661fSBarry Smith   PetscFree(aij->rowners);
66078b31e54SBarry Smith   ierr = MatDestroy(aij->A); CHKERRQ(ierr);
66178b31e54SBarry Smith   ierr = MatDestroy(aij->B); CHKERRQ(ierr);
6620452661fSBarry Smith   if (aij->colmap) PetscFree(aij->colmap);
6630452661fSBarry Smith   if (aij->garray) PetscFree(aij->garray);
6641eb62cbbSBarry Smith   if (aij->lvec)   VecDestroy(aij->lvec);
665a56f8943SBarry Smith   if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
6667a0afa10SBarry Smith   if (aij->rowvalues) PetscFree(aij->rowvalues);
6670452661fSBarry Smith   PetscFree(aij);
66890f02eecSBarry Smith   if (mat->mapping) {
66990f02eecSBarry Smith     ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr);
67090f02eecSBarry Smith   }
671a5a9c739SBarry Smith   PLogObjectDestroy(mat);
6720452661fSBarry Smith   PetscHeaderDestroy(mat);
6731eb62cbbSBarry Smith   return 0;
6741eb62cbbSBarry Smith }
6757857610eSBarry Smith #include "draw.h"
676b16a3bb1SBarry Smith #include "pinclude/pviewer.h"
677ee50ffe9SBarry Smith 
6785615d1e5SSatish Balay #undef __FUNC__
6795615d1e5SSatish Balay #define __FUNC__ "MatView_MPIAIJ_Binary"
680416022c9SBarry Smith static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
6811eb62cbbSBarry Smith {
682416022c9SBarry Smith   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
683416022c9SBarry Smith   int         ierr;
684416022c9SBarry Smith 
68517699dbbSLois Curfman McInnes   if (aij->size == 1) {
686416022c9SBarry Smith     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
687416022c9SBarry Smith   }
688e3372554SBarry Smith   else SETERRQ(1,0,"Only uniprocessor output supported");
689416022c9SBarry Smith   return 0;
690416022c9SBarry Smith }
691416022c9SBarry Smith 
6925615d1e5SSatish Balay #undef __FUNC__
6935615d1e5SSatish Balay #define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworMatlab"
694d7e8b826SBarry Smith static int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
695416022c9SBarry Smith {
69644a69424SLois Curfman McInnes   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
697dbb450caSBarry Smith   Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
698a56f8943SBarry Smith   int         ierr, format,shift = C->indexshift,rank;
699d636dbe3SBarry Smith   FILE        *fd;
70019bcc07fSBarry Smith   ViewerType  vtype;
701416022c9SBarry Smith 
70219bcc07fSBarry Smith   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
70319bcc07fSBarry Smith   if (vtype  == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
70490ace30eSBarry Smith     ierr = ViewerGetFormat(viewer,&format);
7050a198c4cSBarry Smith     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
7064e220ebcSLois Curfman McInnes       MatInfo info;
7074e220ebcSLois Curfman McInnes       int     flg;
708a56f8943SBarry Smith       MPI_Comm_rank(mat->comm,&rank);
70990ace30eSBarry Smith       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
7104e220ebcSLois Curfman McInnes       ierr = MatGetInfo(mat,MAT_LOCAL,&info);
71195e01e2fSLois Curfman McInnes       ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr);
71277c4ece6SBarry Smith       PetscSequentialPhaseBegin(mat->comm,1);
71395e01e2fSLois Curfman McInnes       if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
7144e220ebcSLois Curfman McInnes          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
71595e01e2fSLois Curfman McInnes       else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
7164e220ebcSLois Curfman McInnes          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
7174e220ebcSLois Curfman McInnes       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);
7184e220ebcSLois Curfman McInnes       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);
7194e220ebcSLois Curfman McInnes       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);
7204e220ebcSLois Curfman McInnes       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);
721a56f8943SBarry Smith       fflush(fd);
72277c4ece6SBarry Smith       PetscSequentialPhaseEnd(mat->comm,1);
723a40aa06bSLois Curfman McInnes       ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr);
724416022c9SBarry Smith       return 0;
725416022c9SBarry Smith     }
7260a198c4cSBarry Smith     else if (format == VIEWER_FORMAT_ASCII_INFO) {
72708480c60SBarry Smith       return 0;
72808480c60SBarry Smith     }
729416022c9SBarry Smith   }
730416022c9SBarry Smith 
73119bcc07fSBarry Smith   if (vtype == DRAW_VIEWER) {
73219bcc07fSBarry Smith     Draw       draw;
73319bcc07fSBarry Smith     PetscTruth isnull;
73419bcc07fSBarry Smith     ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
73519bcc07fSBarry Smith     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
73619bcc07fSBarry Smith   }
73719bcc07fSBarry Smith 
73819bcc07fSBarry Smith   if (vtype == ASCII_FILE_VIEWER) {
73990ace30eSBarry Smith     ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
74077c4ece6SBarry Smith     PetscSequentialPhaseBegin(mat->comm,1);
741d13ab20cSBarry Smith     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
74217699dbbSLois Curfman McInnes            aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
7431eb62cbbSBarry Smith            aij->cend);
74478b31e54SBarry Smith     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
74578b31e54SBarry Smith     ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
746d13ab20cSBarry Smith     fflush(fd);
74777c4ece6SBarry Smith     PetscSequentialPhaseEnd(mat->comm,1);
748d13ab20cSBarry Smith   }
749416022c9SBarry Smith   else {
750a56f8943SBarry Smith     int size = aij->size;
751a56f8943SBarry Smith     rank = aij->rank;
75217699dbbSLois Curfman McInnes     if (size == 1) {
75378b31e54SBarry Smith       ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
75495373324SBarry Smith     }
75595373324SBarry Smith     else {
75695373324SBarry Smith       /* assemble the entire matrix onto first processor. */
75795373324SBarry Smith       Mat         A;
758ec8511deSBarry Smith       Mat_SeqAIJ *Aloc;
7592eb8c8abSBarry Smith       int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
76095373324SBarry Smith       Scalar      *a;
7612ee70a88SLois Curfman McInnes 
76217699dbbSLois Curfman McInnes       if (!rank) {
763b4fd4287SBarry Smith         ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
764c451ab25SLois Curfman McInnes                CHKERRQ(ierr);
76595373324SBarry Smith       }
76695373324SBarry Smith       else {
767b4fd4287SBarry Smith         ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
768c451ab25SLois Curfman McInnes                CHKERRQ(ierr);
76995373324SBarry Smith       }
770464493b3SBarry Smith       PLogObjectParent(mat,A);
771416022c9SBarry Smith 
77295373324SBarry Smith       /* copy over the A part */
773ec8511deSBarry Smith       Aloc = (Mat_SeqAIJ*) aij->A->data;
7742ee70a88SLois Curfman McInnes       m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
77595373324SBarry Smith       row = aij->rstart;
776dbb450caSBarry Smith       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
77795373324SBarry Smith       for ( i=0; i<m; i++ ) {
778416022c9SBarry Smith         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
77995373324SBarry Smith         row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
78095373324SBarry Smith       }
7812ee70a88SLois Curfman McInnes       aj = Aloc->j;
782dbb450caSBarry Smith       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}
78395373324SBarry Smith 
78495373324SBarry Smith       /* copy over the B part */
785ec8511deSBarry Smith       Aloc = (Mat_SeqAIJ*) aij->B->data;
7862ee70a88SLois Curfman McInnes       m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
78795373324SBarry Smith       row = aij->rstart;
7880452661fSBarry Smith       ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
789dbb450caSBarry Smith       for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
79095373324SBarry Smith       for ( i=0; i<m; i++ ) {
791416022c9SBarry Smith         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
79295373324SBarry Smith         row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
79395373324SBarry Smith       }
7940452661fSBarry Smith       PetscFree(ct);
7956d4a8577SBarry Smith       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
7966d4a8577SBarry Smith       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
79717699dbbSLois Curfman McInnes       if (!rank) {
79878b31e54SBarry Smith         ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
79995373324SBarry Smith       }
80078b31e54SBarry Smith       ierr = MatDestroy(A); CHKERRQ(ierr);
80195373324SBarry Smith     }
80295373324SBarry Smith   }
8031eb62cbbSBarry Smith   return 0;
8041eb62cbbSBarry Smith }
8051eb62cbbSBarry Smith 
8065615d1e5SSatish Balay #undef __FUNC__
8075615d1e5SSatish Balay #define __FUNC__ "MatView_MPIAIJ"
808416022c9SBarry Smith static int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
809416022c9SBarry Smith {
810416022c9SBarry Smith   Mat         mat = (Mat) obj;
811416022c9SBarry Smith   int         ierr;
81219bcc07fSBarry Smith   ViewerType  vtype;
813416022c9SBarry Smith 
81419bcc07fSBarry Smith   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
81519bcc07fSBarry Smith   if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
81619bcc07fSBarry Smith       vtype == DRAW_VIEWER       || vtype == MATLAB_VIEWER) {
817d7e8b826SBarry Smith     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
818416022c9SBarry Smith   }
81919bcc07fSBarry Smith   else if (vtype == BINARY_FILE_VIEWER) {
820416022c9SBarry Smith     return MatView_MPIAIJ_Binary(mat,viewer);
821416022c9SBarry Smith   }
822416022c9SBarry Smith   return 0;
823416022c9SBarry Smith }
824416022c9SBarry Smith 
8251eb62cbbSBarry Smith /*
8261eb62cbbSBarry Smith     This has to provide several versions.
8271eb62cbbSBarry Smith 
8281eb62cbbSBarry Smith      2) a) use only local smoothing updating outer values only once.
8291eb62cbbSBarry Smith         b) local smoothing updating outer values each inner iteration
830d6dfbf8fSBarry Smith      3) color updating out values betwen colors.
8311eb62cbbSBarry Smith */
8325615d1e5SSatish Balay #undef __FUNC__
8335615d1e5SSatish Balay #define __FUNC__ "MatRelax_MPIAIJ"
834fc76ce87SLois Curfman McInnes static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
835dbb450caSBarry Smith                            double fshift,int its,Vec xx)
8368a729477SBarry Smith {
83744a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
838d6dfbf8fSBarry Smith   Mat        AA = mat->A, BB = mat->B;
839ec8511deSBarry Smith   Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
840c16cb8f2SBarry Smith   Scalar     *b,*x,*xs,*ls,d,*v,sum;
8416abc6512SBarry Smith   int        ierr,*idx, *diag;
842416022c9SBarry Smith   int        n = mat->n, m = mat->m, i,shift = A->indexshift;
8438a729477SBarry Smith 
844d6dfbf8fSBarry Smith   VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
845dbb450caSBarry Smith   xs = x + shift; /* shift by one for index start of 1 */
846dbb450caSBarry Smith   ls = ls + shift;
847ec8511deSBarry Smith   if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;}
848d6dfbf8fSBarry Smith   diag = A->diag;
849c16cb8f2SBarry Smith   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
850da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
85138597bd4SSatish Balay       return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
852da3a660dSBarry Smith     }
85343a90d84SBarry Smith     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
85478b31e54SBarry Smith     CHKERRQ(ierr);
85543a90d84SBarry Smith     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
85678b31e54SBarry Smith     CHKERRQ(ierr);
857d6dfbf8fSBarry Smith     while (its--) {
858d6dfbf8fSBarry Smith       /* go down through the rows */
859d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
860d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
861dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
862dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
863d6dfbf8fSBarry Smith         sum  = b[i];
864d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
865dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
866d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
867dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
868dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
869d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
87055a1b374SBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
871d6dfbf8fSBarry Smith       }
872d6dfbf8fSBarry Smith       /* come up through the rows */
873d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
874d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
875dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
876dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
877d6dfbf8fSBarry Smith         sum  = b[i];
878d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
879dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
880d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
881dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
882dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
883d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
88455a1b374SBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
885d6dfbf8fSBarry Smith       }
886d6dfbf8fSBarry Smith     }
887d6dfbf8fSBarry Smith   }
888d6dfbf8fSBarry Smith   else if (flag & SOR_LOCAL_FORWARD_SWEEP){
889da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
89038597bd4SSatish Balay       return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
891da3a660dSBarry Smith     }
89243a90d84SBarry Smith     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
89378b31e54SBarry Smith     CHKERRQ(ierr);
89443a90d84SBarry Smith     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
89578b31e54SBarry Smith     CHKERRQ(ierr);
896d6dfbf8fSBarry Smith     while (its--) {
897d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
898d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
899dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
900dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
901d6dfbf8fSBarry Smith         sum  = b[i];
902d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
903dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
904d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
905dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
906dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
907d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
90855a1b374SBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
909d6dfbf8fSBarry Smith       }
910d6dfbf8fSBarry Smith     }
911d6dfbf8fSBarry Smith   }
912d6dfbf8fSBarry Smith   else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
913da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
91438597bd4SSatish Balay       return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
915da3a660dSBarry Smith     }
91643a90d84SBarry Smith     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,
91778b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
91843a90d84SBarry Smith     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,
91978b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
920d6dfbf8fSBarry Smith     while (its--) {
921d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
922d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
923dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
924dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
925d6dfbf8fSBarry Smith         sum  = b[i];
926d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
927dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
928d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
929dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
930dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
931d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
93255a1b374SBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
933d6dfbf8fSBarry Smith       }
934d6dfbf8fSBarry Smith     }
935d6dfbf8fSBarry Smith   }
936c16cb8f2SBarry Smith   else {
937e3372554SBarry Smith     SETERRQ(1,0,"Option not supported");
938c16cb8f2SBarry Smith   }
9398a729477SBarry Smith   return 0;
9408a729477SBarry Smith }
941a66be287SLois Curfman McInnes 
9425615d1e5SSatish Balay #undef __FUNC__
9435615d1e5SSatish Balay #define __FUNC__ "MatGetInfo_MPIAIJ"
9444e220ebcSLois Curfman McInnes static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
945a66be287SLois Curfman McInnes {
946a66be287SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
947a66be287SLois Curfman McInnes   Mat        A = mat->A, B = mat->B;
9484e220ebcSLois Curfman McInnes   int        ierr;
9494e220ebcSLois Curfman McInnes   double     isend[5], irecv[5];
950a66be287SLois Curfman McInnes 
9514e220ebcSLois Curfman McInnes   info->rows_global    = (double)mat->M;
9524e220ebcSLois Curfman McInnes   info->columns_global = (double)mat->N;
9534e220ebcSLois Curfman McInnes   info->rows_local     = (double)mat->m;
9544e220ebcSLois Curfman McInnes   info->columns_local  = (double)mat->N;
9554e220ebcSLois Curfman McInnes   info->block_size     = 1.0;
9564e220ebcSLois Curfman McInnes   ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
9574e220ebcSLois Curfman McInnes   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
9584e220ebcSLois Curfman McInnes   isend[3] = info->memory;  isend[4] = info->mallocs;
9594e220ebcSLois Curfman McInnes   ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
9604e220ebcSLois Curfman McInnes   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
9614e220ebcSLois Curfman McInnes   isend[3] += info->memory;  isend[4] += info->mallocs;
962a66be287SLois Curfman McInnes   if (flag == MAT_LOCAL) {
9634e220ebcSLois Curfman McInnes     info->nz_used      = isend[0];
9644e220ebcSLois Curfman McInnes     info->nz_allocated = isend[1];
9654e220ebcSLois Curfman McInnes     info->nz_unneeded  = isend[2];
9664e220ebcSLois Curfman McInnes     info->memory       = isend[3];
9674e220ebcSLois Curfman McInnes     info->mallocs      = isend[4];
968a66be287SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_MAX) {
9694e220ebcSLois Curfman McInnes     MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);
9704e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
9714e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
9724e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
9734e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
9744e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
975a66be287SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_SUM) {
9764e220ebcSLois Curfman McInnes     MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);
9774e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
9784e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
9794e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
9804e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
9814e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
982a66be287SLois Curfman McInnes   }
9834e220ebcSLois Curfman McInnes   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
9844e220ebcSLois Curfman McInnes   info->fill_ratio_needed = 0;
9854e220ebcSLois Curfman McInnes   info->factor_mallocs    = 0;
9864e220ebcSLois Curfman McInnes 
987a66be287SLois Curfman McInnes   return 0;
988a66be287SLois Curfman McInnes }
989a66be287SLois Curfman McInnes 
990299609e3SLois Curfman McInnes extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*);
991299609e3SLois Curfman McInnes extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*);
992299609e3SLois Curfman McInnes extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double);
993299609e3SLois Curfman McInnes extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *);
994299609e3SLois Curfman McInnes extern int MatSolve_MPIAIJ(Mat,Vec,Vec);
995299609e3SLois Curfman McInnes extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec);
996299609e3SLois Curfman McInnes extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec);
997299609e3SLois Curfman McInnes extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec);
998299609e3SLois Curfman McInnes 
9995615d1e5SSatish Balay #undef __FUNC__
10005615d1e5SSatish Balay #define __FUNC__ "MatSetOption_MPIAIJ"
1001416022c9SBarry Smith static int MatSetOption_MPIAIJ(Mat A,MatOption op)
1002c74985f6SBarry Smith {
1003c0bbcb79SLois Curfman McInnes   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1004c74985f6SBarry Smith 
10056d4a8577SBarry Smith   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
10066d4a8577SBarry Smith       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
10076da5968aSLois Curfman McInnes       op == MAT_COLUMNS_UNSORTED ||
1008b1fbbac0SLois Curfman McInnes       op == MAT_COLUMNS_SORTED) {
1009b1fbbac0SLois Curfman McInnes         MatSetOption(a->A,op);
1010b1fbbac0SLois Curfman McInnes         MatSetOption(a->B,op);
1011b1fbbac0SLois Curfman McInnes   } else if (op == MAT_ROW_ORIENTED) {
1012aeafbbfcSLois Curfman McInnes     a->roworiented = 1;
1013c0bbcb79SLois Curfman McInnes     MatSetOption(a->A,op);
1014c0bbcb79SLois Curfman McInnes     MatSetOption(a->B,op);
1015b1fbbac0SLois Curfman McInnes   } else if (op == MAT_ROWS_SORTED ||
10166da5968aSLois Curfman McInnes              op == MAT_ROWS_UNSORTED ||
10176d4a8577SBarry Smith              op == MAT_SYMMETRIC ||
10186d4a8577SBarry Smith              op == MAT_STRUCTURALLY_SYMMETRIC ||
10196d4a8577SBarry Smith              op == MAT_YES_NEW_DIAGONALS)
102094a424c1SBarry Smith     PLogInfo(A,"Info:MatSetOption_MPIAIJ:Option ignored\n");
10216d4a8577SBarry Smith   else if (op == MAT_COLUMN_ORIENTED) {
10224b0e389bSBarry Smith     a->roworiented = 0;
10234b0e389bSBarry Smith     MatSetOption(a->A,op);
10244b0e389bSBarry Smith     MatSetOption(a->B,op);
102590f02eecSBarry Smith   } else if (op == MAT_IGNORE_OFF_PROCESSOR_ENTRIES) {
102690f02eecSBarry Smith     a->donotstash = 1;
102790f02eecSBarry Smith   } else if (op == MAT_NO_NEW_DIAGONALS)
1028e3372554SBarry Smith     {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");}
1029c0bbcb79SLois Curfman McInnes   else
1030e3372554SBarry Smith     {SETERRQ(PETSC_ERR_SUP,0,"unknown option");}
1031c74985f6SBarry Smith   return 0;
1032c74985f6SBarry Smith }
1033c74985f6SBarry Smith 
10345615d1e5SSatish Balay #undef __FUNC__
10355615d1e5SSatish Balay #define __FUNC__ "MatGetSize_MPIAIJ"
1036d1710a03SLois Curfman McInnes static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
1037c74985f6SBarry Smith {
103844a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1039c74985f6SBarry Smith   *m = mat->M; *n = mat->N;
1040c74985f6SBarry Smith   return 0;
1041c74985f6SBarry Smith }
1042c74985f6SBarry Smith 
10435615d1e5SSatish Balay #undef __FUNC__
10445615d1e5SSatish Balay #define __FUNC__ "MatGetLocalSize_MPIAIJ"
1045d1710a03SLois Curfman McInnes static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
1046c74985f6SBarry Smith {
104744a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1048b7c46309SBarry Smith   *m = mat->m; *n = mat->N;
1049c74985f6SBarry Smith   return 0;
1050c74985f6SBarry Smith }
1051c74985f6SBarry Smith 
10525615d1e5SSatish Balay #undef __FUNC__
10535615d1e5SSatish Balay #define __FUNC__ "MatGetOwnershipRange_MPIAIJ"
1054d1710a03SLois Curfman McInnes static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
1055c74985f6SBarry Smith {
105644a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1057c74985f6SBarry Smith   *m = mat->rstart; *n = mat->rend;
1058c74985f6SBarry Smith   return 0;
1059c74985f6SBarry Smith }
1060c74985f6SBarry Smith 
10616d84be18SBarry Smith extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
10626d84be18SBarry Smith extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
10636d84be18SBarry Smith 
10645615d1e5SSatish Balay #undef __FUNC__
10655615d1e5SSatish Balay #define __FUNC__ "MatGetRow_MPIAIJ"
10666d84be18SBarry Smith int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
106739e00950SLois Curfman McInnes {
1068154123eaSLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
106970f0671dSBarry Smith   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
1070154123eaSLois Curfman McInnes   int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
1071154123eaSLois Curfman McInnes   int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
107270f0671dSBarry Smith   int        *cmap, *idx_p;
107339e00950SLois Curfman McInnes 
1074e3372554SBarry Smith   if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,0,"Already active");
10757a0afa10SBarry Smith   mat->getrowactive = PETSC_TRUE;
10767a0afa10SBarry Smith 
107770f0671dSBarry Smith   if (!mat->rowvalues && (idx || v)) {
10787a0afa10SBarry Smith     /*
10797a0afa10SBarry Smith         allocate enough space to hold information from the longest row.
10807a0afa10SBarry Smith     */
10817a0afa10SBarry Smith     Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data;
1082c16cb8f2SBarry Smith     int     max = 1,m = mat->m,tmp;
1083c16cb8f2SBarry Smith     for ( i=0; i<m; i++ ) {
10847a0afa10SBarry Smith       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
10857a0afa10SBarry Smith       if (max < tmp) { max = tmp; }
10867a0afa10SBarry Smith     }
10877a0afa10SBarry Smith     mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar)));
10887a0afa10SBarry Smith     CHKPTRQ(mat->rowvalues);
10897a0afa10SBarry Smith     mat->rowindices = (int *) (mat->rowvalues + max);
10907a0afa10SBarry Smith   }
10917a0afa10SBarry Smith 
1092e3372554SBarry Smith   if (row < rstart || row >= rend) SETERRQ(1,0,"Only local rows")
1093abc0e9e4SLois Curfman McInnes   lrow = row - rstart;
109439e00950SLois Curfman McInnes 
1095154123eaSLois Curfman McInnes   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1096154123eaSLois Curfman McInnes   if (!v)   {pvA = 0; pvB = 0;}
1097154123eaSLois Curfman McInnes   if (!idx) {pcA = 0; if (!v) pcB = 0;}
109838597bd4SSatish Balay   ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
109938597bd4SSatish Balay   ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1100154123eaSLois Curfman McInnes   nztot = nzA + nzB;
1101154123eaSLois Curfman McInnes 
110270f0671dSBarry Smith   cmap  = mat->garray;
1103154123eaSLois Curfman McInnes   if (v  || idx) {
1104154123eaSLois Curfman McInnes     if (nztot) {
1105154123eaSLois Curfman McInnes       /* Sort by increasing column numbers, assuming A and B already sorted */
110670f0671dSBarry Smith       int imark = -1;
1107154123eaSLois Curfman McInnes       if (v) {
110870f0671dSBarry Smith         *v = v_p = mat->rowvalues;
110939e00950SLois Curfman McInnes         for ( i=0; i<nzB; i++ ) {
111070f0671dSBarry Smith           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1111154123eaSLois Curfman McInnes           else break;
1112154123eaSLois Curfman McInnes         }
1113154123eaSLois Curfman McInnes         imark = i;
111470f0671dSBarry Smith         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
111570f0671dSBarry Smith         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
1116154123eaSLois Curfman McInnes       }
1117154123eaSLois Curfman McInnes       if (idx) {
111870f0671dSBarry Smith         *idx = idx_p = mat->rowindices;
111970f0671dSBarry Smith         if (imark > -1) {
112070f0671dSBarry Smith           for ( i=0; i<imark; i++ ) {
112170f0671dSBarry Smith             idx_p[i] = cmap[cworkB[i]];
112270f0671dSBarry Smith           }
112370f0671dSBarry Smith         } else {
1124154123eaSLois Curfman McInnes           for ( i=0; i<nzB; i++ ) {
112570f0671dSBarry Smith             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1126154123eaSLois Curfman McInnes             else break;
1127154123eaSLois Curfman McInnes           }
1128154123eaSLois Curfman McInnes           imark = i;
112970f0671dSBarry Smith         }
113070f0671dSBarry Smith         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart + cworkA[i];
113170f0671dSBarry Smith         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]];
113239e00950SLois Curfman McInnes       }
113339e00950SLois Curfman McInnes     }
11341ca473b0SSatish Balay     else {
11351ca473b0SSatish Balay       if (idx) *idx = 0;
11361ca473b0SSatish Balay       if (v)   *v   = 0;
11371ca473b0SSatish Balay     }
1138154123eaSLois Curfman McInnes   }
113939e00950SLois Curfman McInnes   *nz = nztot;
114038597bd4SSatish Balay   ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
114138597bd4SSatish Balay   ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
114239e00950SLois Curfman McInnes   return 0;
114339e00950SLois Curfman McInnes }
114439e00950SLois Curfman McInnes 
11455615d1e5SSatish Balay #undef __FUNC__
11465615d1e5SSatish Balay #define __FUNC__ "MatRestoreRow_MPIAIJ"
11476d84be18SBarry Smith int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
114839e00950SLois Curfman McInnes {
11497a0afa10SBarry Smith   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
11507a0afa10SBarry Smith   if (aij->getrowactive == PETSC_FALSE) {
1151e3372554SBarry Smith     SETERRQ(1,0,"MatGetRow not called");
11527a0afa10SBarry Smith   }
11537a0afa10SBarry Smith   aij->getrowactive = PETSC_FALSE;
115439e00950SLois Curfman McInnes   return 0;
115539e00950SLois Curfman McInnes }
115639e00950SLois Curfman McInnes 
11575615d1e5SSatish Balay #undef __FUNC__
11585615d1e5SSatish Balay #define __FUNC__ "MatNorm_MPIAIJ"
1159cddf8d76SBarry Smith static int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
1160855ac2c5SLois Curfman McInnes {
1161855ac2c5SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1162ec8511deSBarry Smith   Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
1163416022c9SBarry Smith   int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
1164416022c9SBarry Smith   double     sum = 0.0;
116504ca555eSLois Curfman McInnes   Scalar     *v;
116604ca555eSLois Curfman McInnes 
116717699dbbSLois Curfman McInnes   if (aij->size == 1) {
116814183eadSLois Curfman McInnes     ierr =  MatNorm(aij->A,type,norm); CHKERRQ(ierr);
116937fa93a5SLois Curfman McInnes   } else {
117004ca555eSLois Curfman McInnes     if (type == NORM_FROBENIUS) {
117104ca555eSLois Curfman McInnes       v = amat->a;
117204ca555eSLois Curfman McInnes       for (i=0; i<amat->nz; i++ ) {
117304ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX)
117404ca555eSLois Curfman McInnes         sum += real(conj(*v)*(*v)); v++;
117504ca555eSLois Curfman McInnes #else
117604ca555eSLois Curfman McInnes         sum += (*v)*(*v); v++;
117704ca555eSLois Curfman McInnes #endif
117804ca555eSLois Curfman McInnes       }
117904ca555eSLois Curfman McInnes       v = bmat->a;
118004ca555eSLois Curfman McInnes       for (i=0; i<bmat->nz; i++ ) {
118104ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX)
118204ca555eSLois Curfman McInnes         sum += real(conj(*v)*(*v)); v++;
118304ca555eSLois Curfman McInnes #else
118404ca555eSLois Curfman McInnes         sum += (*v)*(*v); v++;
118504ca555eSLois Curfman McInnes #endif
118604ca555eSLois Curfman McInnes       }
11876d84be18SBarry Smith       MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
118804ca555eSLois Curfman McInnes       *norm = sqrt(*norm);
118904ca555eSLois Curfman McInnes     }
119004ca555eSLois Curfman McInnes     else if (type == NORM_1) { /* max column norm */
119104ca555eSLois Curfman McInnes       double *tmp, *tmp2;
119204ca555eSLois Curfman McInnes       int    *jj, *garray = aij->garray;
11930452661fSBarry Smith       tmp  = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp);
11940452661fSBarry Smith       tmp2 = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp2);
1195cddf8d76SBarry Smith       PetscMemzero(tmp,aij->N*sizeof(double));
119604ca555eSLois Curfman McInnes       *norm = 0.0;
119704ca555eSLois Curfman McInnes       v = amat->a; jj = amat->j;
119804ca555eSLois Curfman McInnes       for ( j=0; j<amat->nz; j++ ) {
1199579c6b6fSBarry Smith         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
120004ca555eSLois Curfman McInnes       }
120104ca555eSLois Curfman McInnes       v = bmat->a; jj = bmat->j;
120204ca555eSLois Curfman McInnes       for ( j=0; j<bmat->nz; j++ ) {
1203579c6b6fSBarry Smith         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
120404ca555eSLois Curfman McInnes       }
12056d84be18SBarry Smith       MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);
120604ca555eSLois Curfman McInnes       for ( j=0; j<aij->N; j++ ) {
120704ca555eSLois Curfman McInnes         if (tmp2[j] > *norm) *norm = tmp2[j];
120804ca555eSLois Curfman McInnes       }
12090452661fSBarry Smith       PetscFree(tmp); PetscFree(tmp2);
121004ca555eSLois Curfman McInnes     }
121104ca555eSLois Curfman McInnes     else if (type == NORM_INFINITY) { /* max row norm */
1212515d9167SLois Curfman McInnes       double ntemp = 0.0;
121304ca555eSLois Curfman McInnes       for ( j=0; j<amat->m; j++ ) {
1214dbb450caSBarry Smith         v = amat->a + amat->i[j] + shift;
121504ca555eSLois Curfman McInnes         sum = 0.0;
121604ca555eSLois Curfman McInnes         for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
1217cddf8d76SBarry Smith           sum += PetscAbsScalar(*v); v++;
121804ca555eSLois Curfman McInnes         }
1219dbb450caSBarry Smith         v = bmat->a + bmat->i[j] + shift;
122004ca555eSLois Curfman McInnes         for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
1221cddf8d76SBarry Smith           sum += PetscAbsScalar(*v); v++;
122204ca555eSLois Curfman McInnes         }
1223515d9167SLois Curfman McInnes         if (sum > ntemp) ntemp = sum;
122404ca555eSLois Curfman McInnes       }
12256d84be18SBarry Smith       MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);
122604ca555eSLois Curfman McInnes     }
122704ca555eSLois Curfman McInnes     else {
1228e3372554SBarry Smith       SETERRQ(1,0,"No support for two norm");
122904ca555eSLois Curfman McInnes     }
123037fa93a5SLois Curfman McInnes   }
1231855ac2c5SLois Curfman McInnes   return 0;
1232855ac2c5SLois Curfman McInnes }
1233855ac2c5SLois Curfman McInnes 
12345615d1e5SSatish Balay #undef __FUNC__
12355615d1e5SSatish Balay #define __FUNC__ "MatTranspose_MPIAIJ"
12360de55854SLois Curfman McInnes static int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1237b7c46309SBarry Smith {
1238b7c46309SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1239dbb450caSBarry Smith   Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
1240416022c9SBarry Smith   int        ierr,shift = Aloc->indexshift;
1241416022c9SBarry Smith   Mat        B;
1242b7c46309SBarry Smith   int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
1243b7c46309SBarry Smith   Scalar     *array;
1244b7c46309SBarry Smith 
12453638b69dSLois Curfman McInnes   if (matout == PETSC_NULL && M != N)
1246e3372554SBarry Smith     SETERRQ(1,0,"Square matrix only for in-place");
1247b4fd4287SBarry Smith   ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,
1248b4fd4287SBarry Smith          PETSC_NULL,&B); CHKERRQ(ierr);
1249b7c46309SBarry Smith 
1250b7c46309SBarry Smith   /* copy over the A part */
1251ec8511deSBarry Smith   Aloc = (Mat_SeqAIJ*) a->A->data;
1252b7c46309SBarry Smith   m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1253b7c46309SBarry Smith   row = a->rstart;
1254dbb450caSBarry Smith   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
1255b7c46309SBarry Smith   for ( i=0; i<m; i++ ) {
1256416022c9SBarry Smith     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1257b7c46309SBarry Smith     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1258b7c46309SBarry Smith   }
1259b7c46309SBarry Smith   aj = Aloc->j;
12604af08d9eSBarry Smith   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}
1261b7c46309SBarry Smith 
1262b7c46309SBarry Smith   /* copy over the B part */
1263ec8511deSBarry Smith   Aloc = (Mat_SeqAIJ*) a->B->data;
1264b7c46309SBarry Smith   m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1265b7c46309SBarry Smith   row = a->rstart;
12660452661fSBarry Smith   ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
1267dbb450caSBarry Smith   for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
1268b7c46309SBarry Smith   for ( i=0; i<m; i++ ) {
1269416022c9SBarry Smith     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1270b7c46309SBarry Smith     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1271b7c46309SBarry Smith   }
12720452661fSBarry Smith   PetscFree(ct);
12736d4a8577SBarry Smith   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
12746d4a8577SBarry Smith   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
12753638b69dSLois Curfman McInnes   if (matout != PETSC_NULL) {
12760de55854SLois Curfman McInnes     *matout = B;
12770de55854SLois Curfman McInnes   } else {
12780de55854SLois Curfman McInnes     /* This isn't really an in-place transpose .... but free data structures from a */
12790452661fSBarry Smith     PetscFree(a->rowners);
12800de55854SLois Curfman McInnes     ierr = MatDestroy(a->A); CHKERRQ(ierr);
12810de55854SLois Curfman McInnes     ierr = MatDestroy(a->B); CHKERRQ(ierr);
12820452661fSBarry Smith     if (a->colmap) PetscFree(a->colmap);
12830452661fSBarry Smith     if (a->garray) PetscFree(a->garray);
12840de55854SLois Curfman McInnes     if (a->lvec) VecDestroy(a->lvec);
1285a56f8943SBarry Smith     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
12860452661fSBarry Smith     PetscFree(a);
1287416022c9SBarry Smith     PetscMemcpy(A,B,sizeof(struct _Mat));
12880452661fSBarry Smith     PetscHeaderDestroy(B);
12890de55854SLois Curfman McInnes   }
1290b7c46309SBarry Smith   return 0;
1291b7c46309SBarry Smith }
1292b7c46309SBarry Smith 
12935615d1e5SSatish Balay #undef __FUNC__
12945615d1e5SSatish Balay #define __FUNC__ "MatDiagonalScale_MPIAIJ"
12954b967eb1SSatish Balay int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1296a008b906SSatish Balay {
12974b967eb1SSatish Balay   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
12984b967eb1SSatish Balay   Mat a = aij->A, b = aij->B;
1299a008b906SSatish Balay   int ierr,s1,s2,s3;
1300a008b906SSatish Balay 
13014b967eb1SSatish Balay   ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr);
13024b967eb1SSatish Balay   if (rr) {
13034b967eb1SSatish Balay     s3 = aij->n;
13044b967eb1SSatish Balay     VecGetLocalSize_Fast(rr,s1);
1305e3372554SBarry Smith     if (s1!=s3) SETERRQ(1,0,"right vector non-conforming local size");
13064b967eb1SSatish Balay     /* Overlap communication with computation. */
130743a90d84SBarry Smith     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
1308a008b906SSatish Balay   }
13094b967eb1SSatish Balay   if (ll) {
13104b967eb1SSatish Balay     VecGetLocalSize_Fast(ll,s1);
1311e3372554SBarry Smith     if (s1!=s2) SETERRQ(1,0,"left vector non-conforming local size");
1312c351683dSSatish Balay     ierr = (*b->ops.diagonalscale)(b,ll,0); CHKERRQ(ierr);
13134b967eb1SSatish Balay   }
13144b967eb1SSatish Balay   /* scale  the diagonal block */
1315c351683dSSatish Balay   ierr = (*a->ops.diagonalscale)(a,ll,rr); CHKERRQ(ierr);
13164b967eb1SSatish Balay 
13174b967eb1SSatish Balay   if (rr) {
13184b967eb1SSatish Balay     /* Do a scatter end and then right scale the off-diagonal block */
131943a90d84SBarry Smith     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
1320c351683dSSatish Balay     ierr = (*b->ops.diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr);
13214b967eb1SSatish Balay   }
13224b967eb1SSatish Balay 
1323a008b906SSatish Balay   return 0;
1324a008b906SSatish Balay }
1325a008b906SSatish Balay 
1326a008b906SSatish Balay 
1327682d7d0cSBarry Smith extern int MatPrintHelp_SeqAIJ(Mat);
13285615d1e5SSatish Balay #undef __FUNC__
13295615d1e5SSatish Balay #define __FUNC__ "MatPrintHelp_MPIAIJ"
1330682d7d0cSBarry Smith static int MatPrintHelp_MPIAIJ(Mat A)
1331682d7d0cSBarry Smith {
1332682d7d0cSBarry Smith   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1333682d7d0cSBarry Smith 
1334682d7d0cSBarry Smith   if (!a->rank) return MatPrintHelp_SeqAIJ(a->A);
1335682d7d0cSBarry Smith   else return 0;
1336682d7d0cSBarry Smith }
1337682d7d0cSBarry Smith 
13385615d1e5SSatish Balay #undef __FUNC__
13395615d1e5SSatish Balay #define __FUNC__ "MatGetBlockSize_MPIAIJ"
13405a838052SSatish Balay static int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
13415a838052SSatish Balay {
13425a838052SSatish Balay   *bs = 1;
13435a838052SSatish Balay   return 0;
13445a838052SSatish Balay }
13455615d1e5SSatish Balay #undef __FUNC__
13465615d1e5SSatish Balay #define __FUNC__ "MatSetUnfactored_MPIAIJ"
1347bb5a7306SBarry Smith static int MatSetUnfactored_MPIAIJ(Mat A)
1348bb5a7306SBarry Smith {
1349bb5a7306SBarry Smith   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1350bb5a7306SBarry Smith   int        ierr;
1351bb5a7306SBarry Smith   ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
1352bb5a7306SBarry Smith   return 0;
1353bb5a7306SBarry Smith }
1354bb5a7306SBarry Smith 
13555a838052SSatish Balay 
1356fc76ce87SLois Curfman McInnes extern int MatConvert_MPIAIJ(Mat,MatType,Mat *);
13573d1612f7SBarry Smith static int MatConvertSameType_MPIAIJ(Mat,Mat *,int);
13582f86bd48SSatish Balay extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int);
13590a198c4cSBarry Smith extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
13600a198c4cSBarry Smith extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **);
13618a729477SBarry Smith /* -------------------------------------------------------------------*/
13622ee70a88SLois Curfman McInnes static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
136339e00950SLois Curfman McInnes        MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
136444a69424SLois Curfman McInnes        MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
136544a69424SLois Curfman McInnes        MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
136636ce4990SBarry Smith        0,0,
136736ce4990SBarry Smith        0,0,
136836ce4990SBarry Smith        0,0,
136944a69424SLois Curfman McInnes        MatRelax_MPIAIJ,
1370b7c46309SBarry Smith        MatTranspose_MPIAIJ,
1371a66be287SLois Curfman McInnes        MatGetInfo_MPIAIJ,0,
1372a008b906SSatish Balay        MatGetDiagonal_MPIAIJ,MatDiagonalScale_MPIAIJ,MatNorm_MPIAIJ,
1373ee50ffe9SBarry Smith        MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
13741eb62cbbSBarry Smith        0,
1375299609e3SLois Curfman McInnes        MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
137636ce4990SBarry Smith        0,0,0,0,
1377d1710a03SLois Curfman McInnes        MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
137836ce4990SBarry Smith        0,0,
13793e85bedfSBarry Smith        0,0,MatConvert_MPIAIJ,MatConvertSameType_MPIAIJ,0,0,
1380b49de8d1SLois Curfman McInnes        0,0,0,
1381598137ffSSatish Balay        MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0,
1382052efed2SBarry Smith        MatPrintHelp_MPIAIJ,
13833b2fbd54SBarry Smith        MatScale_MPIAIJ,0,0,0,
13840a198c4cSBarry Smith        MatGetBlockSize_MPIAIJ,0,0,0,0,
1385bb5a7306SBarry Smith        MatFDColoringCreate_MPIAIJ,0,MatSetUnfactored_MPIAIJ};
138636ce4990SBarry Smith 
13878a729477SBarry Smith 
13885615d1e5SSatish Balay #undef __FUNC__
13895615d1e5SSatish Balay #define __FUNC__ "MatCreateMPIAIJ"
13901987afe7SBarry Smith /*@C
1391ff756334SLois Curfman McInnes    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
13923a511b96SLois Curfman McInnes    (the default parallel PETSc format).  For good matrix assembly performance
13933a511b96SLois Curfman McInnes    the user should preallocate the matrix storage by setting the parameters
13943a511b96SLois Curfman McInnes    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
13953a511b96SLois Curfman McInnes    performance can be increased by more than a factor of 50.
13968a729477SBarry Smith 
13978a729477SBarry Smith    Input Parameters:
13981eb62cbbSBarry Smith .  comm - MPI communicator
13997d3e4905SLois Curfman McInnes .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
140092e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
140192e8d321SLois Curfman McInnes            y vector for the matrix-vector product y = Ax.
140292e8d321SLois Curfman McInnes .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
140392e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
140492e8d321SLois Curfman McInnes            x vector for the matrix-vector product y = Ax.
14057d3e4905SLois Curfman McInnes .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
140692e8d321SLois Curfman McInnes .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
1407ab693e5aSLois Curfman McInnes .  d_nz - number of nonzeros per row in diagonal portion of local submatrix
1408ff756334SLois Curfman McInnes            (same for all local rows)
14092bd5e0b2SLois Curfman McInnes .  d_nzz - array containing the number of nonzeros in the various rows of the
141092e8d321SLois Curfman McInnes            diagonal portion of the local submatrix (possibly different for each row)
14112bd5e0b2SLois Curfman McInnes            or PETSC_NULL. You must leave room for the diagonal entry even if
14122bd5e0b2SLois Curfman McInnes            it is zero.
14132bd5e0b2SLois Curfman McInnes .  o_nz - number of nonzeros per row in the off-diagonal portion of local
1414ab693e5aSLois Curfman McInnes            submatrix (same for all local rows).
14152bd5e0b2SLois Curfman McInnes .  o_nzz - array containing the number of nonzeros in the various rows of the
14162bd5e0b2SLois Curfman McInnes            off-diagonal portion of the local submatrix (possibly different for
14172bd5e0b2SLois Curfman McInnes            each row) or PETSC_NULL.
14188a729477SBarry Smith 
1419ff756334SLois Curfman McInnes    Output Parameter:
142044cd7ae7SLois Curfman McInnes .  A - the matrix
14218a729477SBarry Smith 
1422ff756334SLois Curfman McInnes    Notes:
1423ff756334SLois Curfman McInnes    The AIJ format (also called the Yale sparse matrix format or
1424ff756334SLois Curfman McInnes    compressed row storage), is fully compatible with standard Fortran 77
14250002213bSLois Curfman McInnes    storage.  That is, the stored row and column indices can begin at
14260002213bSLois Curfman McInnes    either one (as in Fortran) or zero.  See the users manual for details.
1427ff756334SLois Curfman McInnes 
1428ff756334SLois Curfman McInnes    The user MUST specify either the local or global matrix dimensions
1429ff756334SLois Curfman McInnes    (possibly both).
1430ff756334SLois Curfman McInnes 
14315511cfe3SLois Curfman McInnes    By default, this format uses inodes (identical nodes) when possible.
14325511cfe3SLois Curfman McInnes    We search for consecutive rows with the same nonzero structure, thereby
14335511cfe3SLois Curfman McInnes    reusing matrix information to achieve increased efficiency.
14345511cfe3SLois Curfman McInnes 
14355511cfe3SLois Curfman McInnes    Options Database Keys:
14365511cfe3SLois Curfman McInnes $    -mat_aij_no_inode  - Do not use inodes
14376e62573dSLois Curfman McInnes $    -mat_aij_inode_limit <limit> - Set inode limit.
14386e62573dSLois Curfman McInnes $        (max limit=5)
14396e62573dSLois Curfman McInnes $    -mat_aij_oneindex - Internally use indexing starting at 1
14406e62573dSLois Curfman McInnes $        rather than 0.  Note: When calling MatSetValues(),
14416e62573dSLois Curfman McInnes $        the user still MUST index entries starting at 0!
14425511cfe3SLois Curfman McInnes 
1443e0245417SLois Curfman McInnes    Storage Information:
1444e0245417SLois Curfman McInnes    For a square global matrix we define each processor's diagonal portion
1445e0245417SLois Curfman McInnes    to be its local rows and the corresponding columns (a square submatrix);
1446e0245417SLois Curfman McInnes    each processor's off-diagonal portion encompasses the remainder of the
1447e0245417SLois Curfman McInnes    local matrix (a rectangular submatrix).
1448e0245417SLois Curfman McInnes 
1449e0245417SLois Curfman McInnes    The user can specify preallocated storage for the diagonal part of
14505ace5be8SLois Curfman McInnes    the local submatrix with either d_nz or d_nnz (not both).  Set
14515ace5be8SLois Curfman McInnes    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
14525ace5be8SLois Curfman McInnes    memory allocation.  Likewise, specify preallocated storage for the
14535ace5be8SLois Curfman McInnes    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1454ff756334SLois Curfman McInnes 
14555511cfe3SLois Curfman McInnes    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
14565511cfe3SLois Curfman McInnes    the figure below we depict these three local rows and all columns (0-11).
14572191d07cSBarry Smith 
1458b810aeb4SBarry Smith $          0 1 2 3 4 5 6 7 8 9 10 11
1459b810aeb4SBarry Smith $         -------------------
14605511cfe3SLois Curfman McInnes $  row 3  |  o o o d d d o o o o o o
14615511cfe3SLois Curfman McInnes $  row 4  |  o o o d d d o o o o o o
14625511cfe3SLois Curfman McInnes $  row 5  |  o o o d d d o o o o o o
14635511cfe3SLois Curfman McInnes $         -------------------
1464b810aeb4SBarry Smith $
1465b810aeb4SBarry Smith 
14665511cfe3SLois Curfman McInnes    Thus, any entries in the d locations are stored in the d (diagonal)
14675511cfe3SLois Curfman McInnes    submatrix, and any entries in the o locations are stored in the
14685511cfe3SLois Curfman McInnes    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
14695511cfe3SLois Curfman McInnes    stored simply in the MATSEQAIJ format for compressed row storage.
14705511cfe3SLois Curfman McInnes 
14715511cfe3SLois Curfman McInnes    Now d_nz should indicate the number of nonzeros per row in the d matrix,
14725511cfe3SLois Curfman McInnes    and o_nz should indicate the number of nonzeros per row in the o matrix.
14735511cfe3SLois Curfman McInnes    In general, for PDE problems in which most nonzeros are near the diagonal,
14743d323bbdSBarry Smith    one expects d_nz >> o_nz. For large problems you MUST preallocate memory
147592e8d321SLois Curfman McInnes    or you will get TERRIBLE performance; see the users' manual chapter on
14766da5968aSLois Curfman McInnes    matrices.
14773a511b96SLois Curfman McInnes 
1478dbd7a890SLois Curfman McInnes .keywords: matrix, aij, compressed row, sparse, parallel
1479ff756334SLois Curfman McInnes 
1480fafbff53SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
14818a729477SBarry Smith @*/
14821eb62cbbSBarry Smith int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
148344cd7ae7SLois Curfman McInnes                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
14848a729477SBarry Smith {
148544cd7ae7SLois Curfman McInnes   Mat          B;
148644cd7ae7SLois Curfman McInnes   Mat_MPIAIJ   *b;
148736ce4990SBarry Smith   int          ierr, i,sum[2],work[2],size;
1488416022c9SBarry Smith 
148944cd7ae7SLois Curfman McInnes   *A = 0;
149044cd7ae7SLois Curfman McInnes   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIAIJ,comm);
149144cd7ae7SLois Curfman McInnes   PLogObjectCreate(B);
149244cd7ae7SLois Curfman McInnes   B->data       = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b);
149344cd7ae7SLois Curfman McInnes   PetscMemzero(b,sizeof(Mat_MPIAIJ));
149444cd7ae7SLois Curfman McInnes   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
149536ce4990SBarry Smith   MPI_Comm_size(comm,&size);
149636ce4990SBarry Smith   if (size == 1) {
149736ce4990SBarry Smith     B->ops.getrowij          = MatGetRowIJ_MPIAIJ;
149836ce4990SBarry Smith     B->ops.restorerowij      = MatRestoreRowIJ_MPIAIJ;
149936ce4990SBarry Smith     B->ops.lufactorsymbolic  = MatLUFactorSymbolic_MPIAIJ;
150036ce4990SBarry Smith     B->ops.lufactornumeric   = MatLUFactorNumeric_MPIAIJ;
150136ce4990SBarry Smith     B->ops.lufactor          = MatLUFactor_MPIAIJ;
150236ce4990SBarry Smith     B->ops.solve             = MatSolve_MPIAIJ;
150336ce4990SBarry Smith     B->ops.solveadd          = MatSolveAdd_MPIAIJ;
150436ce4990SBarry Smith     B->ops.solvetrans        = MatSolveTrans_MPIAIJ;
150536ce4990SBarry Smith     B->ops.solvetransadd     = MatSolveTransAdd_MPIAIJ;
150636ce4990SBarry Smith     B->ops.ilufactorsymbolic = MatILUFactorSymbolic_MPIAIJ;
150736ce4990SBarry Smith   }
150844cd7ae7SLois Curfman McInnes   B->destroy    = MatDestroy_MPIAIJ;
150944cd7ae7SLois Curfman McInnes   B->view       = MatView_MPIAIJ;
151044cd7ae7SLois Curfman McInnes   B->factor     = 0;
151144cd7ae7SLois Curfman McInnes   B->assembled  = PETSC_FALSE;
151290f02eecSBarry Smith   B->mapping    = 0;
1513d6dfbf8fSBarry Smith 
151444cd7ae7SLois Curfman McInnes   b->insertmode = NOT_SET_VALUES;
151544cd7ae7SLois Curfman McInnes   MPI_Comm_rank(comm,&b->rank);
151644cd7ae7SLois Curfman McInnes   MPI_Comm_size(comm,&b->size);
15171eb62cbbSBarry Smith 
1518b4fd4287SBarry Smith   if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
1519e3372554SBarry Smith     SETERRQ(1,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
15201987afe7SBarry Smith 
1521dbd7a890SLois Curfman McInnes   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
15221eb62cbbSBarry Smith     work[0] = m; work[1] = n;
1523d65a2f8fSBarry Smith     MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
1524dbd7a890SLois Curfman McInnes     if (M == PETSC_DECIDE) M = sum[0];
1525dbd7a890SLois Curfman McInnes     if (N == PETSC_DECIDE) N = sum[1];
15261eb62cbbSBarry Smith   }
152744cd7ae7SLois Curfman McInnes   if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);}
152844cd7ae7SLois Curfman McInnes   if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);}
152944cd7ae7SLois Curfman McInnes   b->m = m; B->m = m;
153044cd7ae7SLois Curfman McInnes   b->n = n; B->n = n;
153144cd7ae7SLois Curfman McInnes   b->N = N; B->N = N;
153244cd7ae7SLois Curfman McInnes   b->M = M; B->M = M;
15331eb62cbbSBarry Smith 
15341eb62cbbSBarry Smith   /* build local table of row and column ownerships */
153544cd7ae7SLois Curfman McInnes   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
153644cd7ae7SLois Curfman McInnes   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
1537603f58a4SSatish Balay   b->cowners = b->rowners + b->size + 2;
153844cd7ae7SLois Curfman McInnes   MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);
153944cd7ae7SLois Curfman McInnes   b->rowners[0] = 0;
154044cd7ae7SLois Curfman McInnes   for ( i=2; i<=b->size; i++ ) {
154144cd7ae7SLois Curfman McInnes     b->rowners[i] += b->rowners[i-1];
15428a729477SBarry Smith   }
154344cd7ae7SLois Curfman McInnes   b->rstart = b->rowners[b->rank];
154444cd7ae7SLois Curfman McInnes   b->rend   = b->rowners[b->rank+1];
154544cd7ae7SLois Curfman McInnes   MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);
154644cd7ae7SLois Curfman McInnes   b->cowners[0] = 0;
154744cd7ae7SLois Curfman McInnes   for ( i=2; i<=b->size; i++ ) {
154844cd7ae7SLois Curfman McInnes     b->cowners[i] += b->cowners[i-1];
15498a729477SBarry Smith   }
155044cd7ae7SLois Curfman McInnes   b->cstart = b->cowners[b->rank];
155144cd7ae7SLois Curfman McInnes   b->cend   = b->cowners[b->rank+1];
15528a729477SBarry Smith 
15535ace5be8SLois Curfman McInnes   if (d_nz == PETSC_DEFAULT) d_nz = 5;
155444cd7ae7SLois Curfman McInnes   ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
155544cd7ae7SLois Curfman McInnes   PLogObjectParent(B,b->A);
15567b8455f0SLois Curfman McInnes   if (o_nz == PETSC_DEFAULT) o_nz = 0;
155744cd7ae7SLois Curfman McInnes   ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
155844cd7ae7SLois Curfman McInnes   PLogObjectParent(B,b->B);
15598a729477SBarry Smith 
15601eb62cbbSBarry Smith   /* build cache for off array entries formed */
156144cd7ae7SLois Curfman McInnes   ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
156290f02eecSBarry Smith   b->donotstash  = 0;
156344cd7ae7SLois Curfman McInnes   b->colmap      = 0;
156444cd7ae7SLois Curfman McInnes   b->garray      = 0;
156544cd7ae7SLois Curfman McInnes   b->roworiented = 1;
15668a729477SBarry Smith 
15671eb62cbbSBarry Smith   /* stuff used for matrix vector multiply */
156844cd7ae7SLois Curfman McInnes   b->lvec      = 0;
156944cd7ae7SLois Curfman McInnes   b->Mvctx     = 0;
15708a729477SBarry Smith 
15717a0afa10SBarry Smith   /* stuff for MatGetRow() */
157244cd7ae7SLois Curfman McInnes   b->rowindices   = 0;
157344cd7ae7SLois Curfman McInnes   b->rowvalues    = 0;
157444cd7ae7SLois Curfman McInnes   b->getrowactive = PETSC_FALSE;
15757a0afa10SBarry Smith 
157644cd7ae7SLois Curfman McInnes   *A = B;
1577d6dfbf8fSBarry Smith   return 0;
1578d6dfbf8fSBarry Smith }
1579c74985f6SBarry Smith 
15805615d1e5SSatish Balay #undef __FUNC__
15815615d1e5SSatish Balay #define __FUNC__ "MatConvertSameType_MPIAIJ"
15823d1612f7SBarry Smith static int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues)
1583d6dfbf8fSBarry Smith {
1584d6dfbf8fSBarry Smith   Mat        mat;
1585416022c9SBarry Smith   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
1586a1b97e82SLois Curfman McInnes   int        ierr, len=0, flg;
1587d6dfbf8fSBarry Smith 
1588416022c9SBarry Smith   *newmat       = 0;
15890452661fSBarry Smith   PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm);
1590a5a9c739SBarry Smith   PLogObjectCreate(mat);
15910452661fSBarry Smith   mat->data       = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
1592416022c9SBarry Smith   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
159344a69424SLois Curfman McInnes   mat->destroy    = MatDestroy_MPIAIJ;
159444a69424SLois Curfman McInnes   mat->view       = MatView_MPIAIJ;
1595d6dfbf8fSBarry Smith   mat->factor     = matin->factor;
1596c456f294SBarry Smith   mat->assembled  = PETSC_TRUE;
1597d6dfbf8fSBarry Smith 
159844cd7ae7SLois Curfman McInnes   a->m = mat->m   = oldmat->m;
159944cd7ae7SLois Curfman McInnes   a->n = mat->n   = oldmat->n;
160044cd7ae7SLois Curfman McInnes   a->M = mat->M   = oldmat->M;
160144cd7ae7SLois Curfman McInnes   a->N = mat->N   = oldmat->N;
1602d6dfbf8fSBarry Smith 
1603416022c9SBarry Smith   a->rstart       = oldmat->rstart;
1604416022c9SBarry Smith   a->rend         = oldmat->rend;
1605416022c9SBarry Smith   a->cstart       = oldmat->cstart;
1606416022c9SBarry Smith   a->cend         = oldmat->cend;
160717699dbbSLois Curfman McInnes   a->size         = oldmat->size;
160817699dbbSLois Curfman McInnes   a->rank         = oldmat->rank;
160964119d58SLois Curfman McInnes   a->insertmode   = NOT_SET_VALUES;
1610bcd2baecSBarry Smith   a->rowvalues    = 0;
1611bcd2baecSBarry Smith   a->getrowactive = PETSC_FALSE;
1612d6dfbf8fSBarry Smith 
1613603f58a4SSatish Balay   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
1614603f58a4SSatish Balay   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
1615603f58a4SSatish Balay   a->cowners = a->rowners + a->size + 2;
1616603f58a4SSatish Balay   PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
1617416022c9SBarry Smith   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
16182ee70a88SLois Curfman McInnes   if (oldmat->colmap) {
16190452661fSBarry Smith     a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
1620416022c9SBarry Smith     PLogObjectMemory(mat,(a->N)*sizeof(int));
1621416022c9SBarry Smith     PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
1622416022c9SBarry Smith   } else a->colmap = 0;
16233f41c07dSBarry Smith   if (oldmat->garray) {
16243f41c07dSBarry Smith     len = ((Mat_SeqAIJ *) (oldmat->B->data))->n;
16253f41c07dSBarry Smith     a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray);
1626464493b3SBarry Smith     PLogObjectMemory(mat,len*sizeof(int));
16273f41c07dSBarry Smith     if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
1628416022c9SBarry Smith   } else a->garray = 0;
1629d6dfbf8fSBarry Smith 
1630416022c9SBarry Smith   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
1631416022c9SBarry Smith   PLogObjectParent(mat,a->lvec);
1632a56f8943SBarry Smith   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
1633416022c9SBarry Smith   PLogObjectParent(mat,a->Mvctx);
1634416022c9SBarry Smith   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
1635416022c9SBarry Smith   PLogObjectParent(mat,a->A);
1636416022c9SBarry Smith   ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
1637416022c9SBarry Smith   PLogObjectParent(mat,a->B);
16385dd7a6c7SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
163925cdf11fSBarry Smith   if (flg) {
1640682d7d0cSBarry Smith     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
1641682d7d0cSBarry Smith   }
16428a729477SBarry Smith   *newmat = mat;
16438a729477SBarry Smith   return 0;
16448a729477SBarry Smith }
1645416022c9SBarry Smith 
164677c4ece6SBarry Smith #include "sys.h"
1647416022c9SBarry Smith 
16485615d1e5SSatish Balay #undef __FUNC__
16495615d1e5SSatish Balay #define __FUNC__ "MatLoad_MPIAIJ"
165019bcc07fSBarry Smith int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
1651416022c9SBarry Smith {
1652d65a2f8fSBarry Smith   Mat          A;
1653416022c9SBarry Smith   int          i, nz, ierr, j,rstart, rend, fd;
1654d65a2f8fSBarry Smith   Scalar       *vals,*svals;
165519bcc07fSBarry Smith   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1656416022c9SBarry Smith   MPI_Status   status;
165717699dbbSLois Curfman McInnes   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1658d65a2f8fSBarry Smith   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
165919bcc07fSBarry Smith   int          tag = ((PetscObject)viewer)->tag;
1660416022c9SBarry Smith 
166117699dbbSLois Curfman McInnes   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
166217699dbbSLois Curfman McInnes   if (!rank) {
166390ace30eSBarry Smith     ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
166477c4ece6SBarry Smith     ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr);
1665e3372554SBarry Smith     if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not matrix object");
1666416022c9SBarry Smith   }
1667416022c9SBarry Smith 
1668416022c9SBarry Smith   MPI_Bcast(header+1,3,MPI_INT,0,comm);
1669416022c9SBarry Smith   M = header[1]; N = header[2];
1670416022c9SBarry Smith   /* determine ownership of all rows */
167117699dbbSLois Curfman McInnes   m = M/size + ((M % size) > rank);
16720452661fSBarry Smith   rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
1673416022c9SBarry Smith   MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1674416022c9SBarry Smith   rowners[0] = 0;
167517699dbbSLois Curfman McInnes   for ( i=2; i<=size; i++ ) {
1676416022c9SBarry Smith     rowners[i] += rowners[i-1];
1677416022c9SBarry Smith   }
167817699dbbSLois Curfman McInnes   rstart = rowners[rank];
167917699dbbSLois Curfman McInnes   rend   = rowners[rank+1];
1680416022c9SBarry Smith 
1681416022c9SBarry Smith   /* distribute row lengths to all processors */
16820452661fSBarry Smith   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
1683416022c9SBarry Smith   offlens = ourlens + (rend-rstart);
168417699dbbSLois Curfman McInnes   if (!rank) {
16850452661fSBarry Smith     rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
168677c4ece6SBarry Smith     ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
16870452661fSBarry Smith     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
168817699dbbSLois Curfman McInnes     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
1689d65a2f8fSBarry Smith     MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
16900452661fSBarry Smith     PetscFree(sndcounts);
1691416022c9SBarry Smith   }
1692416022c9SBarry Smith   else {
1693416022c9SBarry Smith     MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
1694416022c9SBarry Smith   }
1695416022c9SBarry Smith 
169617699dbbSLois Curfman McInnes   if (!rank) {
1697416022c9SBarry Smith     /* calculate the number of nonzeros on each processor */
16980452661fSBarry Smith     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
1699cddf8d76SBarry Smith     PetscMemzero(procsnz,size*sizeof(int));
170017699dbbSLois Curfman McInnes     for ( i=0; i<size; i++ ) {
1701416022c9SBarry Smith       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
1702416022c9SBarry Smith         procsnz[i] += rowlengths[j];
1703416022c9SBarry Smith       }
1704416022c9SBarry Smith     }
17050452661fSBarry Smith     PetscFree(rowlengths);
1706416022c9SBarry Smith 
1707416022c9SBarry Smith     /* determine max buffer needed and allocate it */
1708416022c9SBarry Smith     maxnz = 0;
170917699dbbSLois Curfman McInnes     for ( i=0; i<size; i++ ) {
17100452661fSBarry Smith       maxnz = PetscMax(maxnz,procsnz[i]);
1711416022c9SBarry Smith     }
17120452661fSBarry Smith     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
1713416022c9SBarry Smith 
1714416022c9SBarry Smith     /* read in my part of the matrix column indices  */
1715416022c9SBarry Smith     nz = procsnz[0];
17160452661fSBarry Smith     mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
171777c4ece6SBarry Smith     ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr);
1718d65a2f8fSBarry Smith 
1719d65a2f8fSBarry Smith     /* read in every one elses and ship off */
172017699dbbSLois Curfman McInnes     for ( i=1; i<size; i++ ) {
1721d65a2f8fSBarry Smith       nz = procsnz[i];
172277c4ece6SBarry Smith       ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
1723d65a2f8fSBarry Smith       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
1724d65a2f8fSBarry Smith     }
17250452661fSBarry Smith     PetscFree(cols);
1726416022c9SBarry Smith   }
1727416022c9SBarry Smith   else {
1728416022c9SBarry Smith     /* determine buffer space needed for message */
1729416022c9SBarry Smith     nz = 0;
1730416022c9SBarry Smith     for ( i=0; i<m; i++ ) {
1731416022c9SBarry Smith       nz += ourlens[i];
1732416022c9SBarry Smith     }
17330452661fSBarry Smith     mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1734416022c9SBarry Smith 
1735416022c9SBarry Smith     /* receive message of column indices*/
1736d65a2f8fSBarry Smith     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
1737416022c9SBarry Smith     MPI_Get_count(&status,MPI_INT,&maxnz);
1738e3372554SBarry Smith     if (maxnz != nz) SETERRQ(1,0,"something is wrong with file");
1739416022c9SBarry Smith   }
1740416022c9SBarry Smith 
1741416022c9SBarry Smith   /* loop over local rows, determining number of off diagonal entries */
1742cddf8d76SBarry Smith   PetscMemzero(offlens,m*sizeof(int));
1743416022c9SBarry Smith   jj = 0;
1744416022c9SBarry Smith   for ( i=0; i<m; i++ ) {
1745416022c9SBarry Smith     for ( j=0; j<ourlens[i]; j++ ) {
1746d65a2f8fSBarry Smith       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1747416022c9SBarry Smith       jj++;
1748416022c9SBarry Smith     }
1749416022c9SBarry Smith   }
1750d65a2f8fSBarry Smith 
1751d65a2f8fSBarry Smith   /* create our matrix */
1752416022c9SBarry Smith   for ( i=0; i<m; i++ ) {
1753416022c9SBarry Smith     ourlens[i] -= offlens[i];
1754416022c9SBarry Smith   }
1755d65a2f8fSBarry Smith   ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1756d65a2f8fSBarry Smith   A = *newmat;
17576d4a8577SBarry Smith   MatSetOption(A,MAT_COLUMNS_SORTED);
1758d65a2f8fSBarry Smith   for ( i=0; i<m; i++ ) {
1759d65a2f8fSBarry Smith     ourlens[i] += offlens[i];
1760d65a2f8fSBarry Smith   }
1761416022c9SBarry Smith 
176217699dbbSLois Curfman McInnes   if (!rank) {
17630452661fSBarry Smith     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);
1764416022c9SBarry Smith 
1765416022c9SBarry Smith     /* read in my part of the matrix numerical values  */
1766416022c9SBarry Smith     nz = procsnz[0];
176777c4ece6SBarry Smith     ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
1768d65a2f8fSBarry Smith 
1769d65a2f8fSBarry Smith     /* insert into matrix */
1770d65a2f8fSBarry Smith     jj      = rstart;
1771d65a2f8fSBarry Smith     smycols = mycols;
1772d65a2f8fSBarry Smith     svals   = vals;
1773d65a2f8fSBarry Smith     for ( i=0; i<m; i++ ) {
1774d65a2f8fSBarry Smith       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1775d65a2f8fSBarry Smith       smycols += ourlens[i];
1776d65a2f8fSBarry Smith       svals   += ourlens[i];
1777d65a2f8fSBarry Smith       jj++;
1778416022c9SBarry Smith     }
1779416022c9SBarry Smith 
1780d65a2f8fSBarry Smith     /* read in other processors and ship out */
178117699dbbSLois Curfman McInnes     for ( i=1; i<size; i++ ) {
1782416022c9SBarry Smith       nz = procsnz[i];
178377c4ece6SBarry Smith       ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
1784d65a2f8fSBarry Smith       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
1785416022c9SBarry Smith     }
17860452661fSBarry Smith     PetscFree(procsnz);
1787416022c9SBarry Smith   }
1788d65a2f8fSBarry Smith   else {
1789d65a2f8fSBarry Smith     /* receive numeric values */
17900452661fSBarry Smith     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);
1791416022c9SBarry Smith 
1792d65a2f8fSBarry Smith     /* receive message of values*/
1793d65a2f8fSBarry Smith     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
1794d65a2f8fSBarry Smith     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1795e3372554SBarry Smith     if (maxnz != nz) SETERRQ(1,0,"something is wrong with file");
1796d65a2f8fSBarry Smith 
1797d65a2f8fSBarry Smith     /* insert into matrix */
1798d65a2f8fSBarry Smith     jj      = rstart;
1799d65a2f8fSBarry Smith     smycols = mycols;
1800d65a2f8fSBarry Smith     svals   = vals;
1801d65a2f8fSBarry Smith     for ( i=0; i<m; i++ ) {
1802d65a2f8fSBarry Smith       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1803d65a2f8fSBarry Smith       smycols += ourlens[i];
1804d65a2f8fSBarry Smith       svals   += ourlens[i];
1805d65a2f8fSBarry Smith       jj++;
1806d65a2f8fSBarry Smith     }
1807d65a2f8fSBarry Smith   }
18080452661fSBarry Smith   PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);
1809d65a2f8fSBarry Smith 
18106d4a8577SBarry Smith   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
18116d4a8577SBarry Smith   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1812416022c9SBarry Smith   return 0;
1813416022c9SBarry Smith }
1814