xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision f5e9677a6d02c22ff01eba66abf996f4c40596e1)
1cb512458SBarry Smith #ifndef lint
2*f5e9677aSSatish Balay static char vcid[] = "$Id: mpiaij.c,v 1.186 1997/01/14 17:18:18 balay Exp balay $";
3cb512458SBarry Smith #endif
48a729477SBarry Smith 
570f55243SBarry Smith #include "src/mat/impls/aij/mpi/mpiaij.h"
6f5eb4b81SSatish Balay #include "src/vec/vecimpl.h"
7d9942c19SSatish Balay #include "src/inline/spops.h"
88a729477SBarry Smith 
99e25ed09SBarry Smith /* local utility routine that creates a mapping from the global column
109e25ed09SBarry Smith number to the local number in the off-diagonal part of the local
119e25ed09SBarry Smith storage of the matrix.  This is done in a non scable way since the
129e25ed09SBarry Smith length of colmap equals the global matrix length.
139e25ed09SBarry Smith */
145615d1e5SSatish Balay #undef __FUNC__
155615d1e5SSatish Balay #define __FUNC__ "CreateColmap_MPIAIJ_Private"
160a198c4cSBarry Smith int CreateColmap_MPIAIJ_Private(Mat mat)
179e25ed09SBarry Smith {
1844a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
19ec8511deSBarry Smith   Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
20905e6a2fSBarry Smith   int        n = B->n,i;
21dbb450caSBarry Smith 
220452661fSBarry Smith   aij->colmap = (int *) PetscMalloc(aij->N*sizeof(int));CHKPTRQ(aij->colmap);
23464493b3SBarry Smith   PLogObjectMemory(mat,aij->N*sizeof(int));
24cddf8d76SBarry Smith   PetscMemzero(aij->colmap,aij->N*sizeof(int));
25905e6a2fSBarry Smith   for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
269e25ed09SBarry Smith   return 0;
279e25ed09SBarry Smith }
289e25ed09SBarry Smith 
292493cbb0SBarry Smith extern int DisAssemble_MPIAIJ(Mat);
302493cbb0SBarry Smith 
315615d1e5SSatish Balay #undef __FUNC__
325615d1e5SSatish Balay #define __FUNC__ "MatGetRowIJ_MPIAIJ"
333b2fbd54SBarry Smith static int MatGetRowIJ_MPIAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
343b2fbd54SBarry Smith                            PetscTruth *done)
35299609e3SLois Curfman McInnes {
36299609e3SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
37299609e3SLois Curfman McInnes   int        ierr;
3817699dbbSLois Curfman McInnes   if (aij->size == 1) {
393b2fbd54SBarry Smith     ierr = MatGetRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
40e3372554SBarry Smith   } else SETERRQ(1,0,"not supported in parallel");
413b2fbd54SBarry Smith   return 0;
423b2fbd54SBarry Smith }
433b2fbd54SBarry Smith 
445615d1e5SSatish Balay #undef __FUNC__
455615d1e5SSatish Balay #define __FUNC__ "MatRestoreRowIJ_MPIAIJ"
463b2fbd54SBarry Smith static int MatRestoreRowIJ_MPIAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
473b2fbd54SBarry Smith                                PetscTruth *done)
483b2fbd54SBarry Smith {
493b2fbd54SBarry Smith   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
503b2fbd54SBarry Smith   int        ierr;
513b2fbd54SBarry Smith   if (aij->size == 1) {
523b2fbd54SBarry Smith     ierr = MatRestoreRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
53e3372554SBarry Smith   } else SETERRQ(1,0,"not supported in parallel");
54299609e3SLois Curfman McInnes   return 0;
55299609e3SLois Curfman McInnes }
56299609e3SLois Curfman McInnes 
570520107fSSatish Balay #define CHUNKSIZE   15
58*f5e9677aSSatish Balay #define MatSetValues_SeqAIJ_A_Private(A,row,col,value,addv) \
590520107fSSatish Balay { \
60*f5e9677aSSatish Balay   Mat_SeqAIJ *a = (Mat_SeqAIJ *) (A)->data; \
61*f5e9677aSSatish Balay   int        *rp,ii,nrow,_i,rmax, N, col1; \
62*f5e9677aSSatish Balay   int        *imax = a->imax, *ai = a->i, *ailen = a->ilen; \
63*f5e9677aSSatish Balay   int        *aj = a->j, nonew = a->nonew,shift = a->indexshift; \
64*f5e9677aSSatish Balay   Scalar     *ap, *aa = a->a; \
650520107fSSatish Balay  \
660520107fSSatish Balay     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
670520107fSSatish Balay     rmax = imax[row]; nrow = ailen[row];  \
68*f5e9677aSSatish Balay     col1 = col - shift; \
69*f5e9677aSSatish Balay      \
700520107fSSatish Balay       for ( _i=0; _i<nrow; _i++ ) { \
71*f5e9677aSSatish Balay         if (rp[_i] > col1) break; \
72*f5e9677aSSatish Balay         if (rp[_i] == col1) { \
730520107fSSatish Balay           if (addv == ADD_VALUES) ap[_i] += value;   \
740520107fSSatish Balay           else                  ap[_i] = value; \
750520107fSSatish Balay           goto _noinsert; \
760520107fSSatish Balay         } \
770520107fSSatish Balay       }  \
78*f5e9677aSSatish Balay       if (nonew)  goto _noinsert; \
790520107fSSatish Balay       if (nrow >= rmax) { \
800520107fSSatish Balay         /* there is no extra room in row, therefore enlarge */ \
810520107fSSatish Balay         int    new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \
820520107fSSatish Balay         Scalar *new_a; \
830520107fSSatish Balay  \
840520107fSSatish Balay         /* malloc new storage space */ \
850520107fSSatish Balay         len     = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \
860520107fSSatish Balay         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
870520107fSSatish Balay         new_j   = (int *) (new_a + new_nz); \
880520107fSSatish Balay         new_i   = new_j + new_nz; \
890520107fSSatish Balay  \
900520107fSSatish Balay         /* copy over old data into new slots */ \
910520107fSSatish Balay         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \
920520107fSSatish Balay         for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
930520107fSSatish Balay         PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int)); \
940520107fSSatish Balay         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
950520107fSSatish Balay         PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
960520107fSSatish Balay                                                            len*sizeof(int)); \
970520107fSSatish Balay         PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar)); \
980520107fSSatish Balay         PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
990520107fSSatish Balay                                                            len*sizeof(Scalar));  \
1000520107fSSatish Balay         /* free up old matrix storage */ \
101*f5e9677aSSatish Balay  \
1020520107fSSatish Balay         PetscFree(a->a);  \
1030520107fSSatish Balay         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
1040520107fSSatish Balay         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
1050520107fSSatish Balay         a->singlemalloc = 1; \
1060520107fSSatish Balay  \
1070520107fSSatish Balay         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
1080520107fSSatish Balay         rmax = imax[row] = imax[row] + CHUNKSIZE; \
1090520107fSSatish Balay         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
1100520107fSSatish Balay         a->maxnz += CHUNKSIZE; \
1110520107fSSatish Balay         a->reallocs++; \
1120520107fSSatish Balay       } \
1130520107fSSatish Balay       N = nrow++ - 1; a->nz++; \
1140520107fSSatish Balay       /* shift up all the later entries in this row */ \
1150520107fSSatish Balay       for ( ii=N; ii>=_i; ii-- ) { \
1160520107fSSatish Balay         rp[ii+1] = rp[ii]; \
1170520107fSSatish Balay         ap[ii+1] = ap[ii]; \
1180520107fSSatish Balay       } \
119*f5e9677aSSatish Balay       rp[_i] = col1;  \
1200520107fSSatish Balay       ap[_i] = value;  \
1210520107fSSatish Balay       _noinsert: ; \
1220520107fSSatish Balay       ailen[row] = nrow; \
1230520107fSSatish Balay }
1240a198c4cSBarry Smith 
1250520107fSSatish Balay extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
1265615d1e5SSatish Balay #undef __FUNC__
1275615d1e5SSatish Balay #define __FUNC__ "MatSetValues_MPIAIJ"
1284b0e389bSBarry Smith static int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
1298a729477SBarry Smith {
13044a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1314b0e389bSBarry Smith   Scalar     value;
1321eb62cbbSBarry Smith   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
1331eb62cbbSBarry Smith   int        cstart = aij->cstart, cend = aij->cend,row,col;
134905e6a2fSBarry Smith   int        roworiented = aij->roworiented;
1358a729477SBarry Smith 
1360520107fSSatish Balay   /* Some Variables required in the macro */
137*f5e9677aSSatish Balay   /*  Mat        A = aij->A;
1380520107fSSatish Balay   Mat_SeqAIJ *a = (Mat_SeqAIJ *) (A)->data;
139*f5e9677aSSatish Balay   int        *rp,ii,nrow,_i,rmax,N,col2;
1400520107fSSatish Balay   int        *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1410520107fSSatish Balay   int        *aj=a->j,shift=a->indexshift;
1420520107fSSatish Balay   Scalar     *ap,*aa=a->a;
143*f5e9677aSSatish Balay   */
1440a198c4cSBarry Smith #if defined(PETSC_BOPT_g)
14564119d58SLois Curfman McInnes   if (aij->insertmode != NOT_SET_VALUES && aij->insertmode != addv) {
146e3372554SBarry Smith     SETERRQ(1,0,"Cannot mix inserts and adds");
1478a729477SBarry Smith   }
1480a198c4cSBarry Smith #endif
1491eb62cbbSBarry Smith   aij->insertmode = addv;
1508a729477SBarry Smith   for ( i=0; i<m; i++ ) {
1510a198c4cSBarry Smith #if defined(PETSC_BOPT_g)
152e3372554SBarry Smith     if (im[i] < 0) SETERRQ(1,0,"Negative row");
153e3372554SBarry Smith     if (im[i] >= aij->M) SETERRQ(1,0,"Row too large");
1540a198c4cSBarry Smith #endif
1554b0e389bSBarry Smith     if (im[i] >= rstart && im[i] < rend) {
1564b0e389bSBarry Smith       row = im[i] - rstart;
1571eb62cbbSBarry Smith       for ( j=0; j<n; j++ ) {
1584b0e389bSBarry Smith         if (in[j] >= cstart && in[j] < cend){
1594b0e389bSBarry Smith           col = in[j] - cstart;
1604b0e389bSBarry Smith           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
161*f5e9677aSSatish Balay           MatSetValues_SeqAIJ_A_Private(aij->A,row,col,value,addv);
1620520107fSSatish Balay           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
1631eb62cbbSBarry Smith         }
1640a198c4cSBarry Smith #if defined(PETSC_BOPT_g)
165e3372554SBarry Smith         else if (in[j] < 0) {SETERRQ(1,0,"Negative column");}
166e3372554SBarry Smith         else if (in[j] >= aij->N) {SETERRQ(1,0,"Col too large");}
1670a198c4cSBarry Smith #endif
1681eb62cbbSBarry Smith         else {
169227d817aSBarry Smith           if (mat->was_assembled) {
170905e6a2fSBarry Smith             if (!aij->colmap) {
171905e6a2fSBarry Smith               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
172905e6a2fSBarry Smith             }
173905e6a2fSBarry Smith             col = aij->colmap[in[j]] - 1;
174ec8511deSBarry Smith             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
1752493cbb0SBarry Smith               ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
1764b0e389bSBarry Smith               col =  in[j];
177d6dfbf8fSBarry Smith             }
1789e25ed09SBarry Smith           }
1794b0e389bSBarry Smith           else col = in[j];
1804b0e389bSBarry Smith           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
1810a198c4cSBarry Smith           ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
1821eb62cbbSBarry Smith         }
1831eb62cbbSBarry Smith       }
1841eb62cbbSBarry Smith     }
1851eb62cbbSBarry Smith     else {
18690f02eecSBarry Smith       if (roworiented && !aij->donotstash) {
1874b0e389bSBarry Smith         ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
1884b0e389bSBarry Smith       }
1894b0e389bSBarry Smith       else {
19090f02eecSBarry Smith         if (!aij->donotstash) {
1914b0e389bSBarry Smith           row = im[i];
1924b0e389bSBarry Smith           for ( j=0; j<n; j++ ) {
1934b0e389bSBarry Smith             ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
1944b0e389bSBarry Smith           }
1954b0e389bSBarry Smith         }
1961eb62cbbSBarry Smith       }
1978a729477SBarry Smith     }
19890f02eecSBarry Smith   }
1998a729477SBarry Smith   return 0;
2008a729477SBarry Smith }
2018a729477SBarry Smith 
2025615d1e5SSatish Balay #undef __FUNC__
2035615d1e5SSatish Balay #define __FUNC__ "MatGetValues_MPIAIJ"
204b49de8d1SLois Curfman McInnes static int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
205b49de8d1SLois Curfman McInnes {
206b49de8d1SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
207b49de8d1SLois Curfman McInnes   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
208b49de8d1SLois Curfman McInnes   int        cstart = aij->cstart, cend = aij->cend,row,col;
209b49de8d1SLois Curfman McInnes 
210b49de8d1SLois Curfman McInnes   for ( i=0; i<m; i++ ) {
211e3372554SBarry Smith     if (idxm[i] < 0) SETERRQ(1,0,"Negative row");
212e3372554SBarry Smith     if (idxm[i] >= aij->M) SETERRQ(1,0,"Row too large");
213b49de8d1SLois Curfman McInnes     if (idxm[i] >= rstart && idxm[i] < rend) {
214b49de8d1SLois Curfman McInnes       row = idxm[i] - rstart;
215b49de8d1SLois Curfman McInnes       for ( j=0; j<n; j++ ) {
216e3372554SBarry Smith         if (idxn[j] < 0) SETERRQ(1,0,"Negative column");
217e3372554SBarry Smith         if (idxn[j] >= aij->N) SETERRQ(1,0,"Col too large");
218b49de8d1SLois Curfman McInnes         if (idxn[j] >= cstart && idxn[j] < cend){
219b49de8d1SLois Curfman McInnes           col = idxn[j] - cstart;
220b49de8d1SLois Curfman McInnes           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
221b49de8d1SLois Curfman McInnes         }
222b49de8d1SLois Curfman McInnes         else {
223905e6a2fSBarry Smith           if (!aij->colmap) {
224905e6a2fSBarry Smith             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
225905e6a2fSBarry Smith           }
226905e6a2fSBarry Smith           col = aij->colmap[idxn[j]] - 1;
227e60e1c95SSatish Balay           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
228d9d09a02SSatish Balay           else {
229b49de8d1SLois Curfman McInnes             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
230b49de8d1SLois Curfman McInnes           }
231b49de8d1SLois Curfman McInnes         }
232b49de8d1SLois Curfman McInnes       }
233d9d09a02SSatish Balay     }
234b49de8d1SLois Curfman McInnes     else {
235e3372554SBarry Smith       SETERRQ(1,0,"Only local values currently supported");
236b49de8d1SLois Curfman McInnes     }
237b49de8d1SLois Curfman McInnes   }
238b49de8d1SLois Curfman McInnes   return 0;
239b49de8d1SLois Curfman McInnes }
240b49de8d1SLois Curfman McInnes 
2415615d1e5SSatish Balay #undef __FUNC__
2425615d1e5SSatish Balay #define __FUNC__ "MatAssemblyBegin_MPIAIJ"
243fc76ce87SLois Curfman McInnes static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
2448a729477SBarry Smith {
24544a69424SLois Curfman McInnes   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
246d6dfbf8fSBarry Smith   MPI_Comm    comm = mat->comm;
24717699dbbSLois Curfman McInnes   int         size = aij->size, *owners = aij->rowners;
24817699dbbSLois Curfman McInnes   int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
2491eb62cbbSBarry Smith   MPI_Request *send_waits,*recv_waits;
2506abc6512SBarry Smith   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
2511eb62cbbSBarry Smith   InsertMode  addv;
2521eb62cbbSBarry Smith   Scalar      *rvalues,*svalues;
2531eb62cbbSBarry Smith 
2541eb62cbbSBarry Smith   /* make sure all processors are either in INSERTMODE or ADDMODE */
255d65a2f8fSBarry Smith   MPI_Allreduce(&aij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
256dbb450caSBarry Smith   if (addv == (ADD_VALUES|INSERT_VALUES)) {
257e3372554SBarry Smith     SETERRQ(1,0,"Some processors inserted others added");
2581eb62cbbSBarry Smith   }
2591eb62cbbSBarry Smith   aij->insertmode = addv; /* in case this processor had no cache */
2601eb62cbbSBarry Smith 
2611eb62cbbSBarry Smith   /*  first count number of contributors to each processor */
2620452661fSBarry Smith   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
263cddf8d76SBarry Smith   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
2640452661fSBarry Smith   owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
2651eb62cbbSBarry Smith   for ( i=0; i<aij->stash.n; i++ ) {
2661eb62cbbSBarry Smith     idx = aij->stash.idx[i];
26717699dbbSLois Curfman McInnes     for ( j=0; j<size; j++ ) {
2681eb62cbbSBarry Smith       if (idx >= owners[j] && idx < owners[j+1]) {
2691eb62cbbSBarry Smith         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
2708a729477SBarry Smith       }
2718a729477SBarry Smith     }
2728a729477SBarry Smith   }
27317699dbbSLois Curfman McInnes   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
2741eb62cbbSBarry Smith 
2751eb62cbbSBarry Smith   /* inform other processors of number of messages and max length*/
2760452661fSBarry Smith   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
27717699dbbSLois Curfman McInnes   MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);
27817699dbbSLois Curfman McInnes   nreceives = work[rank];
27917699dbbSLois Curfman McInnes   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
28017699dbbSLois Curfman McInnes   nmax = work[rank];
2810452661fSBarry Smith   PetscFree(work);
2821eb62cbbSBarry Smith 
2831eb62cbbSBarry Smith   /* post receives:
2841eb62cbbSBarry Smith        1) each message will consist of ordered pairs
2851eb62cbbSBarry Smith      (global index,value) we store the global index as a double
286d6dfbf8fSBarry Smith      to simplify the message passing.
2871eb62cbbSBarry Smith        2) since we don't know how long each individual message is we
2881eb62cbbSBarry Smith      allocate the largest needed buffer for each receive. Potentially
2891eb62cbbSBarry Smith      this is a lot of wasted space.
2901eb62cbbSBarry Smith 
2911eb62cbbSBarry Smith 
2921eb62cbbSBarry Smith        This could be done better.
2931eb62cbbSBarry Smith   */
2940452661fSBarry Smith   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
29578b31e54SBarry Smith   CHKPTRQ(rvalues);
2960452661fSBarry Smith   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
29778b31e54SBarry Smith   CHKPTRQ(recv_waits);
2981eb62cbbSBarry Smith   for ( i=0; i<nreceives; i++ ) {
299416022c9SBarry Smith     MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
3001eb62cbbSBarry Smith               comm,recv_waits+i);
3011eb62cbbSBarry Smith   }
3021eb62cbbSBarry Smith 
3031eb62cbbSBarry Smith   /* do sends:
3041eb62cbbSBarry Smith       1) starts[i] gives the starting index in svalues for stuff going to
3051eb62cbbSBarry Smith          the ith processor
3061eb62cbbSBarry Smith   */
3070452661fSBarry Smith   svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
3080452661fSBarry Smith   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
30978b31e54SBarry Smith   CHKPTRQ(send_waits);
3100452661fSBarry Smith   starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
3111eb62cbbSBarry Smith   starts[0] = 0;
31217699dbbSLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
3131eb62cbbSBarry Smith   for ( i=0; i<aij->stash.n; i++ ) {
3141eb62cbbSBarry Smith     svalues[3*starts[owner[i]]]       = (Scalar)  aij->stash.idx[i];
3151eb62cbbSBarry Smith     svalues[3*starts[owner[i]]+1]     = (Scalar)  aij->stash.idy[i];
3161eb62cbbSBarry Smith     svalues[3*(starts[owner[i]]++)+2] =  aij->stash.array[i];
3171eb62cbbSBarry Smith   }
3180452661fSBarry Smith   PetscFree(owner);
3191eb62cbbSBarry Smith   starts[0] = 0;
32017699dbbSLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
3211eb62cbbSBarry Smith   count = 0;
32217699dbbSLois Curfman McInnes   for ( i=0; i<size; i++ ) {
3231eb62cbbSBarry Smith     if (procs[i]) {
324416022c9SBarry Smith       MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
3251eb62cbbSBarry Smith                 comm,send_waits+count++);
3261eb62cbbSBarry Smith     }
3271eb62cbbSBarry Smith   }
3280452661fSBarry Smith   PetscFree(starts); PetscFree(nprocs);
3291eb62cbbSBarry Smith 
3301eb62cbbSBarry Smith   /* Free cache space */
33155908cccSLois Curfman McInnes   PLogInfo(mat,"MatAssemblyBegin_MPIAIJ:Number of off-processor values %d\n",aij->stash.n);
33278b31e54SBarry Smith   ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
3331eb62cbbSBarry Smith 
3341eb62cbbSBarry Smith   aij->svalues    = svalues;    aij->rvalues    = rvalues;
3351eb62cbbSBarry Smith   aij->nsends     = nsends;     aij->nrecvs     = nreceives;
3361eb62cbbSBarry Smith   aij->send_waits = send_waits; aij->recv_waits = recv_waits;
3371eb62cbbSBarry Smith   aij->rmax       = nmax;
3381eb62cbbSBarry Smith 
3391eb62cbbSBarry Smith   return 0;
3401eb62cbbSBarry Smith }
34144a69424SLois Curfman McInnes extern int MatSetUpMultiply_MPIAIJ(Mat);
3421eb62cbbSBarry Smith 
3435615d1e5SSatish Balay #undef __FUNC__
3445615d1e5SSatish Balay #define __FUNC__ "MatAssemblyEnd_MPIAIJ"
345fc76ce87SLois Curfman McInnes static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
3461eb62cbbSBarry Smith {
34744a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
3481eb62cbbSBarry Smith   MPI_Status  *send_status,recv_status;
349416022c9SBarry Smith   int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
350905e6a2fSBarry Smith   int         row,col,other_disassembled;
3511eb62cbbSBarry Smith   Scalar      *values,val;
3521eb62cbbSBarry Smith   InsertMode  addv = aij->insertmode;
3531eb62cbbSBarry Smith 
3541eb62cbbSBarry Smith   /*  wait on receives */
3551eb62cbbSBarry Smith   while (count) {
356d6dfbf8fSBarry Smith     MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
3571eb62cbbSBarry Smith     /* unpack receives into our local space */
358d6dfbf8fSBarry Smith     values = aij->rvalues + 3*imdex*aij->rmax;
359c60448a5SBarry Smith     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
3601eb62cbbSBarry Smith     n = n/3;
3611eb62cbbSBarry Smith     for ( i=0; i<n; i++ ) {
362227d817aSBarry Smith       row = (int) PetscReal(values[3*i]) - aij->rstart;
363227d817aSBarry Smith       col = (int) PetscReal(values[3*i+1]);
3641eb62cbbSBarry Smith       val = values[3*i+2];
3651eb62cbbSBarry Smith       if (col >= aij->cstart && col < aij->cend) {
3661eb62cbbSBarry Smith         col -= aij->cstart;
3671eb62cbbSBarry Smith         MatSetValues(aij->A,1,&row,1,&col,&val,addv);
3681eb62cbbSBarry Smith       }
3691eb62cbbSBarry Smith       else {
37055a1b374SBarry Smith         if (mat->was_assembled || mat->assembled) {
371905e6a2fSBarry Smith           if (!aij->colmap) {
372905e6a2fSBarry Smith             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
373905e6a2fSBarry Smith           }
374905e6a2fSBarry Smith           col = aij->colmap[col] - 1;
375ec8511deSBarry Smith           if (col < 0  && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
3762493cbb0SBarry Smith             ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
377227d817aSBarry Smith             col = (int) PetscReal(values[3*i+1]);
378d6dfbf8fSBarry Smith           }
3799e25ed09SBarry Smith         }
3801eb62cbbSBarry Smith         MatSetValues(aij->B,1,&row,1,&col,&val,addv);
3811eb62cbbSBarry Smith       }
3821eb62cbbSBarry Smith     }
3831eb62cbbSBarry Smith     count--;
3841eb62cbbSBarry Smith   }
3850452661fSBarry Smith   PetscFree(aij->recv_waits); PetscFree(aij->rvalues);
3861eb62cbbSBarry Smith 
3871eb62cbbSBarry Smith   /* wait on sends */
3881eb62cbbSBarry Smith   if (aij->nsends) {
3890a198c4cSBarry Smith     send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
3901eb62cbbSBarry Smith     MPI_Waitall(aij->nsends,aij->send_waits,send_status);
3910452661fSBarry Smith     PetscFree(send_status);
3921eb62cbbSBarry Smith   }
3930452661fSBarry Smith   PetscFree(aij->send_waits); PetscFree(aij->svalues);
3941eb62cbbSBarry Smith 
39564119d58SLois Curfman McInnes   aij->insertmode = NOT_SET_VALUES;
39678b31e54SBarry Smith   ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
39778b31e54SBarry Smith   ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);
3981eb62cbbSBarry Smith 
3992493cbb0SBarry Smith   /* determine if any processor has disassembled, if so we must
4002493cbb0SBarry Smith      also disassemble ourselfs, in order that we may reassemble. */
401227d817aSBarry Smith   MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
402227d817aSBarry Smith   if (mat->was_assembled && !other_disassembled) {
4032493cbb0SBarry Smith     ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
4042493cbb0SBarry Smith   }
4052493cbb0SBarry Smith 
4066d4a8577SBarry Smith   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
40778b31e54SBarry Smith     ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
4085e42470aSBarry Smith   }
40978b31e54SBarry Smith   ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
41078b31e54SBarry Smith   ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);
4115e42470aSBarry Smith 
4127a0afa10SBarry Smith   if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;}
4138a729477SBarry Smith   return 0;
4148a729477SBarry Smith }
4158a729477SBarry Smith 
4165615d1e5SSatish Balay #undef __FUNC__
4175615d1e5SSatish Balay #define __FUNC__ "MatZeroEntries_MPIAIJ"
4182ee70a88SLois Curfman McInnes static int MatZeroEntries_MPIAIJ(Mat A)
4191eb62cbbSBarry Smith {
42044a69424SLois Curfman McInnes   Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
421dbd7a890SLois Curfman McInnes   int        ierr;
42278b31e54SBarry Smith   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
42378b31e54SBarry Smith   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
4241eb62cbbSBarry Smith   return 0;
4251eb62cbbSBarry Smith }
4261eb62cbbSBarry Smith 
4271eb62cbbSBarry Smith /* the code does not do the diagonal entries correctly unless the
4281eb62cbbSBarry Smith    matrix is square and the column and row owerships are identical.
4291eb62cbbSBarry Smith    This is a BUG. The only way to fix it seems to be to access
4301eb62cbbSBarry Smith    aij->A and aij->B directly and not through the MatZeroRows()
4311eb62cbbSBarry Smith    routine.
4321eb62cbbSBarry Smith */
4335615d1e5SSatish Balay #undef __FUNC__
4345615d1e5SSatish Balay #define __FUNC__ "MatZeroRows_MPIAIJ"
43544a69424SLois Curfman McInnes static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
4361eb62cbbSBarry Smith {
43744a69424SLois Curfman McInnes   Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
43817699dbbSLois Curfman McInnes   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
4396abc6512SBarry Smith   int            *procs,*nprocs,j,found,idx,nsends,*work;
44017699dbbSLois Curfman McInnes   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
4415392566eSBarry Smith   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
442d6dfbf8fSBarry Smith   int            *lens,imdex,*lrows,*values;
443d6dfbf8fSBarry Smith   MPI_Comm       comm = A->comm;
4441eb62cbbSBarry Smith   MPI_Request    *send_waits,*recv_waits;
4451eb62cbbSBarry Smith   MPI_Status     recv_status,*send_status;
4461eb62cbbSBarry Smith   IS             istmp;
4471eb62cbbSBarry Smith 
44877c4ece6SBarry Smith   ierr = ISGetSize(is,&N); CHKERRQ(ierr);
44978b31e54SBarry Smith   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
4501eb62cbbSBarry Smith 
4511eb62cbbSBarry Smith   /*  first count number of contributors to each processor */
4520452661fSBarry Smith   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
453cddf8d76SBarry Smith   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
4540452661fSBarry Smith   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
4551eb62cbbSBarry Smith   for ( i=0; i<N; i++ ) {
4561eb62cbbSBarry Smith     idx = rows[i];
4571eb62cbbSBarry Smith     found = 0;
45817699dbbSLois Curfman McInnes     for ( j=0; j<size; j++ ) {
4591eb62cbbSBarry Smith       if (idx >= owners[j] && idx < owners[j+1]) {
4601eb62cbbSBarry Smith         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
4611eb62cbbSBarry Smith       }
4621eb62cbbSBarry Smith     }
463e3372554SBarry Smith     if (!found) SETERRQ(1,0,"Index out of range");
4641eb62cbbSBarry Smith   }
46517699dbbSLois Curfman McInnes   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
4661eb62cbbSBarry Smith 
4671eb62cbbSBarry Smith   /* inform other processors of number of messages and max length*/
4680452661fSBarry Smith   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
46917699dbbSLois Curfman McInnes   MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
47017699dbbSLois Curfman McInnes   nrecvs = work[rank];
47117699dbbSLois Curfman McInnes   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
47217699dbbSLois Curfman McInnes   nmax = work[rank];
4730452661fSBarry Smith   PetscFree(work);
4741eb62cbbSBarry Smith 
4751eb62cbbSBarry Smith   /* post receives:   */
4760452661fSBarry Smith   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
47778b31e54SBarry Smith   CHKPTRQ(rvalues);
4780452661fSBarry Smith   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
47978b31e54SBarry Smith   CHKPTRQ(recv_waits);
4801eb62cbbSBarry Smith   for ( i=0; i<nrecvs; i++ ) {
481416022c9SBarry Smith     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
4821eb62cbbSBarry Smith   }
4831eb62cbbSBarry Smith 
4841eb62cbbSBarry Smith   /* do sends:
4851eb62cbbSBarry Smith       1) starts[i] gives the starting index in svalues for stuff going to
4861eb62cbbSBarry Smith          the ith processor
4871eb62cbbSBarry Smith   */
4880452661fSBarry Smith   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
4890452661fSBarry Smith   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
49078b31e54SBarry Smith   CHKPTRQ(send_waits);
4910452661fSBarry Smith   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
4921eb62cbbSBarry Smith   starts[0] = 0;
49317699dbbSLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
4941eb62cbbSBarry Smith   for ( i=0; i<N; i++ ) {
4951eb62cbbSBarry Smith     svalues[starts[owner[i]]++] = rows[i];
4961eb62cbbSBarry Smith   }
4971eb62cbbSBarry Smith   ISRestoreIndices(is,&rows);
4981eb62cbbSBarry Smith 
4991eb62cbbSBarry Smith   starts[0] = 0;
50017699dbbSLois Curfman McInnes   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
5011eb62cbbSBarry Smith   count = 0;
50217699dbbSLois Curfman McInnes   for ( i=0; i<size; i++ ) {
5031eb62cbbSBarry Smith     if (procs[i]) {
504416022c9SBarry Smith       MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
5051eb62cbbSBarry Smith     }
5061eb62cbbSBarry Smith   }
5070452661fSBarry Smith   PetscFree(starts);
5081eb62cbbSBarry Smith 
50917699dbbSLois Curfman McInnes   base = owners[rank];
5101eb62cbbSBarry Smith 
5111eb62cbbSBarry Smith   /*  wait on receives */
5120452661fSBarry Smith   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
5131eb62cbbSBarry Smith   source = lens + nrecvs;
5141eb62cbbSBarry Smith   count  = nrecvs; slen = 0;
5151eb62cbbSBarry Smith   while (count) {
516d6dfbf8fSBarry Smith     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
5171eb62cbbSBarry Smith     /* unpack receives into our local space */
5181eb62cbbSBarry Smith     MPI_Get_count(&recv_status,MPI_INT,&n);
519d6dfbf8fSBarry Smith     source[imdex]  = recv_status.MPI_SOURCE;
520d6dfbf8fSBarry Smith     lens[imdex]  = n;
5211eb62cbbSBarry Smith     slen += n;
5221eb62cbbSBarry Smith     count--;
5231eb62cbbSBarry Smith   }
5240452661fSBarry Smith   PetscFree(recv_waits);
5251eb62cbbSBarry Smith 
5261eb62cbbSBarry Smith   /* move the data into the send scatter */
5270452661fSBarry Smith   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
5281eb62cbbSBarry Smith   count = 0;
5291eb62cbbSBarry Smith   for ( i=0; i<nrecvs; i++ ) {
5301eb62cbbSBarry Smith     values = rvalues + i*nmax;
5311eb62cbbSBarry Smith     for ( j=0; j<lens[i]; j++ ) {
5321eb62cbbSBarry Smith       lrows[count++] = values[j] - base;
5331eb62cbbSBarry Smith     }
5341eb62cbbSBarry Smith   }
5350452661fSBarry Smith   PetscFree(rvalues); PetscFree(lens);
5360452661fSBarry Smith   PetscFree(owner); PetscFree(nprocs);
5371eb62cbbSBarry Smith 
5381eb62cbbSBarry Smith   /* actually zap the local rows */
539537820f0SBarry Smith   ierr = ISCreateGeneral(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
540464493b3SBarry Smith   PLogObjectParent(A,istmp);
5410452661fSBarry Smith   PetscFree(lrows);
54278b31e54SBarry Smith   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
54378b31e54SBarry Smith   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
54478b31e54SBarry Smith   ierr = ISDestroy(istmp); CHKERRQ(ierr);
5451eb62cbbSBarry Smith 
5461eb62cbbSBarry Smith   /* wait on sends */
5471eb62cbbSBarry Smith   if (nsends) {
5480452661fSBarry Smith     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
54978b31e54SBarry Smith     CHKPTRQ(send_status);
5501eb62cbbSBarry Smith     MPI_Waitall(nsends,send_waits,send_status);
5510452661fSBarry Smith     PetscFree(send_status);
5521eb62cbbSBarry Smith   }
5530452661fSBarry Smith   PetscFree(send_waits); PetscFree(svalues);
5541eb62cbbSBarry Smith 
5551eb62cbbSBarry Smith   return 0;
5561eb62cbbSBarry Smith }
5571eb62cbbSBarry Smith 
5585615d1e5SSatish Balay #undef __FUNC__
5595615d1e5SSatish Balay #define __FUNC__ "MatMult_MPIAIJ"
560416022c9SBarry Smith static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
5611eb62cbbSBarry Smith {
562416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
563fbd6ef76SBarry Smith   int        ierr,nt;
564416022c9SBarry Smith 
565a2ce50c7SBarry Smith   ierr = VecGetLocalSize(xx,&nt);  CHKERRQ(ierr);
566fbd6ef76SBarry Smith   if (nt != a->n) {
567e3372554SBarry Smith     SETERRQ(1,0,"Incompatible parition of A and xx");
568fbd6ef76SBarry Smith   }
56943a90d84SBarry Smith   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
57038597bd4SSatish Balay   ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr);
57143a90d84SBarry Smith   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
57238597bd4SSatish Balay   ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
5731eb62cbbSBarry Smith   return 0;
5741eb62cbbSBarry Smith }
5751eb62cbbSBarry Smith 
5765615d1e5SSatish Balay #undef __FUNC__
5775615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_MPIAIJ"
578416022c9SBarry Smith static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
579da3a660dSBarry Smith {
580416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
581da3a660dSBarry Smith   int        ierr;
58243a90d84SBarry Smith   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
58338597bd4SSatish Balay   ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
58443a90d84SBarry Smith   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
58538597bd4SSatish Balay   ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
586da3a660dSBarry Smith   return 0;
587da3a660dSBarry Smith }
588da3a660dSBarry Smith 
5895615d1e5SSatish Balay #undef __FUNC__
5905615d1e5SSatish Balay #define __FUNC__ "MatMultTrans_MPIAIJ"
591416022c9SBarry Smith static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
592da3a660dSBarry Smith {
593416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
594da3a660dSBarry Smith   int        ierr;
595da3a660dSBarry Smith 
596da3a660dSBarry Smith   /* do nondiagonal part */
59738597bd4SSatish Balay   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
598da3a660dSBarry Smith   /* send it on its way */
599537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
600da3a660dSBarry Smith   /* do local part */
60138597bd4SSatish Balay   ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr);
602da3a660dSBarry Smith   /* receive remote parts: note this assumes the values are not actually */
603da3a660dSBarry Smith   /* inserted in yy until the next line, which is true for my implementation*/
604da3a660dSBarry Smith   /* but is not perhaps always true. */
605537820f0SBarry Smith   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
606da3a660dSBarry Smith   return 0;
607da3a660dSBarry Smith }
608da3a660dSBarry Smith 
6095615d1e5SSatish Balay #undef __FUNC__
6105615d1e5SSatish Balay #define __FUNC__ "MatMultTransAdd_MPIAIJ"
611416022c9SBarry Smith static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
612da3a660dSBarry Smith {
613416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
614da3a660dSBarry Smith   int        ierr;
615da3a660dSBarry Smith 
616da3a660dSBarry Smith   /* do nondiagonal part */
61738597bd4SSatish Balay   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
618da3a660dSBarry Smith   /* send it on its way */
619537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
620da3a660dSBarry Smith   /* do local part */
62138597bd4SSatish Balay   ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
622da3a660dSBarry Smith   /* receive remote parts: note this assumes the values are not actually */
623da3a660dSBarry Smith   /* inserted in yy until the next line, which is true for my implementation*/
624da3a660dSBarry Smith   /* but is not perhaps always true. */
6250a198c4cSBarry Smith   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
626da3a660dSBarry Smith   return 0;
627da3a660dSBarry Smith }
628da3a660dSBarry Smith 
6291eb62cbbSBarry Smith /*
6301eb62cbbSBarry Smith   This only works correctly for square matrices where the subblock A->A is the
6311eb62cbbSBarry Smith    diagonal block
6321eb62cbbSBarry Smith */
6335615d1e5SSatish Balay #undef __FUNC__
6345615d1e5SSatish Balay #define __FUNC__ "MatGetDiagonal_MPIAIJ"
635416022c9SBarry Smith static int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
6361eb62cbbSBarry Smith {
637416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
63844cd7ae7SLois Curfman McInnes   if (a->M != a->N)
639e3372554SBarry Smith     SETERRQ(1,0,"Supports only square matrix where A->A is diag block");
6405baf8537SBarry Smith   if (a->rstart != a->cstart || a->rend != a->cend) {
641e3372554SBarry Smith     SETERRQ(1,0,"row partition must equal col partition");  }
642416022c9SBarry Smith   return MatGetDiagonal(a->A,v);
6431eb62cbbSBarry Smith }
6441eb62cbbSBarry Smith 
6455615d1e5SSatish Balay #undef __FUNC__
6465615d1e5SSatish Balay #define __FUNC__ "MatScale_MPIAIJ"
647052efed2SBarry Smith static int MatScale_MPIAIJ(Scalar *aa,Mat A)
648052efed2SBarry Smith {
649052efed2SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
650052efed2SBarry Smith   int        ierr;
651052efed2SBarry Smith   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
652052efed2SBarry Smith   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
653052efed2SBarry Smith   return 0;
654052efed2SBarry Smith }
655052efed2SBarry Smith 
6565615d1e5SSatish Balay #undef __FUNC__
6575615d1e5SSatish Balay #define __FUNC__ "MatDestroy_MPIAIJ"
65844a69424SLois Curfman McInnes static int MatDestroy_MPIAIJ(PetscObject obj)
6591eb62cbbSBarry Smith {
6601eb62cbbSBarry Smith   Mat        mat = (Mat) obj;
66144a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
6621eb62cbbSBarry Smith   int        ierr;
663a5a9c739SBarry Smith #if defined(PETSC_LOG)
6646e35fa57SLois Curfman McInnes   PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N);
665a5a9c739SBarry Smith #endif
6660452661fSBarry Smith   PetscFree(aij->rowners);
66778b31e54SBarry Smith   ierr = MatDestroy(aij->A); CHKERRQ(ierr);
66878b31e54SBarry Smith   ierr = MatDestroy(aij->B); CHKERRQ(ierr);
6690452661fSBarry Smith   if (aij->colmap) PetscFree(aij->colmap);
6700452661fSBarry Smith   if (aij->garray) PetscFree(aij->garray);
6711eb62cbbSBarry Smith   if (aij->lvec)   VecDestroy(aij->lvec);
672a56f8943SBarry Smith   if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
6737a0afa10SBarry Smith   if (aij->rowvalues) PetscFree(aij->rowvalues);
6740452661fSBarry Smith   PetscFree(aij);
67590f02eecSBarry Smith   if (mat->mapping) {
67690f02eecSBarry Smith     ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr);
67790f02eecSBarry Smith   }
678a5a9c739SBarry Smith   PLogObjectDestroy(mat);
6790452661fSBarry Smith   PetscHeaderDestroy(mat);
6801eb62cbbSBarry Smith   return 0;
6811eb62cbbSBarry Smith }
6827857610eSBarry Smith #include "draw.h"
683b16a3bb1SBarry Smith #include "pinclude/pviewer.h"
684ee50ffe9SBarry Smith 
6855615d1e5SSatish Balay #undef __FUNC__
6865615d1e5SSatish Balay #define __FUNC__ "MatView_MPIAIJ_Binary"
687416022c9SBarry Smith static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
6881eb62cbbSBarry Smith {
689416022c9SBarry Smith   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
690416022c9SBarry Smith   int         ierr;
691416022c9SBarry Smith 
69217699dbbSLois Curfman McInnes   if (aij->size == 1) {
693416022c9SBarry Smith     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
694416022c9SBarry Smith   }
695e3372554SBarry Smith   else SETERRQ(1,0,"Only uniprocessor output supported");
696416022c9SBarry Smith   return 0;
697416022c9SBarry Smith }
698416022c9SBarry Smith 
6995615d1e5SSatish Balay #undef __FUNC__
7005615d1e5SSatish Balay #define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworMatlab"
701d7e8b826SBarry Smith static int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
702416022c9SBarry Smith {
70344a69424SLois Curfman McInnes   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
704dbb450caSBarry Smith   Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
705a56f8943SBarry Smith   int         ierr, format,shift = C->indexshift,rank;
706d636dbe3SBarry Smith   FILE        *fd;
70719bcc07fSBarry Smith   ViewerType  vtype;
708416022c9SBarry Smith 
70919bcc07fSBarry Smith   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
71019bcc07fSBarry Smith   if (vtype  == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
71190ace30eSBarry Smith     ierr = ViewerGetFormat(viewer,&format);
7120a198c4cSBarry Smith     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
7134e220ebcSLois Curfman McInnes       MatInfo info;
7144e220ebcSLois Curfman McInnes       int     flg;
715a56f8943SBarry Smith       MPI_Comm_rank(mat->comm,&rank);
71690ace30eSBarry Smith       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
7174e220ebcSLois Curfman McInnes       ierr = MatGetInfo(mat,MAT_LOCAL,&info);
71895e01e2fSLois Curfman McInnes       ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr);
71977c4ece6SBarry Smith       PetscSequentialPhaseBegin(mat->comm,1);
72095e01e2fSLois Curfman McInnes       if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
7214e220ebcSLois Curfman McInnes          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
72295e01e2fSLois Curfman McInnes       else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
7234e220ebcSLois Curfman McInnes          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
7244e220ebcSLois Curfman McInnes       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);
7254e220ebcSLois Curfman McInnes       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);
7264e220ebcSLois Curfman McInnes       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);
7274e220ebcSLois Curfman McInnes       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);
728a56f8943SBarry Smith       fflush(fd);
72977c4ece6SBarry Smith       PetscSequentialPhaseEnd(mat->comm,1);
730a40aa06bSLois Curfman McInnes       ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr);
731416022c9SBarry Smith       return 0;
732416022c9SBarry Smith     }
7330a198c4cSBarry Smith     else if (format == VIEWER_FORMAT_ASCII_INFO) {
73408480c60SBarry Smith       return 0;
73508480c60SBarry Smith     }
736416022c9SBarry Smith   }
737416022c9SBarry Smith 
73819bcc07fSBarry Smith   if (vtype == DRAW_VIEWER) {
73919bcc07fSBarry Smith     Draw       draw;
74019bcc07fSBarry Smith     PetscTruth isnull;
74119bcc07fSBarry Smith     ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
74219bcc07fSBarry Smith     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
74319bcc07fSBarry Smith   }
74419bcc07fSBarry Smith 
74519bcc07fSBarry Smith   if (vtype == ASCII_FILE_VIEWER) {
74690ace30eSBarry Smith     ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
74777c4ece6SBarry Smith     PetscSequentialPhaseBegin(mat->comm,1);
748d13ab20cSBarry Smith     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
74917699dbbSLois Curfman McInnes            aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
7501eb62cbbSBarry Smith            aij->cend);
75178b31e54SBarry Smith     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
75278b31e54SBarry Smith     ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
753d13ab20cSBarry Smith     fflush(fd);
75477c4ece6SBarry Smith     PetscSequentialPhaseEnd(mat->comm,1);
755d13ab20cSBarry Smith   }
756416022c9SBarry Smith   else {
757a56f8943SBarry Smith     int size = aij->size;
758a56f8943SBarry Smith     rank = aij->rank;
75917699dbbSLois Curfman McInnes     if (size == 1) {
76078b31e54SBarry Smith       ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
76195373324SBarry Smith     }
76295373324SBarry Smith     else {
76395373324SBarry Smith       /* assemble the entire matrix onto first processor. */
76495373324SBarry Smith       Mat         A;
765ec8511deSBarry Smith       Mat_SeqAIJ *Aloc;
7662eb8c8abSBarry Smith       int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
76795373324SBarry Smith       Scalar      *a;
7682ee70a88SLois Curfman McInnes 
76917699dbbSLois Curfman McInnes       if (!rank) {
770b4fd4287SBarry Smith         ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
771c451ab25SLois Curfman McInnes                CHKERRQ(ierr);
77295373324SBarry Smith       }
77395373324SBarry Smith       else {
774b4fd4287SBarry Smith         ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
775c451ab25SLois Curfman McInnes                CHKERRQ(ierr);
77695373324SBarry Smith       }
777464493b3SBarry Smith       PLogObjectParent(mat,A);
778416022c9SBarry Smith 
77995373324SBarry Smith       /* copy over the A part */
780ec8511deSBarry Smith       Aloc = (Mat_SeqAIJ*) aij->A->data;
7812ee70a88SLois Curfman McInnes       m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
78295373324SBarry Smith       row = aij->rstart;
783dbb450caSBarry Smith       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
78495373324SBarry Smith       for ( i=0; i<m; i++ ) {
785416022c9SBarry Smith         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
78695373324SBarry Smith         row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
78795373324SBarry Smith       }
7882ee70a88SLois Curfman McInnes       aj = Aloc->j;
789dbb450caSBarry Smith       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}
79095373324SBarry Smith 
79195373324SBarry Smith       /* copy over the B part */
792ec8511deSBarry Smith       Aloc = (Mat_SeqAIJ*) aij->B->data;
7932ee70a88SLois Curfman McInnes       m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
79495373324SBarry Smith       row = aij->rstart;
7950452661fSBarry Smith       ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
796dbb450caSBarry Smith       for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
79795373324SBarry Smith       for ( i=0; i<m; i++ ) {
798416022c9SBarry Smith         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
79995373324SBarry Smith         row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
80095373324SBarry Smith       }
8010452661fSBarry Smith       PetscFree(ct);
8026d4a8577SBarry Smith       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
8036d4a8577SBarry Smith       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
80417699dbbSLois Curfman McInnes       if (!rank) {
80578b31e54SBarry Smith         ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
80695373324SBarry Smith       }
80778b31e54SBarry Smith       ierr = MatDestroy(A); CHKERRQ(ierr);
80895373324SBarry Smith     }
80995373324SBarry Smith   }
8101eb62cbbSBarry Smith   return 0;
8111eb62cbbSBarry Smith }
8121eb62cbbSBarry Smith 
8135615d1e5SSatish Balay #undef __FUNC__
8145615d1e5SSatish Balay #define __FUNC__ "MatView_MPIAIJ"
815416022c9SBarry Smith static int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
816416022c9SBarry Smith {
817416022c9SBarry Smith   Mat         mat = (Mat) obj;
818416022c9SBarry Smith   int         ierr;
81919bcc07fSBarry Smith   ViewerType  vtype;
820416022c9SBarry Smith 
82119bcc07fSBarry Smith   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
82219bcc07fSBarry Smith   if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
82319bcc07fSBarry Smith       vtype == DRAW_VIEWER       || vtype == MATLAB_VIEWER) {
824d7e8b826SBarry Smith     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
825416022c9SBarry Smith   }
82619bcc07fSBarry Smith   else if (vtype == BINARY_FILE_VIEWER) {
827416022c9SBarry Smith     return MatView_MPIAIJ_Binary(mat,viewer);
828416022c9SBarry Smith   }
829416022c9SBarry Smith   return 0;
830416022c9SBarry Smith }
831416022c9SBarry Smith 
8321eb62cbbSBarry Smith /*
8331eb62cbbSBarry Smith     This has to provide several versions.
8341eb62cbbSBarry Smith 
8351eb62cbbSBarry Smith      2) a) use only local smoothing updating outer values only once.
8361eb62cbbSBarry Smith         b) local smoothing updating outer values each inner iteration
837d6dfbf8fSBarry Smith      3) color updating out values betwen colors.
8381eb62cbbSBarry Smith */
8395615d1e5SSatish Balay #undef __FUNC__
8405615d1e5SSatish Balay #define __FUNC__ "MatRelax_MPIAIJ"
841fc76ce87SLois Curfman McInnes static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
842dbb450caSBarry Smith                            double fshift,int its,Vec xx)
8438a729477SBarry Smith {
84444a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
845d6dfbf8fSBarry Smith   Mat        AA = mat->A, BB = mat->B;
846ec8511deSBarry Smith   Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
847c16cb8f2SBarry Smith   Scalar     *b,*x,*xs,*ls,d,*v,sum;
8486abc6512SBarry Smith   int        ierr,*idx, *diag;
849416022c9SBarry Smith   int        n = mat->n, m = mat->m, i,shift = A->indexshift;
8508a729477SBarry Smith 
851d6dfbf8fSBarry Smith   VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
852dbb450caSBarry Smith   xs = x + shift; /* shift by one for index start of 1 */
853dbb450caSBarry Smith   ls = ls + shift;
854ec8511deSBarry Smith   if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;}
855d6dfbf8fSBarry Smith   diag = A->diag;
856c16cb8f2SBarry Smith   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
857da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
85838597bd4SSatish Balay       return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
859da3a660dSBarry Smith     }
86043a90d84SBarry Smith     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
86178b31e54SBarry Smith     CHKERRQ(ierr);
86243a90d84SBarry Smith     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
86378b31e54SBarry Smith     CHKERRQ(ierr);
864d6dfbf8fSBarry Smith     while (its--) {
865d6dfbf8fSBarry Smith       /* go down through the rows */
866d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
867d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
868dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
869dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
870d6dfbf8fSBarry Smith         sum  = b[i];
871d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
872dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
873d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
874dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
875dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
876d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
87755a1b374SBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
878d6dfbf8fSBarry Smith       }
879d6dfbf8fSBarry Smith       /* come up through the rows */
880d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
881d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
882dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
883dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
884d6dfbf8fSBarry Smith         sum  = b[i];
885d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
886dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
887d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
888dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
889dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
890d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
89155a1b374SBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
892d6dfbf8fSBarry Smith       }
893d6dfbf8fSBarry Smith     }
894d6dfbf8fSBarry Smith   }
895d6dfbf8fSBarry Smith   else if (flag & SOR_LOCAL_FORWARD_SWEEP){
896da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
89738597bd4SSatish Balay       return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
898da3a660dSBarry Smith     }
89943a90d84SBarry Smith     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
90078b31e54SBarry Smith     CHKERRQ(ierr);
90143a90d84SBarry Smith     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
90278b31e54SBarry Smith     CHKERRQ(ierr);
903d6dfbf8fSBarry Smith     while (its--) {
904d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
905d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
906dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
907dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
908d6dfbf8fSBarry Smith         sum  = b[i];
909d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
910dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
911d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
912dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
913dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
914d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
91555a1b374SBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
916d6dfbf8fSBarry Smith       }
917d6dfbf8fSBarry Smith     }
918d6dfbf8fSBarry Smith   }
919d6dfbf8fSBarry Smith   else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
920da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
92138597bd4SSatish Balay       return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
922da3a660dSBarry Smith     }
92343a90d84SBarry Smith     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,
92478b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
92543a90d84SBarry Smith     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,
92678b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
927d6dfbf8fSBarry Smith     while (its--) {
928d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
929d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
930dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
931dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
932d6dfbf8fSBarry Smith         sum  = b[i];
933d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
934dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
935d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
936dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
937dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
938d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
93955a1b374SBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
940d6dfbf8fSBarry Smith       }
941d6dfbf8fSBarry Smith     }
942d6dfbf8fSBarry Smith   }
943c16cb8f2SBarry Smith   else {
944e3372554SBarry Smith     SETERRQ(1,0,"Option not supported");
945c16cb8f2SBarry Smith   }
9468a729477SBarry Smith   return 0;
9478a729477SBarry Smith }
948a66be287SLois Curfman McInnes 
9495615d1e5SSatish Balay #undef __FUNC__
9505615d1e5SSatish Balay #define __FUNC__ "MatGetInfo_MPIAIJ"
9514e220ebcSLois Curfman McInnes static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
952a66be287SLois Curfman McInnes {
953a66be287SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
954a66be287SLois Curfman McInnes   Mat        A = mat->A, B = mat->B;
9554e220ebcSLois Curfman McInnes   int        ierr;
9564e220ebcSLois Curfman McInnes   double     isend[5], irecv[5];
957a66be287SLois Curfman McInnes 
9584e220ebcSLois Curfman McInnes   info->rows_global    = (double)mat->M;
9594e220ebcSLois Curfman McInnes   info->columns_global = (double)mat->N;
9604e220ebcSLois Curfman McInnes   info->rows_local     = (double)mat->m;
9614e220ebcSLois Curfman McInnes   info->columns_local  = (double)mat->N;
9624e220ebcSLois Curfman McInnes   info->block_size     = 1.0;
9634e220ebcSLois Curfman McInnes   ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
9644e220ebcSLois Curfman McInnes   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
9654e220ebcSLois Curfman McInnes   isend[3] = info->memory;  isend[4] = info->mallocs;
9664e220ebcSLois Curfman McInnes   ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
9674e220ebcSLois Curfman McInnes   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
9684e220ebcSLois Curfman McInnes   isend[3] += info->memory;  isend[4] += info->mallocs;
969a66be287SLois Curfman McInnes   if (flag == MAT_LOCAL) {
9704e220ebcSLois Curfman McInnes     info->nz_used      = isend[0];
9714e220ebcSLois Curfman McInnes     info->nz_allocated = isend[1];
9724e220ebcSLois Curfman McInnes     info->nz_unneeded  = isend[2];
9734e220ebcSLois Curfman McInnes     info->memory       = isend[3];
9744e220ebcSLois Curfman McInnes     info->mallocs      = isend[4];
975a66be287SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_MAX) {
9764e220ebcSLois Curfman McInnes     MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);
9774e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
9784e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
9794e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
9804e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
9814e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
982a66be287SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_SUM) {
9834e220ebcSLois Curfman McInnes     MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);
9844e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
9854e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
9864e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
9874e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
9884e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
989a66be287SLois Curfman McInnes   }
9904e220ebcSLois Curfman McInnes   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
9914e220ebcSLois Curfman McInnes   info->fill_ratio_needed = 0;
9924e220ebcSLois Curfman McInnes   info->factor_mallocs    = 0;
9934e220ebcSLois Curfman McInnes 
994a66be287SLois Curfman McInnes   return 0;
995a66be287SLois Curfman McInnes }
996a66be287SLois Curfman McInnes 
997299609e3SLois Curfman McInnes extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*);
998299609e3SLois Curfman McInnes extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*);
999299609e3SLois Curfman McInnes extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double);
1000299609e3SLois Curfman McInnes extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *);
1001299609e3SLois Curfman McInnes extern int MatSolve_MPIAIJ(Mat,Vec,Vec);
1002299609e3SLois Curfman McInnes extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec);
1003299609e3SLois Curfman McInnes extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec);
1004299609e3SLois Curfman McInnes extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec);
1005299609e3SLois Curfman McInnes 
10065615d1e5SSatish Balay #undef __FUNC__
10075615d1e5SSatish Balay #define __FUNC__ "MatSetOption_MPIAIJ"
1008416022c9SBarry Smith static int MatSetOption_MPIAIJ(Mat A,MatOption op)
1009c74985f6SBarry Smith {
1010c0bbcb79SLois Curfman McInnes   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1011c74985f6SBarry Smith 
10126d4a8577SBarry Smith   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
10136d4a8577SBarry Smith       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
10146da5968aSLois Curfman McInnes       op == MAT_COLUMNS_UNSORTED ||
1015b1fbbac0SLois Curfman McInnes       op == MAT_COLUMNS_SORTED) {
1016b1fbbac0SLois Curfman McInnes         MatSetOption(a->A,op);
1017b1fbbac0SLois Curfman McInnes         MatSetOption(a->B,op);
1018b1fbbac0SLois Curfman McInnes   } else if (op == MAT_ROW_ORIENTED) {
1019aeafbbfcSLois Curfman McInnes     a->roworiented = 1;
1020c0bbcb79SLois Curfman McInnes     MatSetOption(a->A,op);
1021c0bbcb79SLois Curfman McInnes     MatSetOption(a->B,op);
1022b1fbbac0SLois Curfman McInnes   } else if (op == MAT_ROWS_SORTED ||
10236da5968aSLois Curfman McInnes              op == MAT_ROWS_UNSORTED ||
10246d4a8577SBarry Smith              op == MAT_SYMMETRIC ||
10256d4a8577SBarry Smith              op == MAT_STRUCTURALLY_SYMMETRIC ||
10266d4a8577SBarry Smith              op == MAT_YES_NEW_DIAGONALS)
102794a424c1SBarry Smith     PLogInfo(A,"Info:MatSetOption_MPIAIJ:Option ignored\n");
10286d4a8577SBarry Smith   else if (op == MAT_COLUMN_ORIENTED) {
10294b0e389bSBarry Smith     a->roworiented = 0;
10304b0e389bSBarry Smith     MatSetOption(a->A,op);
10314b0e389bSBarry Smith     MatSetOption(a->B,op);
103290f02eecSBarry Smith   } else if (op == MAT_IGNORE_OFF_PROCESSOR_ENTRIES) {
103390f02eecSBarry Smith     a->donotstash = 1;
103490f02eecSBarry Smith   } else if (op == MAT_NO_NEW_DIAGONALS)
1035e3372554SBarry Smith     {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");}
1036c0bbcb79SLois Curfman McInnes   else
1037e3372554SBarry Smith     {SETERRQ(PETSC_ERR_SUP,0,"unknown option");}
1038c74985f6SBarry Smith   return 0;
1039c74985f6SBarry Smith }
1040c74985f6SBarry Smith 
10415615d1e5SSatish Balay #undef __FUNC__
10425615d1e5SSatish Balay #define __FUNC__ "MatGetSize_MPIAIJ"
1043d1710a03SLois Curfman McInnes static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
1044c74985f6SBarry Smith {
104544a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1046c74985f6SBarry Smith   *m = mat->M; *n = mat->N;
1047c74985f6SBarry Smith   return 0;
1048c74985f6SBarry Smith }
1049c74985f6SBarry Smith 
10505615d1e5SSatish Balay #undef __FUNC__
10515615d1e5SSatish Balay #define __FUNC__ "MatGetLocalSize_MPIAIJ"
1052d1710a03SLois Curfman McInnes static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
1053c74985f6SBarry Smith {
105444a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1055b7c46309SBarry Smith   *m = mat->m; *n = mat->N;
1056c74985f6SBarry Smith   return 0;
1057c74985f6SBarry Smith }
1058c74985f6SBarry Smith 
10595615d1e5SSatish Balay #undef __FUNC__
10605615d1e5SSatish Balay #define __FUNC__ "MatGetOwnershipRange_MPIAIJ"
1061d1710a03SLois Curfman McInnes static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
1062c74985f6SBarry Smith {
106344a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1064c74985f6SBarry Smith   *m = mat->rstart; *n = mat->rend;
1065c74985f6SBarry Smith   return 0;
1066c74985f6SBarry Smith }
1067c74985f6SBarry Smith 
10686d84be18SBarry Smith extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
10696d84be18SBarry Smith extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
10706d84be18SBarry Smith 
10715615d1e5SSatish Balay #undef __FUNC__
10725615d1e5SSatish Balay #define __FUNC__ "MatGetRow_MPIAIJ"
10736d84be18SBarry Smith int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
107439e00950SLois Curfman McInnes {
1075154123eaSLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
107670f0671dSBarry Smith   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
1077154123eaSLois Curfman McInnes   int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
1078154123eaSLois Curfman McInnes   int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
107970f0671dSBarry Smith   int        *cmap, *idx_p;
108039e00950SLois Curfman McInnes 
1081e3372554SBarry Smith   if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,0,"Already active");
10827a0afa10SBarry Smith   mat->getrowactive = PETSC_TRUE;
10837a0afa10SBarry Smith 
108470f0671dSBarry Smith   if (!mat->rowvalues && (idx || v)) {
10857a0afa10SBarry Smith     /*
10867a0afa10SBarry Smith         allocate enough space to hold information from the longest row.
10877a0afa10SBarry Smith     */
10887a0afa10SBarry Smith     Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data;
1089c16cb8f2SBarry Smith     int     max = 1,m = mat->m,tmp;
1090c16cb8f2SBarry Smith     for ( i=0; i<m; i++ ) {
10917a0afa10SBarry Smith       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
10927a0afa10SBarry Smith       if (max < tmp) { max = tmp; }
10937a0afa10SBarry Smith     }
10947a0afa10SBarry Smith     mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar)));
10957a0afa10SBarry Smith     CHKPTRQ(mat->rowvalues);
10967a0afa10SBarry Smith     mat->rowindices = (int *) (mat->rowvalues + max);
10977a0afa10SBarry Smith   }
10987a0afa10SBarry Smith 
1099e3372554SBarry Smith   if (row < rstart || row >= rend) SETERRQ(1,0,"Only local rows")
1100abc0e9e4SLois Curfman McInnes   lrow = row - rstart;
110139e00950SLois Curfman McInnes 
1102154123eaSLois Curfman McInnes   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1103154123eaSLois Curfman McInnes   if (!v)   {pvA = 0; pvB = 0;}
1104154123eaSLois Curfman McInnes   if (!idx) {pcA = 0; if (!v) pcB = 0;}
110538597bd4SSatish Balay   ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
110638597bd4SSatish Balay   ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1107154123eaSLois Curfman McInnes   nztot = nzA + nzB;
1108154123eaSLois Curfman McInnes 
110970f0671dSBarry Smith   cmap  = mat->garray;
1110154123eaSLois Curfman McInnes   if (v  || idx) {
1111154123eaSLois Curfman McInnes     if (nztot) {
1112154123eaSLois Curfman McInnes       /* Sort by increasing column numbers, assuming A and B already sorted */
111370f0671dSBarry Smith       int imark = -1;
1114154123eaSLois Curfman McInnes       if (v) {
111570f0671dSBarry Smith         *v = v_p = mat->rowvalues;
111639e00950SLois Curfman McInnes         for ( i=0; i<nzB; i++ ) {
111770f0671dSBarry Smith           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1118154123eaSLois Curfman McInnes           else break;
1119154123eaSLois Curfman McInnes         }
1120154123eaSLois Curfman McInnes         imark = i;
112170f0671dSBarry Smith         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
112270f0671dSBarry Smith         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
1123154123eaSLois Curfman McInnes       }
1124154123eaSLois Curfman McInnes       if (idx) {
112570f0671dSBarry Smith         *idx = idx_p = mat->rowindices;
112670f0671dSBarry Smith         if (imark > -1) {
112770f0671dSBarry Smith           for ( i=0; i<imark; i++ ) {
112870f0671dSBarry Smith             idx_p[i] = cmap[cworkB[i]];
112970f0671dSBarry Smith           }
113070f0671dSBarry Smith         } else {
1131154123eaSLois Curfman McInnes           for ( i=0; i<nzB; i++ ) {
113270f0671dSBarry Smith             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1133154123eaSLois Curfman McInnes             else break;
1134154123eaSLois Curfman McInnes           }
1135154123eaSLois Curfman McInnes           imark = i;
113670f0671dSBarry Smith         }
113770f0671dSBarry Smith         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart + cworkA[i];
113870f0671dSBarry Smith         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]];
113939e00950SLois Curfman McInnes       }
114039e00950SLois Curfman McInnes     }
11411ca473b0SSatish Balay     else {
11421ca473b0SSatish Balay       if (idx) *idx = 0;
11431ca473b0SSatish Balay       if (v)   *v   = 0;
11441ca473b0SSatish Balay     }
1145154123eaSLois Curfman McInnes   }
114639e00950SLois Curfman McInnes   *nz = nztot;
114738597bd4SSatish Balay   ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
114838597bd4SSatish Balay   ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
114939e00950SLois Curfman McInnes   return 0;
115039e00950SLois Curfman McInnes }
115139e00950SLois Curfman McInnes 
11525615d1e5SSatish Balay #undef __FUNC__
11535615d1e5SSatish Balay #define __FUNC__ "MatRestoreRow_MPIAIJ"
11546d84be18SBarry Smith int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
115539e00950SLois Curfman McInnes {
11567a0afa10SBarry Smith   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
11577a0afa10SBarry Smith   if (aij->getrowactive == PETSC_FALSE) {
1158e3372554SBarry Smith     SETERRQ(1,0,"MatGetRow not called");
11597a0afa10SBarry Smith   }
11607a0afa10SBarry Smith   aij->getrowactive = PETSC_FALSE;
116139e00950SLois Curfman McInnes   return 0;
116239e00950SLois Curfman McInnes }
116339e00950SLois Curfman McInnes 
11645615d1e5SSatish Balay #undef __FUNC__
11655615d1e5SSatish Balay #define __FUNC__ "MatNorm_MPIAIJ"
1166cddf8d76SBarry Smith static int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
1167855ac2c5SLois Curfman McInnes {
1168855ac2c5SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1169ec8511deSBarry Smith   Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
1170416022c9SBarry Smith   int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
1171416022c9SBarry Smith   double     sum = 0.0;
117204ca555eSLois Curfman McInnes   Scalar     *v;
117304ca555eSLois Curfman McInnes 
117417699dbbSLois Curfman McInnes   if (aij->size == 1) {
117514183eadSLois Curfman McInnes     ierr =  MatNorm(aij->A,type,norm); CHKERRQ(ierr);
117637fa93a5SLois Curfman McInnes   } else {
117704ca555eSLois Curfman McInnes     if (type == NORM_FROBENIUS) {
117804ca555eSLois Curfman McInnes       v = amat->a;
117904ca555eSLois Curfman McInnes       for (i=0; i<amat->nz; i++ ) {
118004ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX)
118104ca555eSLois Curfman McInnes         sum += real(conj(*v)*(*v)); v++;
118204ca555eSLois Curfman McInnes #else
118304ca555eSLois Curfman McInnes         sum += (*v)*(*v); v++;
118404ca555eSLois Curfman McInnes #endif
118504ca555eSLois Curfman McInnes       }
118604ca555eSLois Curfman McInnes       v = bmat->a;
118704ca555eSLois Curfman McInnes       for (i=0; i<bmat->nz; i++ ) {
118804ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX)
118904ca555eSLois Curfman McInnes         sum += real(conj(*v)*(*v)); v++;
119004ca555eSLois Curfman McInnes #else
119104ca555eSLois Curfman McInnes         sum += (*v)*(*v); v++;
119204ca555eSLois Curfman McInnes #endif
119304ca555eSLois Curfman McInnes       }
11946d84be18SBarry Smith       MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
119504ca555eSLois Curfman McInnes       *norm = sqrt(*norm);
119604ca555eSLois Curfman McInnes     }
119704ca555eSLois Curfman McInnes     else if (type == NORM_1) { /* max column norm */
119804ca555eSLois Curfman McInnes       double *tmp, *tmp2;
119904ca555eSLois Curfman McInnes       int    *jj, *garray = aij->garray;
12000452661fSBarry Smith       tmp  = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp);
12010452661fSBarry Smith       tmp2 = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp2);
1202cddf8d76SBarry Smith       PetscMemzero(tmp,aij->N*sizeof(double));
120304ca555eSLois Curfman McInnes       *norm = 0.0;
120404ca555eSLois Curfman McInnes       v = amat->a; jj = amat->j;
120504ca555eSLois Curfman McInnes       for ( j=0; j<amat->nz; j++ ) {
1206579c6b6fSBarry Smith         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
120704ca555eSLois Curfman McInnes       }
120804ca555eSLois Curfman McInnes       v = bmat->a; jj = bmat->j;
120904ca555eSLois Curfman McInnes       for ( j=0; j<bmat->nz; j++ ) {
1210579c6b6fSBarry Smith         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
121104ca555eSLois Curfman McInnes       }
12126d84be18SBarry Smith       MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);
121304ca555eSLois Curfman McInnes       for ( j=0; j<aij->N; j++ ) {
121404ca555eSLois Curfman McInnes         if (tmp2[j] > *norm) *norm = tmp2[j];
121504ca555eSLois Curfman McInnes       }
12160452661fSBarry Smith       PetscFree(tmp); PetscFree(tmp2);
121704ca555eSLois Curfman McInnes     }
121804ca555eSLois Curfman McInnes     else if (type == NORM_INFINITY) { /* max row norm */
1219515d9167SLois Curfman McInnes       double ntemp = 0.0;
122004ca555eSLois Curfman McInnes       for ( j=0; j<amat->m; j++ ) {
1221dbb450caSBarry Smith         v = amat->a + amat->i[j] + shift;
122204ca555eSLois Curfman McInnes         sum = 0.0;
122304ca555eSLois Curfman McInnes         for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
1224cddf8d76SBarry Smith           sum += PetscAbsScalar(*v); v++;
122504ca555eSLois Curfman McInnes         }
1226dbb450caSBarry Smith         v = bmat->a + bmat->i[j] + shift;
122704ca555eSLois Curfman McInnes         for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
1228cddf8d76SBarry Smith           sum += PetscAbsScalar(*v); v++;
122904ca555eSLois Curfman McInnes         }
1230515d9167SLois Curfman McInnes         if (sum > ntemp) ntemp = sum;
123104ca555eSLois Curfman McInnes       }
12326d84be18SBarry Smith       MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);
123304ca555eSLois Curfman McInnes     }
123404ca555eSLois Curfman McInnes     else {
1235e3372554SBarry Smith       SETERRQ(1,0,"No support for two norm");
123604ca555eSLois Curfman McInnes     }
123737fa93a5SLois Curfman McInnes   }
1238855ac2c5SLois Curfman McInnes   return 0;
1239855ac2c5SLois Curfman McInnes }
1240855ac2c5SLois Curfman McInnes 
12415615d1e5SSatish Balay #undef __FUNC__
12425615d1e5SSatish Balay #define __FUNC__ "MatTranspose_MPIAIJ"
12430de55854SLois Curfman McInnes static int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1244b7c46309SBarry Smith {
1245b7c46309SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1246dbb450caSBarry Smith   Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
1247416022c9SBarry Smith   int        ierr,shift = Aloc->indexshift;
1248416022c9SBarry Smith   Mat        B;
1249b7c46309SBarry Smith   int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
1250b7c46309SBarry Smith   Scalar     *array;
1251b7c46309SBarry Smith 
12523638b69dSLois Curfman McInnes   if (matout == PETSC_NULL && M != N)
1253e3372554SBarry Smith     SETERRQ(1,0,"Square matrix only for in-place");
1254b4fd4287SBarry Smith   ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,
1255b4fd4287SBarry Smith          PETSC_NULL,&B); CHKERRQ(ierr);
1256b7c46309SBarry Smith 
1257b7c46309SBarry Smith   /* copy over the A part */
1258ec8511deSBarry Smith   Aloc = (Mat_SeqAIJ*) a->A->data;
1259b7c46309SBarry Smith   m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1260b7c46309SBarry Smith   row = a->rstart;
1261dbb450caSBarry Smith   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
1262b7c46309SBarry Smith   for ( i=0; i<m; i++ ) {
1263416022c9SBarry Smith     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1264b7c46309SBarry Smith     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1265b7c46309SBarry Smith   }
1266b7c46309SBarry Smith   aj = Aloc->j;
12674af08d9eSBarry Smith   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}
1268b7c46309SBarry Smith 
1269b7c46309SBarry Smith   /* copy over the B part */
1270ec8511deSBarry Smith   Aloc = (Mat_SeqAIJ*) a->B->data;
1271b7c46309SBarry Smith   m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1272b7c46309SBarry Smith   row = a->rstart;
12730452661fSBarry Smith   ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
1274dbb450caSBarry Smith   for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
1275b7c46309SBarry Smith   for ( i=0; i<m; i++ ) {
1276416022c9SBarry Smith     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1277b7c46309SBarry Smith     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1278b7c46309SBarry Smith   }
12790452661fSBarry Smith   PetscFree(ct);
12806d4a8577SBarry Smith   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
12816d4a8577SBarry Smith   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
12823638b69dSLois Curfman McInnes   if (matout != PETSC_NULL) {
12830de55854SLois Curfman McInnes     *matout = B;
12840de55854SLois Curfman McInnes   } else {
12850de55854SLois Curfman McInnes     /* This isn't really an in-place transpose .... but free data structures from a */
12860452661fSBarry Smith     PetscFree(a->rowners);
12870de55854SLois Curfman McInnes     ierr = MatDestroy(a->A); CHKERRQ(ierr);
12880de55854SLois Curfman McInnes     ierr = MatDestroy(a->B); CHKERRQ(ierr);
12890452661fSBarry Smith     if (a->colmap) PetscFree(a->colmap);
12900452661fSBarry Smith     if (a->garray) PetscFree(a->garray);
12910de55854SLois Curfman McInnes     if (a->lvec) VecDestroy(a->lvec);
1292a56f8943SBarry Smith     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
12930452661fSBarry Smith     PetscFree(a);
1294416022c9SBarry Smith     PetscMemcpy(A,B,sizeof(struct _Mat));
12950452661fSBarry Smith     PetscHeaderDestroy(B);
12960de55854SLois Curfman McInnes   }
1297b7c46309SBarry Smith   return 0;
1298b7c46309SBarry Smith }
1299b7c46309SBarry Smith 
13005615d1e5SSatish Balay #undef __FUNC__
13015615d1e5SSatish Balay #define __FUNC__ "MatDiagonalScale_MPIAIJ"
13024b967eb1SSatish Balay int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1303a008b906SSatish Balay {
13044b967eb1SSatish Balay   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
13054b967eb1SSatish Balay   Mat a = aij->A, b = aij->B;
1306a008b906SSatish Balay   int ierr,s1,s2,s3;
1307a008b906SSatish Balay 
13084b967eb1SSatish Balay   ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr);
13094b967eb1SSatish Balay   if (rr) {
13104b967eb1SSatish Balay     s3 = aij->n;
13114b967eb1SSatish Balay     VecGetLocalSize_Fast(rr,s1);
1312e3372554SBarry Smith     if (s1!=s3) SETERRQ(1,0,"right vector non-conforming local size");
13134b967eb1SSatish Balay     /* Overlap communication with computation. */
131443a90d84SBarry Smith     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
1315a008b906SSatish Balay   }
13164b967eb1SSatish Balay   if (ll) {
13174b967eb1SSatish Balay     VecGetLocalSize_Fast(ll,s1);
1318e3372554SBarry Smith     if (s1!=s2) SETERRQ(1,0,"left vector non-conforming local size");
1319c351683dSSatish Balay     ierr = (*b->ops.diagonalscale)(b,ll,0); CHKERRQ(ierr);
13204b967eb1SSatish Balay   }
13214b967eb1SSatish Balay   /* scale  the diagonal block */
1322c351683dSSatish Balay   ierr = (*a->ops.diagonalscale)(a,ll,rr); CHKERRQ(ierr);
13234b967eb1SSatish Balay 
13244b967eb1SSatish Balay   if (rr) {
13254b967eb1SSatish Balay     /* Do a scatter end and then right scale the off-diagonal block */
132643a90d84SBarry Smith     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
1327c351683dSSatish Balay     ierr = (*b->ops.diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr);
13284b967eb1SSatish Balay   }
13294b967eb1SSatish Balay 
1330a008b906SSatish Balay   return 0;
1331a008b906SSatish Balay }
1332a008b906SSatish Balay 
1333a008b906SSatish Balay 
1334682d7d0cSBarry Smith extern int MatPrintHelp_SeqAIJ(Mat);
13355615d1e5SSatish Balay #undef __FUNC__
13365615d1e5SSatish Balay #define __FUNC__ "MatPrintHelp_MPIAIJ"
1337682d7d0cSBarry Smith static int MatPrintHelp_MPIAIJ(Mat A)
1338682d7d0cSBarry Smith {
1339682d7d0cSBarry Smith   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1340682d7d0cSBarry Smith 
1341682d7d0cSBarry Smith   if (!a->rank) return MatPrintHelp_SeqAIJ(a->A);
1342682d7d0cSBarry Smith   else return 0;
1343682d7d0cSBarry Smith }
1344682d7d0cSBarry Smith 
13455615d1e5SSatish Balay #undef __FUNC__
13465615d1e5SSatish Balay #define __FUNC__ "MatGetBlockSize_MPIAIJ"
13475a838052SSatish Balay static int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
13485a838052SSatish Balay {
13495a838052SSatish Balay   *bs = 1;
13505a838052SSatish Balay   return 0;
13515a838052SSatish Balay }
13525615d1e5SSatish Balay #undef __FUNC__
13535615d1e5SSatish Balay #define __FUNC__ "MatSetUnfactored_MPIAIJ"
1354bb5a7306SBarry Smith static int MatSetUnfactored_MPIAIJ(Mat A)
1355bb5a7306SBarry Smith {
1356bb5a7306SBarry Smith   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1357bb5a7306SBarry Smith   int        ierr;
1358bb5a7306SBarry Smith   ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
1359bb5a7306SBarry Smith   return 0;
1360bb5a7306SBarry Smith }
1361bb5a7306SBarry Smith 
13625a838052SSatish Balay 
1363fc76ce87SLois Curfman McInnes extern int MatConvert_MPIAIJ(Mat,MatType,Mat *);
13643d1612f7SBarry Smith static int MatConvertSameType_MPIAIJ(Mat,Mat *,int);
13652f86bd48SSatish Balay extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int);
13660a198c4cSBarry Smith extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
13670a198c4cSBarry Smith extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **);
13688a729477SBarry Smith /* -------------------------------------------------------------------*/
13692ee70a88SLois Curfman McInnes static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
137039e00950SLois Curfman McInnes        MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
137144a69424SLois Curfman McInnes        MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
137244a69424SLois Curfman McInnes        MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
137336ce4990SBarry Smith        0,0,
137436ce4990SBarry Smith        0,0,
137536ce4990SBarry Smith        0,0,
137644a69424SLois Curfman McInnes        MatRelax_MPIAIJ,
1377b7c46309SBarry Smith        MatTranspose_MPIAIJ,
1378a66be287SLois Curfman McInnes        MatGetInfo_MPIAIJ,0,
1379a008b906SSatish Balay        MatGetDiagonal_MPIAIJ,MatDiagonalScale_MPIAIJ,MatNorm_MPIAIJ,
1380ee50ffe9SBarry Smith        MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
13811eb62cbbSBarry Smith        0,
1382299609e3SLois Curfman McInnes        MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
138336ce4990SBarry Smith        0,0,0,0,
1384d1710a03SLois Curfman McInnes        MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
138536ce4990SBarry Smith        0,0,
13863e85bedfSBarry Smith        0,0,MatConvert_MPIAIJ,MatConvertSameType_MPIAIJ,0,0,
1387b49de8d1SLois Curfman McInnes        0,0,0,
1388598137ffSSatish Balay        MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0,
1389052efed2SBarry Smith        MatPrintHelp_MPIAIJ,
13903b2fbd54SBarry Smith        MatScale_MPIAIJ,0,0,0,
13910a198c4cSBarry Smith        MatGetBlockSize_MPIAIJ,0,0,0,0,
1392bb5a7306SBarry Smith        MatFDColoringCreate_MPIAIJ,0,MatSetUnfactored_MPIAIJ};
139336ce4990SBarry Smith 
13948a729477SBarry Smith 
13955615d1e5SSatish Balay #undef __FUNC__
13965615d1e5SSatish Balay #define __FUNC__ "MatCreateMPIAIJ"
13971987afe7SBarry Smith /*@C
1398ff756334SLois Curfman McInnes    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
13993a511b96SLois Curfman McInnes    (the default parallel PETSc format).  For good matrix assembly performance
14003a511b96SLois Curfman McInnes    the user should preallocate the matrix storage by setting the parameters
14013a511b96SLois Curfman McInnes    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
14023a511b96SLois Curfman McInnes    performance can be increased by more than a factor of 50.
14038a729477SBarry Smith 
14048a729477SBarry Smith    Input Parameters:
14051eb62cbbSBarry Smith .  comm - MPI communicator
14067d3e4905SLois Curfman McInnes .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
140792e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
140892e8d321SLois Curfman McInnes            y vector for the matrix-vector product y = Ax.
140992e8d321SLois Curfman McInnes .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
141092e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
141192e8d321SLois Curfman McInnes            x vector for the matrix-vector product y = Ax.
14127d3e4905SLois Curfman McInnes .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
141392e8d321SLois Curfman McInnes .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
1414ab693e5aSLois Curfman McInnes .  d_nz - number of nonzeros per row in diagonal portion of local submatrix
1415ff756334SLois Curfman McInnes            (same for all local rows)
14162bd5e0b2SLois Curfman McInnes .  d_nzz - array containing the number of nonzeros in the various rows of the
141792e8d321SLois Curfman McInnes            diagonal portion of the local submatrix (possibly different for each row)
14182bd5e0b2SLois Curfman McInnes            or PETSC_NULL. You must leave room for the diagonal entry even if
14192bd5e0b2SLois Curfman McInnes            it is zero.
14202bd5e0b2SLois Curfman McInnes .  o_nz - number of nonzeros per row in the off-diagonal portion of local
1421ab693e5aSLois Curfman McInnes            submatrix (same for all local rows).
14222bd5e0b2SLois Curfman McInnes .  o_nzz - array containing the number of nonzeros in the various rows of the
14232bd5e0b2SLois Curfman McInnes            off-diagonal portion of the local submatrix (possibly different for
14242bd5e0b2SLois Curfman McInnes            each row) or PETSC_NULL.
14258a729477SBarry Smith 
1426ff756334SLois Curfman McInnes    Output Parameter:
142744cd7ae7SLois Curfman McInnes .  A - the matrix
14288a729477SBarry Smith 
1429ff756334SLois Curfman McInnes    Notes:
1430ff756334SLois Curfman McInnes    The AIJ format (also called the Yale sparse matrix format or
1431ff756334SLois Curfman McInnes    compressed row storage), is fully compatible with standard Fortran 77
14320002213bSLois Curfman McInnes    storage.  That is, the stored row and column indices can begin at
14330002213bSLois Curfman McInnes    either one (as in Fortran) or zero.  See the users manual for details.
1434ff756334SLois Curfman McInnes 
1435ff756334SLois Curfman McInnes    The user MUST specify either the local or global matrix dimensions
1436ff756334SLois Curfman McInnes    (possibly both).
1437ff756334SLois Curfman McInnes 
14385511cfe3SLois Curfman McInnes    By default, this format uses inodes (identical nodes) when possible.
14395511cfe3SLois Curfman McInnes    We search for consecutive rows with the same nonzero structure, thereby
14405511cfe3SLois Curfman McInnes    reusing matrix information to achieve increased efficiency.
14415511cfe3SLois Curfman McInnes 
14425511cfe3SLois Curfman McInnes    Options Database Keys:
14435511cfe3SLois Curfman McInnes $    -mat_aij_no_inode  - Do not use inodes
14446e62573dSLois Curfman McInnes $    -mat_aij_inode_limit <limit> - Set inode limit.
14456e62573dSLois Curfman McInnes $        (max limit=5)
14466e62573dSLois Curfman McInnes $    -mat_aij_oneindex - Internally use indexing starting at 1
14476e62573dSLois Curfman McInnes $        rather than 0.  Note: When calling MatSetValues(),
14486e62573dSLois Curfman McInnes $        the user still MUST index entries starting at 0!
14495511cfe3SLois Curfman McInnes 
1450e0245417SLois Curfman McInnes    Storage Information:
1451e0245417SLois Curfman McInnes    For a square global matrix we define each processor's diagonal portion
1452e0245417SLois Curfman McInnes    to be its local rows and the corresponding columns (a square submatrix);
1453e0245417SLois Curfman McInnes    each processor's off-diagonal portion encompasses the remainder of the
1454e0245417SLois Curfman McInnes    local matrix (a rectangular submatrix).
1455e0245417SLois Curfman McInnes 
1456e0245417SLois Curfman McInnes    The user can specify preallocated storage for the diagonal part of
14575ace5be8SLois Curfman McInnes    the local submatrix with either d_nz or d_nnz (not both).  Set
14585ace5be8SLois Curfman McInnes    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
14595ace5be8SLois Curfman McInnes    memory allocation.  Likewise, specify preallocated storage for the
14605ace5be8SLois Curfman McInnes    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1461ff756334SLois Curfman McInnes 
14625511cfe3SLois Curfman McInnes    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
14635511cfe3SLois Curfman McInnes    the figure below we depict these three local rows and all columns (0-11).
14642191d07cSBarry Smith 
1465b810aeb4SBarry Smith $          0 1 2 3 4 5 6 7 8 9 10 11
1466b810aeb4SBarry Smith $         -------------------
14675511cfe3SLois Curfman McInnes $  row 3  |  o o o d d d o o o o o o
14685511cfe3SLois Curfman McInnes $  row 4  |  o o o d d d o o o o o o
14695511cfe3SLois Curfman McInnes $  row 5  |  o o o d d d o o o o o o
14705511cfe3SLois Curfman McInnes $         -------------------
1471b810aeb4SBarry Smith $
1472b810aeb4SBarry Smith 
14735511cfe3SLois Curfman McInnes    Thus, any entries in the d locations are stored in the d (diagonal)
14745511cfe3SLois Curfman McInnes    submatrix, and any entries in the o locations are stored in the
14755511cfe3SLois Curfman McInnes    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
14765511cfe3SLois Curfman McInnes    stored simply in the MATSEQAIJ format for compressed row storage.
14775511cfe3SLois Curfman McInnes 
14785511cfe3SLois Curfman McInnes    Now d_nz should indicate the number of nonzeros per row in the d matrix,
14795511cfe3SLois Curfman McInnes    and o_nz should indicate the number of nonzeros per row in the o matrix.
14805511cfe3SLois Curfman McInnes    In general, for PDE problems in which most nonzeros are near the diagonal,
14813d323bbdSBarry Smith    one expects d_nz >> o_nz. For large problems you MUST preallocate memory
148292e8d321SLois Curfman McInnes    or you will get TERRIBLE performance; see the users' manual chapter on
14836da5968aSLois Curfman McInnes    matrices.
14843a511b96SLois Curfman McInnes 
1485dbd7a890SLois Curfman McInnes .keywords: matrix, aij, compressed row, sparse, parallel
1486ff756334SLois Curfman McInnes 
1487fafbff53SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
14888a729477SBarry Smith @*/
14891eb62cbbSBarry Smith int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
149044cd7ae7SLois Curfman McInnes                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
14918a729477SBarry Smith {
149244cd7ae7SLois Curfman McInnes   Mat          B;
149344cd7ae7SLois Curfman McInnes   Mat_MPIAIJ   *b;
149436ce4990SBarry Smith   int          ierr, i,sum[2],work[2],size;
1495416022c9SBarry Smith 
149644cd7ae7SLois Curfman McInnes   *A = 0;
149744cd7ae7SLois Curfman McInnes   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIAIJ,comm);
149844cd7ae7SLois Curfman McInnes   PLogObjectCreate(B);
149944cd7ae7SLois Curfman McInnes   B->data       = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b);
150044cd7ae7SLois Curfman McInnes   PetscMemzero(b,sizeof(Mat_MPIAIJ));
150144cd7ae7SLois Curfman McInnes   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
150236ce4990SBarry Smith   MPI_Comm_size(comm,&size);
150336ce4990SBarry Smith   if (size == 1) {
150436ce4990SBarry Smith     B->ops.getrowij          = MatGetRowIJ_MPIAIJ;
150536ce4990SBarry Smith     B->ops.restorerowij      = MatRestoreRowIJ_MPIAIJ;
150636ce4990SBarry Smith     B->ops.lufactorsymbolic  = MatLUFactorSymbolic_MPIAIJ;
150736ce4990SBarry Smith     B->ops.lufactornumeric   = MatLUFactorNumeric_MPIAIJ;
150836ce4990SBarry Smith     B->ops.lufactor          = MatLUFactor_MPIAIJ;
150936ce4990SBarry Smith     B->ops.solve             = MatSolve_MPIAIJ;
151036ce4990SBarry Smith     B->ops.solveadd          = MatSolveAdd_MPIAIJ;
151136ce4990SBarry Smith     B->ops.solvetrans        = MatSolveTrans_MPIAIJ;
151236ce4990SBarry Smith     B->ops.solvetransadd     = MatSolveTransAdd_MPIAIJ;
151336ce4990SBarry Smith     B->ops.ilufactorsymbolic = MatILUFactorSymbolic_MPIAIJ;
151436ce4990SBarry Smith   }
151544cd7ae7SLois Curfman McInnes   B->destroy    = MatDestroy_MPIAIJ;
151644cd7ae7SLois Curfman McInnes   B->view       = MatView_MPIAIJ;
151744cd7ae7SLois Curfman McInnes   B->factor     = 0;
151844cd7ae7SLois Curfman McInnes   B->assembled  = PETSC_FALSE;
151990f02eecSBarry Smith   B->mapping    = 0;
1520d6dfbf8fSBarry Smith 
152144cd7ae7SLois Curfman McInnes   b->insertmode = NOT_SET_VALUES;
152244cd7ae7SLois Curfman McInnes   MPI_Comm_rank(comm,&b->rank);
152344cd7ae7SLois Curfman McInnes   MPI_Comm_size(comm,&b->size);
15241eb62cbbSBarry Smith 
1525b4fd4287SBarry Smith   if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
1526e3372554SBarry Smith     SETERRQ(1,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
15271987afe7SBarry Smith 
1528dbd7a890SLois Curfman McInnes   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
15291eb62cbbSBarry Smith     work[0] = m; work[1] = n;
1530d65a2f8fSBarry Smith     MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
1531dbd7a890SLois Curfman McInnes     if (M == PETSC_DECIDE) M = sum[0];
1532dbd7a890SLois Curfman McInnes     if (N == PETSC_DECIDE) N = sum[1];
15331eb62cbbSBarry Smith   }
153444cd7ae7SLois Curfman McInnes   if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);}
153544cd7ae7SLois Curfman McInnes   if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);}
153644cd7ae7SLois Curfman McInnes   b->m = m; B->m = m;
153744cd7ae7SLois Curfman McInnes   b->n = n; B->n = n;
153844cd7ae7SLois Curfman McInnes   b->N = N; B->N = N;
153944cd7ae7SLois Curfman McInnes   b->M = M; B->M = M;
15401eb62cbbSBarry Smith 
15411eb62cbbSBarry Smith   /* build local table of row and column ownerships */
154244cd7ae7SLois Curfman McInnes   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
154344cd7ae7SLois Curfman McInnes   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
1544603f58a4SSatish Balay   b->cowners = b->rowners + b->size + 2;
154544cd7ae7SLois Curfman McInnes   MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);
154644cd7ae7SLois Curfman McInnes   b->rowners[0] = 0;
154744cd7ae7SLois Curfman McInnes   for ( i=2; i<=b->size; i++ ) {
154844cd7ae7SLois Curfman McInnes     b->rowners[i] += b->rowners[i-1];
15498a729477SBarry Smith   }
155044cd7ae7SLois Curfman McInnes   b->rstart = b->rowners[b->rank];
155144cd7ae7SLois Curfman McInnes   b->rend   = b->rowners[b->rank+1];
155244cd7ae7SLois Curfman McInnes   MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);
155344cd7ae7SLois Curfman McInnes   b->cowners[0] = 0;
155444cd7ae7SLois Curfman McInnes   for ( i=2; i<=b->size; i++ ) {
155544cd7ae7SLois Curfman McInnes     b->cowners[i] += b->cowners[i-1];
15568a729477SBarry Smith   }
155744cd7ae7SLois Curfman McInnes   b->cstart = b->cowners[b->rank];
155844cd7ae7SLois Curfman McInnes   b->cend   = b->cowners[b->rank+1];
15598a729477SBarry Smith 
15605ace5be8SLois Curfman McInnes   if (d_nz == PETSC_DEFAULT) d_nz = 5;
156144cd7ae7SLois Curfman McInnes   ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
156244cd7ae7SLois Curfman McInnes   PLogObjectParent(B,b->A);
15637b8455f0SLois Curfman McInnes   if (o_nz == PETSC_DEFAULT) o_nz = 0;
156444cd7ae7SLois Curfman McInnes   ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
156544cd7ae7SLois Curfman McInnes   PLogObjectParent(B,b->B);
15668a729477SBarry Smith 
15671eb62cbbSBarry Smith   /* build cache for off array entries formed */
156844cd7ae7SLois Curfman McInnes   ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
156990f02eecSBarry Smith   b->donotstash  = 0;
157044cd7ae7SLois Curfman McInnes   b->colmap      = 0;
157144cd7ae7SLois Curfman McInnes   b->garray      = 0;
157244cd7ae7SLois Curfman McInnes   b->roworiented = 1;
15738a729477SBarry Smith 
15741eb62cbbSBarry Smith   /* stuff used for matrix vector multiply */
157544cd7ae7SLois Curfman McInnes   b->lvec      = 0;
157644cd7ae7SLois Curfman McInnes   b->Mvctx     = 0;
15778a729477SBarry Smith 
15787a0afa10SBarry Smith   /* stuff for MatGetRow() */
157944cd7ae7SLois Curfman McInnes   b->rowindices   = 0;
158044cd7ae7SLois Curfman McInnes   b->rowvalues    = 0;
158144cd7ae7SLois Curfman McInnes   b->getrowactive = PETSC_FALSE;
15827a0afa10SBarry Smith 
158344cd7ae7SLois Curfman McInnes   *A = B;
1584d6dfbf8fSBarry Smith   return 0;
1585d6dfbf8fSBarry Smith }
1586c74985f6SBarry Smith 
15875615d1e5SSatish Balay #undef __FUNC__
15885615d1e5SSatish Balay #define __FUNC__ "MatConvertSameType_MPIAIJ"
15893d1612f7SBarry Smith static int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues)
1590d6dfbf8fSBarry Smith {
1591d6dfbf8fSBarry Smith   Mat        mat;
1592416022c9SBarry Smith   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
1593a1b97e82SLois Curfman McInnes   int        ierr, len=0, flg;
1594d6dfbf8fSBarry Smith 
1595416022c9SBarry Smith   *newmat       = 0;
15960452661fSBarry Smith   PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm);
1597a5a9c739SBarry Smith   PLogObjectCreate(mat);
15980452661fSBarry Smith   mat->data       = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
1599416022c9SBarry Smith   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
160044a69424SLois Curfman McInnes   mat->destroy    = MatDestroy_MPIAIJ;
160144a69424SLois Curfman McInnes   mat->view       = MatView_MPIAIJ;
1602d6dfbf8fSBarry Smith   mat->factor     = matin->factor;
1603c456f294SBarry Smith   mat->assembled  = PETSC_TRUE;
1604d6dfbf8fSBarry Smith 
160544cd7ae7SLois Curfman McInnes   a->m = mat->m   = oldmat->m;
160644cd7ae7SLois Curfman McInnes   a->n = mat->n   = oldmat->n;
160744cd7ae7SLois Curfman McInnes   a->M = mat->M   = oldmat->M;
160844cd7ae7SLois Curfman McInnes   a->N = mat->N   = oldmat->N;
1609d6dfbf8fSBarry Smith 
1610416022c9SBarry Smith   a->rstart       = oldmat->rstart;
1611416022c9SBarry Smith   a->rend         = oldmat->rend;
1612416022c9SBarry Smith   a->cstart       = oldmat->cstart;
1613416022c9SBarry Smith   a->cend         = oldmat->cend;
161417699dbbSLois Curfman McInnes   a->size         = oldmat->size;
161517699dbbSLois Curfman McInnes   a->rank         = oldmat->rank;
161664119d58SLois Curfman McInnes   a->insertmode   = NOT_SET_VALUES;
1617bcd2baecSBarry Smith   a->rowvalues    = 0;
1618bcd2baecSBarry Smith   a->getrowactive = PETSC_FALSE;
1619d6dfbf8fSBarry Smith 
1620603f58a4SSatish Balay   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
1621603f58a4SSatish Balay   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
1622603f58a4SSatish Balay   a->cowners = a->rowners + a->size + 2;
1623603f58a4SSatish Balay   PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
1624416022c9SBarry Smith   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
16252ee70a88SLois Curfman McInnes   if (oldmat->colmap) {
16260452661fSBarry Smith     a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
1627416022c9SBarry Smith     PLogObjectMemory(mat,(a->N)*sizeof(int));
1628416022c9SBarry Smith     PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
1629416022c9SBarry Smith   } else a->colmap = 0;
16303f41c07dSBarry Smith   if (oldmat->garray) {
16313f41c07dSBarry Smith     len = ((Mat_SeqAIJ *) (oldmat->B->data))->n;
16323f41c07dSBarry Smith     a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray);
1633464493b3SBarry Smith     PLogObjectMemory(mat,len*sizeof(int));
16343f41c07dSBarry Smith     if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
1635416022c9SBarry Smith   } else a->garray = 0;
1636d6dfbf8fSBarry Smith 
1637416022c9SBarry Smith   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
1638416022c9SBarry Smith   PLogObjectParent(mat,a->lvec);
1639a56f8943SBarry Smith   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
1640416022c9SBarry Smith   PLogObjectParent(mat,a->Mvctx);
1641416022c9SBarry Smith   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
1642416022c9SBarry Smith   PLogObjectParent(mat,a->A);
1643416022c9SBarry Smith   ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
1644416022c9SBarry Smith   PLogObjectParent(mat,a->B);
16455dd7a6c7SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
164625cdf11fSBarry Smith   if (flg) {
1647682d7d0cSBarry Smith     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
1648682d7d0cSBarry Smith   }
16498a729477SBarry Smith   *newmat = mat;
16508a729477SBarry Smith   return 0;
16518a729477SBarry Smith }
1652416022c9SBarry Smith 
165377c4ece6SBarry Smith #include "sys.h"
1654416022c9SBarry Smith 
16555615d1e5SSatish Balay #undef __FUNC__
16565615d1e5SSatish Balay #define __FUNC__ "MatLoad_MPIAIJ"
165719bcc07fSBarry Smith int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
1658416022c9SBarry Smith {
1659d65a2f8fSBarry Smith   Mat          A;
1660416022c9SBarry Smith   int          i, nz, ierr, j,rstart, rend, fd;
1661d65a2f8fSBarry Smith   Scalar       *vals,*svals;
166219bcc07fSBarry Smith   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1663416022c9SBarry Smith   MPI_Status   status;
166417699dbbSLois Curfman McInnes   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1665d65a2f8fSBarry Smith   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
166619bcc07fSBarry Smith   int          tag = ((PetscObject)viewer)->tag;
1667416022c9SBarry Smith 
166817699dbbSLois Curfman McInnes   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
166917699dbbSLois Curfman McInnes   if (!rank) {
167090ace30eSBarry Smith     ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
167177c4ece6SBarry Smith     ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr);
1672e3372554SBarry Smith     if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not matrix object");
1673416022c9SBarry Smith   }
1674416022c9SBarry Smith 
1675416022c9SBarry Smith   MPI_Bcast(header+1,3,MPI_INT,0,comm);
1676416022c9SBarry Smith   M = header[1]; N = header[2];
1677416022c9SBarry Smith   /* determine ownership of all rows */
167817699dbbSLois Curfman McInnes   m = M/size + ((M % size) > rank);
16790452661fSBarry Smith   rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
1680416022c9SBarry Smith   MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1681416022c9SBarry Smith   rowners[0] = 0;
168217699dbbSLois Curfman McInnes   for ( i=2; i<=size; i++ ) {
1683416022c9SBarry Smith     rowners[i] += rowners[i-1];
1684416022c9SBarry Smith   }
168517699dbbSLois Curfman McInnes   rstart = rowners[rank];
168617699dbbSLois Curfman McInnes   rend   = rowners[rank+1];
1687416022c9SBarry Smith 
1688416022c9SBarry Smith   /* distribute row lengths to all processors */
16890452661fSBarry Smith   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
1690416022c9SBarry Smith   offlens = ourlens + (rend-rstart);
169117699dbbSLois Curfman McInnes   if (!rank) {
16920452661fSBarry Smith     rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
169377c4ece6SBarry Smith     ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
16940452661fSBarry Smith     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
169517699dbbSLois Curfman McInnes     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
1696d65a2f8fSBarry Smith     MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
16970452661fSBarry Smith     PetscFree(sndcounts);
1698416022c9SBarry Smith   }
1699416022c9SBarry Smith   else {
1700416022c9SBarry Smith     MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
1701416022c9SBarry Smith   }
1702416022c9SBarry Smith 
170317699dbbSLois Curfman McInnes   if (!rank) {
1704416022c9SBarry Smith     /* calculate the number of nonzeros on each processor */
17050452661fSBarry Smith     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
1706cddf8d76SBarry Smith     PetscMemzero(procsnz,size*sizeof(int));
170717699dbbSLois Curfman McInnes     for ( i=0; i<size; i++ ) {
1708416022c9SBarry Smith       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
1709416022c9SBarry Smith         procsnz[i] += rowlengths[j];
1710416022c9SBarry Smith       }
1711416022c9SBarry Smith     }
17120452661fSBarry Smith     PetscFree(rowlengths);
1713416022c9SBarry Smith 
1714416022c9SBarry Smith     /* determine max buffer needed and allocate it */
1715416022c9SBarry Smith     maxnz = 0;
171617699dbbSLois Curfman McInnes     for ( i=0; i<size; i++ ) {
17170452661fSBarry Smith       maxnz = PetscMax(maxnz,procsnz[i]);
1718416022c9SBarry Smith     }
17190452661fSBarry Smith     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
1720416022c9SBarry Smith 
1721416022c9SBarry Smith     /* read in my part of the matrix column indices  */
1722416022c9SBarry Smith     nz = procsnz[0];
17230452661fSBarry Smith     mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
172477c4ece6SBarry Smith     ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr);
1725d65a2f8fSBarry Smith 
1726d65a2f8fSBarry Smith     /* read in every one elses and ship off */
172717699dbbSLois Curfman McInnes     for ( i=1; i<size; i++ ) {
1728d65a2f8fSBarry Smith       nz = procsnz[i];
172977c4ece6SBarry Smith       ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
1730d65a2f8fSBarry Smith       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
1731d65a2f8fSBarry Smith     }
17320452661fSBarry Smith     PetscFree(cols);
1733416022c9SBarry Smith   }
1734416022c9SBarry Smith   else {
1735416022c9SBarry Smith     /* determine buffer space needed for message */
1736416022c9SBarry Smith     nz = 0;
1737416022c9SBarry Smith     for ( i=0; i<m; i++ ) {
1738416022c9SBarry Smith       nz += ourlens[i];
1739416022c9SBarry Smith     }
17400452661fSBarry Smith     mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1741416022c9SBarry Smith 
1742416022c9SBarry Smith     /* receive message of column indices*/
1743d65a2f8fSBarry Smith     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
1744416022c9SBarry Smith     MPI_Get_count(&status,MPI_INT,&maxnz);
1745e3372554SBarry Smith     if (maxnz != nz) SETERRQ(1,0,"something is wrong with file");
1746416022c9SBarry Smith   }
1747416022c9SBarry Smith 
1748416022c9SBarry Smith   /* loop over local rows, determining number of off diagonal entries */
1749cddf8d76SBarry Smith   PetscMemzero(offlens,m*sizeof(int));
1750416022c9SBarry Smith   jj = 0;
1751416022c9SBarry Smith   for ( i=0; i<m; i++ ) {
1752416022c9SBarry Smith     for ( j=0; j<ourlens[i]; j++ ) {
1753d65a2f8fSBarry Smith       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1754416022c9SBarry Smith       jj++;
1755416022c9SBarry Smith     }
1756416022c9SBarry Smith   }
1757d65a2f8fSBarry Smith 
1758d65a2f8fSBarry Smith   /* create our matrix */
1759416022c9SBarry Smith   for ( i=0; i<m; i++ ) {
1760416022c9SBarry Smith     ourlens[i] -= offlens[i];
1761416022c9SBarry Smith   }
1762d65a2f8fSBarry Smith   ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1763d65a2f8fSBarry Smith   A = *newmat;
17646d4a8577SBarry Smith   MatSetOption(A,MAT_COLUMNS_SORTED);
1765d65a2f8fSBarry Smith   for ( i=0; i<m; i++ ) {
1766d65a2f8fSBarry Smith     ourlens[i] += offlens[i];
1767d65a2f8fSBarry Smith   }
1768416022c9SBarry Smith 
176917699dbbSLois Curfman McInnes   if (!rank) {
17700452661fSBarry Smith     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);
1771416022c9SBarry Smith 
1772416022c9SBarry Smith     /* read in my part of the matrix numerical values  */
1773416022c9SBarry Smith     nz = procsnz[0];
177477c4ece6SBarry Smith     ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
1775d65a2f8fSBarry Smith 
1776d65a2f8fSBarry Smith     /* insert into matrix */
1777d65a2f8fSBarry Smith     jj      = rstart;
1778d65a2f8fSBarry Smith     smycols = mycols;
1779d65a2f8fSBarry Smith     svals   = vals;
1780d65a2f8fSBarry Smith     for ( i=0; i<m; i++ ) {
1781d65a2f8fSBarry Smith       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1782d65a2f8fSBarry Smith       smycols += ourlens[i];
1783d65a2f8fSBarry Smith       svals   += ourlens[i];
1784d65a2f8fSBarry Smith       jj++;
1785416022c9SBarry Smith     }
1786416022c9SBarry Smith 
1787d65a2f8fSBarry Smith     /* read in other processors and ship out */
178817699dbbSLois Curfman McInnes     for ( i=1; i<size; i++ ) {
1789416022c9SBarry Smith       nz = procsnz[i];
179077c4ece6SBarry Smith       ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
1791d65a2f8fSBarry Smith       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
1792416022c9SBarry Smith     }
17930452661fSBarry Smith     PetscFree(procsnz);
1794416022c9SBarry Smith   }
1795d65a2f8fSBarry Smith   else {
1796d65a2f8fSBarry Smith     /* receive numeric values */
17970452661fSBarry Smith     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);
1798416022c9SBarry Smith 
1799d65a2f8fSBarry Smith     /* receive message of values*/
1800d65a2f8fSBarry Smith     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
1801d65a2f8fSBarry Smith     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1802e3372554SBarry Smith     if (maxnz != nz) SETERRQ(1,0,"something is wrong with file");
1803d65a2f8fSBarry Smith 
1804d65a2f8fSBarry Smith     /* insert into matrix */
1805d65a2f8fSBarry Smith     jj      = rstart;
1806d65a2f8fSBarry Smith     smycols = mycols;
1807d65a2f8fSBarry Smith     svals   = vals;
1808d65a2f8fSBarry Smith     for ( i=0; i<m; i++ ) {
1809d65a2f8fSBarry Smith       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1810d65a2f8fSBarry Smith       smycols += ourlens[i];
1811d65a2f8fSBarry Smith       svals   += ourlens[i];
1812d65a2f8fSBarry Smith       jj++;
1813d65a2f8fSBarry Smith     }
1814d65a2f8fSBarry Smith   }
18150452661fSBarry Smith   PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);
1816d65a2f8fSBarry Smith 
18176d4a8577SBarry Smith   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
18186d4a8577SBarry Smith   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1819416022c9SBarry Smith   return 0;
1820416022c9SBarry Smith }
1821