xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 55843e3e646b2cfef4dff8bdf6ad93160bb9339c)
1a5eb4965SSatish Balay #ifdef PETSC_RCS_HEADER
2*55843e3eSBarry Smith static char vcid[] = "$Id: mpiaij.c,v 1.224 1997/11/03 04:45:30 bsmith Exp bsmith $";
3cb512458SBarry Smith #endif
48a729477SBarry Smith 
53369ce9aSBarry Smith #include "pinclude/pviewer.h"
670f55243SBarry Smith #include "src/mat/impls/aij/mpi/mpiaij.h"
7f5eb4b81SSatish Balay #include "src/vec/vecimpl.h"
8d9942c19SSatish Balay #include "src/inline/spops.h"
98a729477SBarry Smith 
109e25ed09SBarry Smith /* local utility routine that creates a mapping from the global column
119e25ed09SBarry Smith number to the local number in the off-diagonal part of the local
129e25ed09SBarry Smith storage of the matrix.  This is done in a non scable way since the
139e25ed09SBarry Smith length of colmap equals the global matrix length.
149e25ed09SBarry Smith */
155615d1e5SSatish Balay #undef __FUNC__
16d4bb536fSBarry Smith #define __FUNC__ "CreateColmap_MPIAIJ_Private"
170a198c4cSBarry Smith int CreateColmap_MPIAIJ_Private(Mat mat)
189e25ed09SBarry Smith {
1944a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
20ec8511deSBarry Smith   Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
21905e6a2fSBarry Smith   int        n = B->n,i;
22dbb450caSBarry Smith 
233a40ed3dSBarry Smith   PetscFunctionBegin;
24758f045eSSatish Balay   aij->colmap = (int *) PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap);
25464493b3SBarry Smith   PLogObjectMemory(mat,aij->N*sizeof(int));
26cddf8d76SBarry Smith   PetscMemzero(aij->colmap,aij->N*sizeof(int));
27905e6a2fSBarry Smith   for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
283a40ed3dSBarry Smith   PetscFunctionReturn(0);
299e25ed09SBarry Smith }
309e25ed09SBarry Smith 
312493cbb0SBarry Smith extern int DisAssemble_MPIAIJ(Mat);
322493cbb0SBarry Smith 
330520107fSSatish Balay #define CHUNKSIZE   15
3430770e4dSSatish Balay #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
350520107fSSatish Balay { \
360520107fSSatish Balay  \
370520107fSSatish Balay     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
3830770e4dSSatish Balay     rmax = aimax[row]; nrow = ailen[row];  \
39f5e9677aSSatish Balay     col1 = col - shift; \
40f5e9677aSSatish Balay      \
41ba4e3ef2SSatish Balay     low = 0; high = nrow; \
42ba4e3ef2SSatish Balay     while (high-low > 5) { \
43ba4e3ef2SSatish Balay       t = (low+high)/2; \
44ba4e3ef2SSatish Balay       if (rp[t] > col) high = t; \
45ba4e3ef2SSatish Balay       else             low  = t; \
46ba4e3ef2SSatish Balay     } \
470520107fSSatish Balay       for ( _i=0; _i<nrow; _i++ ) { \
48f5e9677aSSatish Balay         if (rp[_i] > col1) break; \
49f5e9677aSSatish Balay         if (rp[_i] == col1) { \
500520107fSSatish Balay           if (addv == ADD_VALUES) ap[_i] += value;   \
510520107fSSatish Balay           else                  ap[_i] = value; \
5230770e4dSSatish Balay           goto a_noinsert; \
530520107fSSatish Balay         } \
540520107fSSatish Balay       }  \
5589280ab3SLois Curfman McInnes       if (nonew == 1) goto a_noinsert; \
5611ebbc71SLois Curfman McInnes       else if (nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \
570520107fSSatish Balay       if (nrow >= rmax) { \
580520107fSSatish Balay         /* there is no extra room in row, therefore enlarge */ \
590520107fSSatish Balay         int    new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \
600520107fSSatish Balay         Scalar *new_a; \
610520107fSSatish Balay  \
6211ebbc71SLois Curfman McInnes         if (nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \
6389280ab3SLois Curfman McInnes  \
640520107fSSatish Balay         /* malloc new storage space */ \
650520107fSSatish Balay         len     = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \
660520107fSSatish Balay         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
670520107fSSatish Balay         new_j   = (int *) (new_a + new_nz); \
680520107fSSatish Balay         new_i   = new_j + new_nz; \
690520107fSSatish Balay  \
700520107fSSatish Balay         /* copy over old data into new slots */ \
710520107fSSatish Balay         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \
720520107fSSatish Balay         for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
730520107fSSatish Balay         PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int)); \
740520107fSSatish Balay         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
750520107fSSatish Balay         PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
760520107fSSatish Balay                                                            len*sizeof(int)); \
770520107fSSatish Balay         PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar)); \
780520107fSSatish Balay         PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
790520107fSSatish Balay                                                            len*sizeof(Scalar));  \
800520107fSSatish Balay         /* free up old matrix storage */ \
81f5e9677aSSatish Balay  \
820520107fSSatish Balay         PetscFree(a->a);  \
830520107fSSatish Balay         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
840520107fSSatish Balay         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
850520107fSSatish Balay         a->singlemalloc = 1; \
860520107fSSatish Balay  \
870520107fSSatish Balay         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
8830770e4dSSatish Balay         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
890520107fSSatish Balay         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
900520107fSSatish Balay         a->maxnz += CHUNKSIZE; \
910520107fSSatish Balay         a->reallocs++; \
920520107fSSatish Balay       } \
930520107fSSatish Balay       N = nrow++ - 1; a->nz++; \
940520107fSSatish Balay       /* shift up all the later entries in this row */ \
950520107fSSatish Balay       for ( ii=N; ii>=_i; ii-- ) { \
960520107fSSatish Balay         rp[ii+1] = rp[ii]; \
970520107fSSatish Balay         ap[ii+1] = ap[ii]; \
980520107fSSatish Balay       } \
99f5e9677aSSatish Balay       rp[_i] = col1;  \
1000520107fSSatish Balay       ap[_i] = value;  \
10130770e4dSSatish Balay       a_noinsert: ; \
1020520107fSSatish Balay       ailen[row] = nrow; \
1030520107fSSatish Balay }
1040a198c4cSBarry Smith 
10530770e4dSSatish Balay #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
10630770e4dSSatish Balay { \
10730770e4dSSatish Balay  \
10830770e4dSSatish Balay     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
10930770e4dSSatish Balay     rmax = bimax[row]; nrow = bilen[row];  \
11030770e4dSSatish Balay     col1 = col - shift; \
11130770e4dSSatish Balay      \
112ba4e3ef2SSatish Balay     low = 0; high = nrow; \
113ba4e3ef2SSatish Balay     while (high-low > 5) { \
114ba4e3ef2SSatish Balay       t = (low+high)/2; \
115ba4e3ef2SSatish Balay       if (rp[t] > col) high = t; \
116ba4e3ef2SSatish Balay       else             low  = t; \
117ba4e3ef2SSatish Balay     } \
11830770e4dSSatish Balay        for ( _i=0; _i<nrow; _i++ ) { \
11930770e4dSSatish Balay         if (rp[_i] > col1) break; \
12030770e4dSSatish Balay         if (rp[_i] == col1) { \
12130770e4dSSatish Balay           if (addv == ADD_VALUES) ap[_i] += value;   \
12230770e4dSSatish Balay           else                  ap[_i] = value; \
12330770e4dSSatish Balay           goto b_noinsert; \
12430770e4dSSatish Balay         } \
12530770e4dSSatish Balay       }  \
12689280ab3SLois Curfman McInnes       if (nonew == 1) goto b_noinsert; \
12711ebbc71SLois Curfman McInnes       else if (nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \
12830770e4dSSatish Balay       if (nrow >= rmax) { \
12930770e4dSSatish Balay         /* there is no extra room in row, therefore enlarge */ \
13074c639caSSatish Balay         int    new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \
13130770e4dSSatish Balay         Scalar *new_a; \
13230770e4dSSatish Balay  \
13311ebbc71SLois Curfman McInnes         if (nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \
13489280ab3SLois Curfman McInnes  \
13530770e4dSSatish Balay         /* malloc new storage space */ \
13674c639caSSatish Balay         len     = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \
13730770e4dSSatish Balay         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
13830770e4dSSatish Balay         new_j   = (int *) (new_a + new_nz); \
13930770e4dSSatish Balay         new_i   = new_j + new_nz; \
14030770e4dSSatish Balay  \
14130770e4dSSatish Balay         /* copy over old data into new slots */ \
14230770e4dSSatish Balay         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \
14374c639caSSatish Balay         for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
14430770e4dSSatish Balay         PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int)); \
14530770e4dSSatish Balay         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
14630770e4dSSatish Balay         PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
14730770e4dSSatish Balay                                                            len*sizeof(int)); \
14830770e4dSSatish Balay         PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar)); \
14930770e4dSSatish Balay         PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
15030770e4dSSatish Balay                                                            len*sizeof(Scalar));  \
15130770e4dSSatish Balay         /* free up old matrix storage */ \
15230770e4dSSatish Balay  \
15374c639caSSatish Balay         PetscFree(b->a);  \
15474c639caSSatish Balay         if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \
15574c639caSSatish Balay         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
15674c639caSSatish Balay         b->singlemalloc = 1; \
15730770e4dSSatish Balay  \
15830770e4dSSatish Balay         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
15930770e4dSSatish Balay         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
16074c639caSSatish Balay         PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
16174c639caSSatish Balay         b->maxnz += CHUNKSIZE; \
16274c639caSSatish Balay         b->reallocs++; \
16330770e4dSSatish Balay       } \
16474c639caSSatish Balay       N = nrow++ - 1; b->nz++; \
16530770e4dSSatish Balay       /* shift up all the later entries in this row */ \
16630770e4dSSatish Balay       for ( ii=N; ii>=_i; ii-- ) { \
16730770e4dSSatish Balay         rp[ii+1] = rp[ii]; \
16830770e4dSSatish Balay         ap[ii+1] = ap[ii]; \
16930770e4dSSatish Balay       } \
17030770e4dSSatish Balay       rp[_i] = col1;  \
17130770e4dSSatish Balay       ap[_i] = value;  \
17230770e4dSSatish Balay       b_noinsert: ; \
17330770e4dSSatish Balay       bilen[row] = nrow; \
17430770e4dSSatish Balay }
17530770e4dSSatish Balay 
1760520107fSSatish Balay extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
1775615d1e5SSatish Balay #undef __FUNC__
1785615d1e5SSatish Balay #define __FUNC__ "MatSetValues_MPIAIJ"
1798f6be9afSLois Curfman McInnes int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
1808a729477SBarry Smith {
18144a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1824b0e389bSBarry Smith   Scalar     value;
1831eb62cbbSBarry Smith   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
1841eb62cbbSBarry Smith   int        cstart = aij->cstart, cend = aij->cend,row,col;
185905e6a2fSBarry Smith   int        roworiented = aij->roworiented;
1868a729477SBarry Smith 
1870520107fSSatish Balay   /* Some Variables required in the macro */
1884ee7247eSSatish Balay   Mat        A = aij->A;
1894ee7247eSSatish Balay   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
19030770e4dSSatish Balay   int        *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j;
19130770e4dSSatish Balay   Scalar     *aa = a->a;
19230770e4dSSatish Balay 
19330770e4dSSatish Balay   Mat        B = aij->B;
19430770e4dSSatish Balay   Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data;
19530770e4dSSatish Balay   int        *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j;
19630770e4dSSatish Balay   Scalar     *ba = b->a;
19730770e4dSSatish Balay 
198ba4e3ef2SSatish Balay   int        *rp,ii,nrow,_i,rmax, N, col1,low,high,t;
19930770e4dSSatish Balay   int        nonew = a->nonew,shift = a->indexshift;
20030770e4dSSatish Balay   Scalar     *ap;
2014ee7247eSSatish Balay 
2023a40ed3dSBarry Smith   PetscFunctionBegin;
2038a729477SBarry Smith   for ( i=0; i<m; i++ ) {
2043a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g)
205e3372554SBarry Smith     if (im[i] < 0) SETERRQ(1,0,"Negative row");
206e3372554SBarry Smith     if (im[i] >= aij->M) SETERRQ(1,0,"Row too large");
2070a198c4cSBarry Smith #endif
2084b0e389bSBarry Smith     if (im[i] >= rstart && im[i] < rend) {
2094b0e389bSBarry Smith       row = im[i] - rstart;
2101eb62cbbSBarry Smith       for ( j=0; j<n; j++ ) {
2114b0e389bSBarry Smith         if (in[j] >= cstart && in[j] < cend){
2124b0e389bSBarry Smith           col = in[j] - cstart;
2134b0e389bSBarry Smith           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
21430770e4dSSatish Balay           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
2150520107fSSatish Balay           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
2161eb62cbbSBarry Smith         }
2173a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g)
218e3372554SBarry Smith         else if (in[j] < 0) {SETERRQ(1,0,"Negative column");}
219e3372554SBarry Smith         else if (in[j] >= aij->N) {SETERRQ(1,0,"Col too large");}
2200a198c4cSBarry Smith #endif
2211eb62cbbSBarry Smith         else {
222227d817aSBarry Smith           if (mat->was_assembled) {
223905e6a2fSBarry Smith             if (!aij->colmap) {
224905e6a2fSBarry Smith               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
225905e6a2fSBarry Smith             }
226905e6a2fSBarry Smith             col = aij->colmap[in[j]] - 1;
227ec8511deSBarry Smith             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
2282493cbb0SBarry Smith               ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
2294b0e389bSBarry Smith               col =  in[j];
2309bf004c3SSatish Balay               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
231f9508a3cSSatish Balay               B = aij->B;
232f9508a3cSSatish Balay               b = (Mat_SeqAIJ *) B->data;
233f9508a3cSSatish Balay               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
234f9508a3cSSatish Balay               ba = b->a;
235d6dfbf8fSBarry Smith             }
2369e25ed09SBarry Smith           }
2374b0e389bSBarry Smith           else col = in[j];
2384b0e389bSBarry Smith           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
23930770e4dSSatish Balay           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
24030770e4dSSatish Balay           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
2411eb62cbbSBarry Smith         }
2421eb62cbbSBarry Smith       }
2431eb62cbbSBarry Smith     }
2441eb62cbbSBarry Smith     else {
24590f02eecSBarry Smith       if (roworiented && !aij->donotstash) {
2464b0e389bSBarry Smith         ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
2474b0e389bSBarry Smith       }
2484b0e389bSBarry Smith       else {
24990f02eecSBarry Smith         if (!aij->donotstash) {
2504b0e389bSBarry Smith           row = im[i];
2514b0e389bSBarry Smith           for ( j=0; j<n; j++ ) {
2524b0e389bSBarry Smith             ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
2534b0e389bSBarry Smith           }
2544b0e389bSBarry Smith         }
2551eb62cbbSBarry Smith       }
2568a729477SBarry Smith     }
25790f02eecSBarry Smith   }
2583a40ed3dSBarry Smith   PetscFunctionReturn(0);
2598a729477SBarry Smith }
2608a729477SBarry Smith 
2615615d1e5SSatish Balay #undef __FUNC__
2625615d1e5SSatish Balay #define __FUNC__ "MatGetValues_MPIAIJ"
2638f6be9afSLois Curfman McInnes int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
264b49de8d1SLois Curfman McInnes {
265b49de8d1SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
266b49de8d1SLois Curfman McInnes   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
267b49de8d1SLois Curfman McInnes   int        cstart = aij->cstart, cend = aij->cend,row,col;
268b49de8d1SLois Curfman McInnes 
2693a40ed3dSBarry Smith   PetscFunctionBegin;
270b49de8d1SLois Curfman McInnes   for ( i=0; i<m; i++ ) {
271e3372554SBarry Smith     if (idxm[i] < 0) SETERRQ(1,0,"Negative row");
272e3372554SBarry Smith     if (idxm[i] >= aij->M) SETERRQ(1,0,"Row too large");
273b49de8d1SLois Curfman McInnes     if (idxm[i] >= rstart && idxm[i] < rend) {
274b49de8d1SLois Curfman McInnes       row = idxm[i] - rstart;
275b49de8d1SLois Curfman McInnes       for ( j=0; j<n; j++ ) {
276e3372554SBarry Smith         if (idxn[j] < 0) SETERRQ(1,0,"Negative column");
277e3372554SBarry Smith         if (idxn[j] >= aij->N) SETERRQ(1,0,"Col too large");
278b49de8d1SLois Curfman McInnes         if (idxn[j] >= cstart && idxn[j] < cend){
279b49de8d1SLois Curfman McInnes           col = idxn[j] - cstart;
280b49de8d1SLois Curfman McInnes           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
281b49de8d1SLois Curfman McInnes         }
282b49de8d1SLois Curfman McInnes         else {
283905e6a2fSBarry Smith           if (!aij->colmap) {
284905e6a2fSBarry Smith             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
285905e6a2fSBarry Smith           }
286905e6a2fSBarry Smith           col = aij->colmap[idxn[j]] - 1;
287e60e1c95SSatish Balay           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
288d9d09a02SSatish Balay           else {
289b49de8d1SLois Curfman McInnes             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
290b49de8d1SLois Curfman McInnes           }
291b49de8d1SLois Curfman McInnes         }
292b49de8d1SLois Curfman McInnes       }
293d9d09a02SSatish Balay     }
294b49de8d1SLois Curfman McInnes     else {
295e3372554SBarry Smith       SETERRQ(1,0,"Only local values currently supported");
296b49de8d1SLois Curfman McInnes     }
297b49de8d1SLois Curfman McInnes   }
2983a40ed3dSBarry Smith   PetscFunctionReturn(0);
299b49de8d1SLois Curfman McInnes }
300b49de8d1SLois Curfman McInnes 
3015615d1e5SSatish Balay #undef __FUNC__
3025615d1e5SSatish Balay #define __FUNC__ "MatAssemblyBegin_MPIAIJ"
3038f6be9afSLois Curfman McInnes int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
3048a729477SBarry Smith {
30544a69424SLois Curfman McInnes   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
306d6dfbf8fSBarry Smith   MPI_Comm    comm = mat->comm;
30717699dbbSLois Curfman McInnes   int         size = aij->size, *owners = aij->rowners;
30817699dbbSLois Curfman McInnes   int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
3091eb62cbbSBarry Smith   MPI_Request *send_waits,*recv_waits;
3106abc6512SBarry Smith   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
3111eb62cbbSBarry Smith   InsertMode  addv;
3121eb62cbbSBarry Smith   Scalar      *rvalues,*svalues;
3131eb62cbbSBarry Smith 
3143a40ed3dSBarry Smith   PetscFunctionBegin;
3151eb62cbbSBarry Smith   /* make sure all processors are either in INSERTMODE or ADDMODE */
316ca161407SBarry Smith   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
317dbb450caSBarry Smith   if (addv == (ADD_VALUES|INSERT_VALUES)) {
318e3372554SBarry Smith     SETERRQ(1,0,"Some processors inserted others added");
3191eb62cbbSBarry Smith   }
32047794344SBarry Smith   mat->insertmode = addv; /* in case this processor had no cache */
3211eb62cbbSBarry Smith 
3221eb62cbbSBarry Smith   /*  first count number of contributors to each processor */
3230452661fSBarry Smith   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
324cddf8d76SBarry Smith   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
3250452661fSBarry Smith   owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
3261eb62cbbSBarry Smith   for ( i=0; i<aij->stash.n; i++ ) {
3271eb62cbbSBarry Smith     idx = aij->stash.idx[i];
32817699dbbSLois Curfman McInnes     for ( j=0; j<size; j++ ) {
3291eb62cbbSBarry Smith       if (idx >= owners[j] && idx < owners[j+1]) {
3301eb62cbbSBarry Smith         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
3318a729477SBarry Smith       }
3328a729477SBarry Smith     }
3338a729477SBarry Smith   }
33417699dbbSLois Curfman McInnes   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
3351eb62cbbSBarry Smith 
3361eb62cbbSBarry Smith   /* inform other processors of number of messages and max length*/
3370452661fSBarry Smith   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
338ca161407SBarry Smith   ierr = MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
33917699dbbSLois Curfman McInnes   nreceives = work[rank];
340ca161407SBarry Smith   ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
34117699dbbSLois Curfman McInnes   nmax = work[rank];
3420452661fSBarry Smith   PetscFree(work);
3431eb62cbbSBarry Smith 
3441eb62cbbSBarry Smith   /* post receives:
3451eb62cbbSBarry Smith        1) each message will consist of ordered pairs
3461eb62cbbSBarry Smith      (global index,value) we store the global index as a double
347d6dfbf8fSBarry Smith      to simplify the message passing.
3481eb62cbbSBarry Smith        2) since we don't know how long each individual message is we
3491eb62cbbSBarry Smith      allocate the largest needed buffer for each receive. Potentially
3501eb62cbbSBarry Smith      this is a lot of wasted space.
3511eb62cbbSBarry Smith 
3521eb62cbbSBarry Smith 
3531eb62cbbSBarry Smith        This could be done better.
3541eb62cbbSBarry Smith   */
355ca161407SBarry Smith   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues);
356ca161407SBarry Smith   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
3571eb62cbbSBarry Smith   for ( i=0; i<nreceives; i++ ) {
358ca161407SBarry Smith     ierr = MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
359ca161407SBarry Smith               comm,recv_waits+i);CHKERRQ(ierr);
3601eb62cbbSBarry Smith   }
3611eb62cbbSBarry Smith 
3621eb62cbbSBarry Smith   /* do sends:
3631eb62cbbSBarry Smith       1) starts[i] gives the starting index in svalues for stuff going to
3641eb62cbbSBarry Smith          the ith processor
3651eb62cbbSBarry Smith   */
3660452661fSBarry Smith   svalues    = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
367ca161407SBarry Smith   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
3680452661fSBarry Smith   starts     = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
3691eb62cbbSBarry Smith   starts[0]  = 0;
37017699dbbSLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
3711eb62cbbSBarry Smith   for ( i=0; i<aij->stash.n; i++ ) {
3721eb62cbbSBarry Smith     svalues[3*starts[owner[i]]]       = (Scalar)  aij->stash.idx[i];
3731eb62cbbSBarry Smith     svalues[3*starts[owner[i]]+1]     = (Scalar)  aij->stash.idy[i];
3741eb62cbbSBarry Smith     svalues[3*(starts[owner[i]]++)+2] =  aij->stash.array[i];
3751eb62cbbSBarry Smith   }
3760452661fSBarry Smith   PetscFree(owner);
3771eb62cbbSBarry Smith   starts[0] = 0;
37817699dbbSLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
3791eb62cbbSBarry Smith   count = 0;
38017699dbbSLois Curfman McInnes   for ( i=0; i<size; i++ ) {
3811eb62cbbSBarry Smith     if (procs[i]) {
382ca161407SBarry Smith       ierr = MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
3831eb62cbbSBarry Smith     }
3841eb62cbbSBarry Smith   }
3850452661fSBarry Smith   PetscFree(starts); PetscFree(nprocs);
3861eb62cbbSBarry Smith 
3871eb62cbbSBarry Smith   /* Free cache space */
38855908cccSLois Curfman McInnes   PLogInfo(mat,"MatAssemblyBegin_MPIAIJ:Number of off-processor values %d\n",aij->stash.n);
38978b31e54SBarry Smith   ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
3901eb62cbbSBarry Smith 
3911eb62cbbSBarry Smith   aij->svalues    = svalues;    aij->rvalues    = rvalues;
3921eb62cbbSBarry Smith   aij->nsends     = nsends;     aij->nrecvs     = nreceives;
3931eb62cbbSBarry Smith   aij->send_waits = send_waits; aij->recv_waits = recv_waits;
3941eb62cbbSBarry Smith   aij->rmax       = nmax;
3951eb62cbbSBarry Smith 
3963a40ed3dSBarry Smith   PetscFunctionReturn(0);
3971eb62cbbSBarry Smith }
39844a69424SLois Curfman McInnes extern int MatSetUpMultiply_MPIAIJ(Mat);
3991eb62cbbSBarry Smith 
4005615d1e5SSatish Balay #undef __FUNC__
4015615d1e5SSatish Balay #define __FUNC__ "MatAssemblyEnd_MPIAIJ"
4028f6be9afSLois Curfman McInnes int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
4031eb62cbbSBarry Smith {
40444a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
4051eb62cbbSBarry Smith   MPI_Status  *send_status,recv_status;
406416022c9SBarry Smith   int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
407905e6a2fSBarry Smith   int         row,col,other_disassembled;
4081eb62cbbSBarry Smith   Scalar      *values,val;
40947794344SBarry Smith   InsertMode  addv = mat->insertmode;
4101eb62cbbSBarry Smith 
4113a40ed3dSBarry Smith   PetscFunctionBegin;
4121eb62cbbSBarry Smith   /*  wait on receives */
4131eb62cbbSBarry Smith   while (count) {
414ca161407SBarry Smith     ierr = MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
4151eb62cbbSBarry Smith     /* unpack receives into our local space */
416d6dfbf8fSBarry Smith     values = aij->rvalues + 3*imdex*aij->rmax;
417ca161407SBarry Smith     ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,&n);CHKERRQ(ierr);
4181eb62cbbSBarry Smith     n = n/3;
4191eb62cbbSBarry Smith     for ( i=0; i<n; i++ ) {
420227d817aSBarry Smith       row = (int) PetscReal(values[3*i]) - aij->rstart;
421227d817aSBarry Smith       col = (int) PetscReal(values[3*i+1]);
4221eb62cbbSBarry Smith       val = values[3*i+2];
4231eb62cbbSBarry Smith       if (col >= aij->cstart && col < aij->cend) {
4241eb62cbbSBarry Smith         col -= aij->cstart;
4256fd7127cSSatish Balay         ierr = MatSetValues(aij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr);
4263a40ed3dSBarry Smith       } else {
42755a1b374SBarry Smith         if (mat->was_assembled || mat->assembled) {
428905e6a2fSBarry Smith           if (!aij->colmap) {
429905e6a2fSBarry Smith             ierr = CreateColmap_MPIAIJ_Private(mat); CHKERRQ(ierr);
430905e6a2fSBarry Smith           }
431905e6a2fSBarry Smith           col = aij->colmap[col] - 1;
432ec8511deSBarry Smith           if (col < 0  && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
4332493cbb0SBarry Smith             ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
434227d817aSBarry Smith             col = (int) PetscReal(values[3*i+1]);
435d6dfbf8fSBarry Smith           }
4369e25ed09SBarry Smith         }
4376fd7127cSSatish Balay         ierr = MatSetValues(aij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr);
4381eb62cbbSBarry Smith       }
4391eb62cbbSBarry Smith     }
4401eb62cbbSBarry Smith     count--;
4411eb62cbbSBarry Smith   }
4420452661fSBarry Smith   PetscFree(aij->recv_waits); PetscFree(aij->rvalues);
4431eb62cbbSBarry Smith 
4441eb62cbbSBarry Smith   /* wait on sends */
4451eb62cbbSBarry Smith   if (aij->nsends) {
4460a198c4cSBarry Smith     send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
447ca161407SBarry Smith     ierr        = MPI_Waitall(aij->nsends,aij->send_waits,send_status);CHKERRQ(ierr);
4480452661fSBarry Smith     PetscFree(send_status);
4491eb62cbbSBarry Smith   }
4500452661fSBarry Smith   PetscFree(aij->send_waits); PetscFree(aij->svalues);
4511eb62cbbSBarry Smith 
45278b31e54SBarry Smith   ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
45378b31e54SBarry Smith   ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);
4541eb62cbbSBarry Smith 
4552493cbb0SBarry Smith   /* determine if any processor has disassembled, if so we must
4562493cbb0SBarry Smith      also disassemble ourselfs, in order that we may reassemble. */
457ca161407SBarry Smith   ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
458227d817aSBarry Smith   if (mat->was_assembled && !other_disassembled) {
4592493cbb0SBarry Smith     ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
4602493cbb0SBarry Smith   }
4612493cbb0SBarry Smith 
4626d4a8577SBarry Smith   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
46378b31e54SBarry Smith     ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
4645e42470aSBarry Smith   }
46578b31e54SBarry Smith   ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
46678b31e54SBarry Smith   ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);
4675e42470aSBarry Smith 
4687a0afa10SBarry Smith   if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;}
4693a40ed3dSBarry Smith   PetscFunctionReturn(0);
4708a729477SBarry Smith }
4718a729477SBarry Smith 
4725615d1e5SSatish Balay #undef __FUNC__
4735615d1e5SSatish Balay #define __FUNC__ "MatZeroEntries_MPIAIJ"
4748f6be9afSLois Curfman McInnes int MatZeroEntries_MPIAIJ(Mat A)
4751eb62cbbSBarry Smith {
47644a69424SLois Curfman McInnes   Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
477dbd7a890SLois Curfman McInnes   int        ierr;
4783a40ed3dSBarry Smith 
4793a40ed3dSBarry Smith   PetscFunctionBegin;
48078b31e54SBarry Smith   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
48178b31e54SBarry Smith   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
4823a40ed3dSBarry Smith   PetscFunctionReturn(0);
4831eb62cbbSBarry Smith }
4841eb62cbbSBarry Smith 
4851eb62cbbSBarry Smith /* the code does not do the diagonal entries correctly unless the
4861eb62cbbSBarry Smith    matrix is square and the column and row owerships are identical.
4871eb62cbbSBarry Smith    This is a BUG. The only way to fix it seems to be to access
4881eb62cbbSBarry Smith    aij->A and aij->B directly and not through the MatZeroRows()
4891eb62cbbSBarry Smith    routine.
4901eb62cbbSBarry Smith */
4915615d1e5SSatish Balay #undef __FUNC__
4925615d1e5SSatish Balay #define __FUNC__ "MatZeroRows_MPIAIJ"
4938f6be9afSLois Curfman McInnes int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
4941eb62cbbSBarry Smith {
49544a69424SLois Curfman McInnes   Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
49617699dbbSLois Curfman McInnes   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
4976abc6512SBarry Smith   int            *procs,*nprocs,j,found,idx,nsends,*work;
49817699dbbSLois Curfman McInnes   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
4995392566eSBarry Smith   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
500d6dfbf8fSBarry Smith   int            *lens,imdex,*lrows,*values;
501d6dfbf8fSBarry Smith   MPI_Comm       comm = A->comm;
5021eb62cbbSBarry Smith   MPI_Request    *send_waits,*recv_waits;
5031eb62cbbSBarry Smith   MPI_Status     recv_status,*send_status;
5041eb62cbbSBarry Smith   IS             istmp;
5051eb62cbbSBarry Smith 
5063a40ed3dSBarry Smith   PetscFunctionBegin;
50777c4ece6SBarry Smith   ierr = ISGetSize(is,&N); CHKERRQ(ierr);
50878b31e54SBarry Smith   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
5091eb62cbbSBarry Smith 
5101eb62cbbSBarry Smith   /*  first count number of contributors to each processor */
5110452661fSBarry Smith   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
512cddf8d76SBarry Smith   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
5130452661fSBarry Smith   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
5141eb62cbbSBarry Smith   for ( i=0; i<N; i++ ) {
5151eb62cbbSBarry Smith     idx = rows[i];
5161eb62cbbSBarry Smith     found = 0;
51717699dbbSLois Curfman McInnes     for ( j=0; j<size; j++ ) {
5181eb62cbbSBarry Smith       if (idx >= owners[j] && idx < owners[j+1]) {
5191eb62cbbSBarry Smith         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
5201eb62cbbSBarry Smith       }
5211eb62cbbSBarry Smith     }
522e3372554SBarry Smith     if (!found) SETERRQ(1,0,"Index out of range");
5231eb62cbbSBarry Smith   }
52417699dbbSLois Curfman McInnes   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
5251eb62cbbSBarry Smith 
5261eb62cbbSBarry Smith   /* inform other processors of number of messages and max length*/
5270452661fSBarry Smith   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
528ca161407SBarry Smith   ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
52917699dbbSLois Curfman McInnes   nrecvs = work[rank];
530ca161407SBarry Smith   ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
53117699dbbSLois Curfman McInnes   nmax = work[rank];
5320452661fSBarry Smith   PetscFree(work);
5331eb62cbbSBarry Smith 
5341eb62cbbSBarry Smith   /* post receives:   */
5353a40ed3dSBarry Smith   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
536ca161407SBarry Smith   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
5371eb62cbbSBarry Smith   for ( i=0; i<nrecvs; i++ ) {
538ca161407SBarry Smith     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
5391eb62cbbSBarry Smith   }
5401eb62cbbSBarry Smith 
5411eb62cbbSBarry Smith   /* do sends:
5421eb62cbbSBarry Smith       1) starts[i] gives the starting index in svalues for stuff going to
5431eb62cbbSBarry Smith          the ith processor
5441eb62cbbSBarry Smith   */
5450452661fSBarry Smith   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
5463a40ed3dSBarry Smith   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
5470452661fSBarry Smith   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
5481eb62cbbSBarry Smith   starts[0] = 0;
54917699dbbSLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
5501eb62cbbSBarry Smith   for ( i=0; i<N; i++ ) {
5511eb62cbbSBarry Smith     svalues[starts[owner[i]]++] = rows[i];
5521eb62cbbSBarry Smith   }
5531eb62cbbSBarry Smith   ISRestoreIndices(is,&rows);
5541eb62cbbSBarry Smith 
5551eb62cbbSBarry Smith   starts[0] = 0;
55617699dbbSLois Curfman McInnes   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
5571eb62cbbSBarry Smith   count = 0;
55817699dbbSLois Curfman McInnes   for ( i=0; i<size; i++ ) {
5591eb62cbbSBarry Smith     if (procs[i]) {
560ca161407SBarry Smith       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
5611eb62cbbSBarry Smith     }
5621eb62cbbSBarry Smith   }
5630452661fSBarry Smith   PetscFree(starts);
5641eb62cbbSBarry Smith 
56517699dbbSLois Curfman McInnes   base = owners[rank];
5661eb62cbbSBarry Smith 
5671eb62cbbSBarry Smith   /*  wait on receives */
5680452661fSBarry Smith   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
5691eb62cbbSBarry Smith   source = lens + nrecvs;
5701eb62cbbSBarry Smith   count  = nrecvs; slen = 0;
5711eb62cbbSBarry Smith   while (count) {
572ca161407SBarry Smith     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
5731eb62cbbSBarry Smith     /* unpack receives into our local space */
574ca161407SBarry Smith     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
575d6dfbf8fSBarry Smith     source[imdex]  = recv_status.MPI_SOURCE;
576d6dfbf8fSBarry Smith     lens[imdex]  = n;
5771eb62cbbSBarry Smith     slen += n;
5781eb62cbbSBarry Smith     count--;
5791eb62cbbSBarry Smith   }
5800452661fSBarry Smith   PetscFree(recv_waits);
5811eb62cbbSBarry Smith 
5821eb62cbbSBarry Smith   /* move the data into the send scatter */
5830452661fSBarry Smith   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
5841eb62cbbSBarry Smith   count = 0;
5851eb62cbbSBarry Smith   for ( i=0; i<nrecvs; i++ ) {
5861eb62cbbSBarry Smith     values = rvalues + i*nmax;
5871eb62cbbSBarry Smith     for ( j=0; j<lens[i]; j++ ) {
5881eb62cbbSBarry Smith       lrows[count++] = values[j] - base;
5891eb62cbbSBarry Smith     }
5901eb62cbbSBarry Smith   }
5910452661fSBarry Smith   PetscFree(rvalues); PetscFree(lens);
5920452661fSBarry Smith   PetscFree(owner); PetscFree(nprocs);
5931eb62cbbSBarry Smith 
5941eb62cbbSBarry Smith   /* actually zap the local rows */
595029af93fSBarry Smith   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
596464493b3SBarry Smith   PLogObjectParent(A,istmp);
5970452661fSBarry Smith   PetscFree(lrows);
59878b31e54SBarry Smith   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
59978b31e54SBarry Smith   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
60078b31e54SBarry Smith   ierr = ISDestroy(istmp); CHKERRQ(ierr);
6011eb62cbbSBarry Smith 
6021eb62cbbSBarry Smith   /* wait on sends */
6031eb62cbbSBarry Smith   if (nsends) {
604ca161407SBarry Smith     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
605ca161407SBarry Smith     ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
6060452661fSBarry Smith     PetscFree(send_status);
6071eb62cbbSBarry Smith   }
6080452661fSBarry Smith   PetscFree(send_waits); PetscFree(svalues);
6091eb62cbbSBarry Smith 
6103a40ed3dSBarry Smith   PetscFunctionReturn(0);
6111eb62cbbSBarry Smith }
6121eb62cbbSBarry Smith 
6135615d1e5SSatish Balay #undef __FUNC__
6145615d1e5SSatish Balay #define __FUNC__ "MatMult_MPIAIJ"
6158f6be9afSLois Curfman McInnes int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
6161eb62cbbSBarry Smith {
617416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
618fbd6ef76SBarry Smith   int        ierr,nt;
619416022c9SBarry Smith 
6203a40ed3dSBarry Smith   PetscFunctionBegin;
621a2ce50c7SBarry Smith   ierr = VecGetLocalSize(xx,&nt);  CHKERRQ(ierr);
622fbd6ef76SBarry Smith   if (nt != a->n) {
623f40265daSLois Curfman McInnes     SETERRQ(1,0,"Incompatible partition of A and xx");
624fbd6ef76SBarry Smith   }
62543a90d84SBarry Smith   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
62638597bd4SSatish Balay   ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr);
62743a90d84SBarry Smith   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
62838597bd4SSatish Balay   ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
6293a40ed3dSBarry Smith   PetscFunctionReturn(0);
6301eb62cbbSBarry Smith }
6311eb62cbbSBarry Smith 
6325615d1e5SSatish Balay #undef __FUNC__
6335615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_MPIAIJ"
6348f6be9afSLois Curfman McInnes int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
635da3a660dSBarry Smith {
636416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
637da3a660dSBarry Smith   int        ierr;
6383a40ed3dSBarry Smith 
6393a40ed3dSBarry Smith   PetscFunctionBegin;
64043a90d84SBarry Smith   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
64138597bd4SSatish Balay   ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
64243a90d84SBarry Smith   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
64338597bd4SSatish Balay   ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
6443a40ed3dSBarry Smith   PetscFunctionReturn(0);
645da3a660dSBarry Smith }
646da3a660dSBarry Smith 
6475615d1e5SSatish Balay #undef __FUNC__
6485615d1e5SSatish Balay #define __FUNC__ "MatMultTrans_MPIAIJ"
6498f6be9afSLois Curfman McInnes int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
650da3a660dSBarry Smith {
651416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
652da3a660dSBarry Smith   int        ierr;
653da3a660dSBarry Smith 
6543a40ed3dSBarry Smith   PetscFunctionBegin;
655da3a660dSBarry Smith   /* do nondiagonal part */
65638597bd4SSatish Balay   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
657da3a660dSBarry Smith   /* send it on its way */
658537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
659da3a660dSBarry Smith   /* do local part */
66038597bd4SSatish Balay   ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr);
661da3a660dSBarry Smith   /* receive remote parts: note this assumes the values are not actually */
662da3a660dSBarry Smith   /* inserted in yy until the next line, which is true for my implementation*/
663da3a660dSBarry Smith   /* but is not perhaps always true. */
664537820f0SBarry Smith   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
6653a40ed3dSBarry Smith   PetscFunctionReturn(0);
666da3a660dSBarry Smith }
667da3a660dSBarry Smith 
6685615d1e5SSatish Balay #undef __FUNC__
6695615d1e5SSatish Balay #define __FUNC__ "MatMultTransAdd_MPIAIJ"
6708f6be9afSLois Curfman McInnes int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
671da3a660dSBarry Smith {
672416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
673da3a660dSBarry Smith   int        ierr;
674da3a660dSBarry Smith 
6753a40ed3dSBarry Smith   PetscFunctionBegin;
676da3a660dSBarry Smith   /* do nondiagonal part */
67738597bd4SSatish Balay   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
678da3a660dSBarry Smith   /* send it on its way */
679537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
680da3a660dSBarry Smith   /* do local part */
68138597bd4SSatish Balay   ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
682da3a660dSBarry Smith   /* receive remote parts: note this assumes the values are not actually */
683da3a660dSBarry Smith   /* inserted in yy until the next line, which is true for my implementation*/
684da3a660dSBarry Smith   /* but is not perhaps always true. */
6850a198c4cSBarry Smith   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
6863a40ed3dSBarry Smith   PetscFunctionReturn(0);
687da3a660dSBarry Smith }
688da3a660dSBarry Smith 
6891eb62cbbSBarry Smith /*
6901eb62cbbSBarry Smith   This only works correctly for square matrices where the subblock A->A is the
6911eb62cbbSBarry Smith    diagonal block
6921eb62cbbSBarry Smith */
6935615d1e5SSatish Balay #undef __FUNC__
6945615d1e5SSatish Balay #define __FUNC__ "MatGetDiagonal_MPIAIJ"
6958f6be9afSLois Curfman McInnes int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
6961eb62cbbSBarry Smith {
6973a40ed3dSBarry Smith   int        ierr;
698416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
6993a40ed3dSBarry Smith 
7003a40ed3dSBarry Smith   PetscFunctionBegin;
7013a40ed3dSBarry Smith   if (a->M != a->N) SETERRQ(1,0,"Supports only square matrix where A->A is diag block");
7025baf8537SBarry Smith   if (a->rstart != a->cstart || a->rend != a->cend) {
7033a40ed3dSBarry Smith     SETERRQ(1,0,"row partition must equal col partition");
7043a40ed3dSBarry Smith   }
7053a40ed3dSBarry Smith   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
7063a40ed3dSBarry Smith   PetscFunctionReturn(0);
7071eb62cbbSBarry Smith }
7081eb62cbbSBarry Smith 
7095615d1e5SSatish Balay #undef __FUNC__
7105615d1e5SSatish Balay #define __FUNC__ "MatScale_MPIAIJ"
7118f6be9afSLois Curfman McInnes int MatScale_MPIAIJ(Scalar *aa,Mat A)
712052efed2SBarry Smith {
713052efed2SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
714052efed2SBarry Smith   int        ierr;
7153a40ed3dSBarry Smith 
7163a40ed3dSBarry Smith   PetscFunctionBegin;
717052efed2SBarry Smith   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
718052efed2SBarry Smith   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
7193a40ed3dSBarry Smith   PetscFunctionReturn(0);
720052efed2SBarry Smith }
721052efed2SBarry Smith 
7225615d1e5SSatish Balay #undef __FUNC__
723d4bb536fSBarry Smith #define __FUNC__ "MatDestroy_MPIAIJ"
7248f6be9afSLois Curfman McInnes int MatDestroy_MPIAIJ(PetscObject obj)
7251eb62cbbSBarry Smith {
7261eb62cbbSBarry Smith   Mat        mat = (Mat) obj;
72744a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
7281eb62cbbSBarry Smith   int        ierr;
72983e2fdc7SBarry Smith 
7303a40ed3dSBarry Smith   PetscFunctionBegin;
7313a40ed3dSBarry Smith #if defined(USE_PETSC_LOG)
7326e35fa57SLois Curfman McInnes   PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N);
733a5a9c739SBarry Smith #endif
73483e2fdc7SBarry Smith   ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
7350452661fSBarry Smith   PetscFree(aij->rowners);
73678b31e54SBarry Smith   ierr = MatDestroy(aij->A); CHKERRQ(ierr);
73778b31e54SBarry Smith   ierr = MatDestroy(aij->B); CHKERRQ(ierr);
7380452661fSBarry Smith   if (aij->colmap) PetscFree(aij->colmap);
7390452661fSBarry Smith   if (aij->garray) PetscFree(aij->garray);
7401eb62cbbSBarry Smith   if (aij->lvec)   VecDestroy(aij->lvec);
741a56f8943SBarry Smith   if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
7427a0afa10SBarry Smith   if (aij->rowvalues) PetscFree(aij->rowvalues);
7430452661fSBarry Smith   PetscFree(aij);
744a5a9c739SBarry Smith   PLogObjectDestroy(mat);
7450452661fSBarry Smith   PetscHeaderDestroy(mat);
7463a40ed3dSBarry Smith   PetscFunctionReturn(0);
7471eb62cbbSBarry Smith }
748ee50ffe9SBarry Smith 
7495615d1e5SSatish Balay #undef __FUNC__
750d4bb536fSBarry Smith #define __FUNC__ "MatView_MPIAIJ_Binary"
7518f6be9afSLois Curfman McInnes extern int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
7521eb62cbbSBarry Smith {
753416022c9SBarry Smith   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
754416022c9SBarry Smith   int         ierr;
755416022c9SBarry Smith 
7563a40ed3dSBarry Smith   PetscFunctionBegin;
75717699dbbSLois Curfman McInnes   if (aij->size == 1) {
758416022c9SBarry Smith     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
759416022c9SBarry Smith   }
760e3372554SBarry Smith   else SETERRQ(1,0,"Only uniprocessor output supported");
7613a40ed3dSBarry Smith   PetscFunctionReturn(0);
762416022c9SBarry Smith }
763416022c9SBarry Smith 
7645615d1e5SSatish Balay #undef __FUNC__
765d4bb536fSBarry Smith #define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworMatlab"
7668f6be9afSLois Curfman McInnes extern int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
767416022c9SBarry Smith {
76844a69424SLois Curfman McInnes   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
769dbb450caSBarry Smith   Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
770a56f8943SBarry Smith   int         ierr, format,shift = C->indexshift,rank;
771d636dbe3SBarry Smith   FILE        *fd;
77219bcc07fSBarry Smith   ViewerType  vtype;
773416022c9SBarry Smith 
7743a40ed3dSBarry Smith   PetscFunctionBegin;
77519bcc07fSBarry Smith   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
77619bcc07fSBarry Smith   if (vtype  == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
77790ace30eSBarry Smith     ierr = ViewerGetFormat(viewer,&format);
7780a198c4cSBarry Smith     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
7794e220ebcSLois Curfman McInnes       MatInfo info;
7804e220ebcSLois Curfman McInnes       int     flg;
781a56f8943SBarry Smith       MPI_Comm_rank(mat->comm,&rank);
78290ace30eSBarry Smith       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
7834e220ebcSLois Curfman McInnes       ierr = MatGetInfo(mat,MAT_LOCAL,&info);
78495e01e2fSLois Curfman McInnes       ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr);
78577c4ece6SBarry Smith       PetscSequentialPhaseBegin(mat->comm,1);
78695e01e2fSLois Curfman McInnes       if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
7874e220ebcSLois Curfman McInnes          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
78895e01e2fSLois Curfman McInnes       else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
7894e220ebcSLois Curfman McInnes          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
7904e220ebcSLois Curfman McInnes       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);
7914e220ebcSLois Curfman McInnes       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);
7924e220ebcSLois Curfman McInnes       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);
7934e220ebcSLois Curfman McInnes       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);
794a56f8943SBarry Smith       fflush(fd);
79577c4ece6SBarry Smith       PetscSequentialPhaseEnd(mat->comm,1);
796a40aa06bSLois Curfman McInnes       ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr);
7973a40ed3dSBarry Smith       PetscFunctionReturn(0);
7983a40ed3dSBarry Smith     } else if (format == VIEWER_FORMAT_ASCII_INFO) {
7993a40ed3dSBarry Smith       PetscFunctionReturn(0);
80008480c60SBarry Smith     }
801416022c9SBarry Smith   }
802416022c9SBarry Smith 
80319bcc07fSBarry Smith   if (vtype == DRAW_VIEWER) {
80419bcc07fSBarry Smith     Draw       draw;
80519bcc07fSBarry Smith     PetscTruth isnull;
80619bcc07fSBarry Smith     ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
8073a40ed3dSBarry Smith     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
80819bcc07fSBarry Smith   }
80919bcc07fSBarry Smith 
81019bcc07fSBarry Smith   if (vtype == ASCII_FILE_VIEWER) {
81190ace30eSBarry Smith     ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
81277c4ece6SBarry Smith     PetscSequentialPhaseBegin(mat->comm,1);
813d13ab20cSBarry Smith     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
81417699dbbSLois Curfman McInnes            aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
8151eb62cbbSBarry Smith            aij->cend);
81678b31e54SBarry Smith     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
81778b31e54SBarry Smith     ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
818d13ab20cSBarry Smith     fflush(fd);
81977c4ece6SBarry Smith     PetscSequentialPhaseEnd(mat->comm,1);
8203a40ed3dSBarry Smith   } else {
821a56f8943SBarry Smith     int size = aij->size;
822a56f8943SBarry Smith     rank = aij->rank;
82317699dbbSLois Curfman McInnes     if (size == 1) {
82478b31e54SBarry Smith       ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
8253a40ed3dSBarry Smith     } else {
82695373324SBarry Smith       /* assemble the entire matrix onto first processor. */
82795373324SBarry Smith       Mat         A;
828ec8511deSBarry Smith       Mat_SeqAIJ *Aloc;
8292eb8c8abSBarry Smith       int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
83095373324SBarry Smith       Scalar      *a;
8312ee70a88SLois Curfman McInnes 
83217699dbbSLois Curfman McInnes       if (!rank) {
833*55843e3eSBarry Smith         ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
8343a40ed3dSBarry Smith       } else {
835*55843e3eSBarry Smith         ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
83695373324SBarry Smith       }
837464493b3SBarry Smith       PLogObjectParent(mat,A);
838416022c9SBarry Smith 
83995373324SBarry Smith       /* copy over the A part */
840ec8511deSBarry Smith       Aloc = (Mat_SeqAIJ*) aij->A->data;
8412ee70a88SLois Curfman McInnes       m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
84295373324SBarry Smith       row = aij->rstart;
843dbb450caSBarry Smith       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
84495373324SBarry Smith       for ( i=0; i<m; i++ ) {
845416022c9SBarry Smith         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
84695373324SBarry Smith         row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
84795373324SBarry Smith       }
8482ee70a88SLois Curfman McInnes       aj = Aloc->j;
849dbb450caSBarry Smith       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}
85095373324SBarry Smith 
85195373324SBarry Smith       /* copy over the B part */
852ec8511deSBarry Smith       Aloc = (Mat_SeqAIJ*) aij->B->data;
8532ee70a88SLois Curfman McInnes       m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
85495373324SBarry Smith       row = aij->rstart;
8550452661fSBarry Smith       ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
856dbb450caSBarry Smith       for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
85795373324SBarry Smith       for ( i=0; i<m; i++ ) {
858416022c9SBarry Smith         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
85995373324SBarry Smith         row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
86095373324SBarry Smith       }
8610452661fSBarry Smith       PetscFree(ct);
8626d4a8577SBarry Smith       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
8636d4a8577SBarry Smith       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
864*55843e3eSBarry Smith       /*
865*55843e3eSBarry Smith          Everyone has to call to draw the matrix since the graphics waits are
866*55843e3eSBarry Smith          synchronized across all processors that share the Draw object
867*55843e3eSBarry Smith       */
868*55843e3eSBarry Smith       if (!rank || vtype == DRAW_VIEWER) {
86978b31e54SBarry Smith         ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
87095373324SBarry Smith       }
87178b31e54SBarry Smith       ierr = MatDestroy(A); CHKERRQ(ierr);
87295373324SBarry Smith     }
87395373324SBarry Smith   }
8743a40ed3dSBarry Smith   PetscFunctionReturn(0);
8751eb62cbbSBarry Smith }
8761eb62cbbSBarry Smith 
8775615d1e5SSatish Balay #undef __FUNC__
878d4bb536fSBarry Smith #define __FUNC__ "MatView_MPIAIJ"
8798f6be9afSLois Curfman McInnes int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
880416022c9SBarry Smith {
881416022c9SBarry Smith   Mat         mat = (Mat) obj;
882416022c9SBarry Smith   int         ierr;
88319bcc07fSBarry Smith   ViewerType  vtype;
884416022c9SBarry Smith 
8853a40ed3dSBarry Smith   PetscFunctionBegin;
88619bcc07fSBarry Smith   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
88719bcc07fSBarry Smith   if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
88819bcc07fSBarry Smith       vtype == DRAW_VIEWER       || vtype == MATLAB_VIEWER) {
889d7e8b826SBarry Smith     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
890416022c9SBarry Smith   }
89119bcc07fSBarry Smith   else if (vtype == BINARY_FILE_VIEWER) {
8923a40ed3dSBarry Smith     ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
893416022c9SBarry Smith   }
8943a40ed3dSBarry Smith   PetscFunctionReturn(0);
895416022c9SBarry Smith }
896416022c9SBarry Smith 
8971eb62cbbSBarry Smith /*
8981eb62cbbSBarry Smith     This has to provide several versions.
8991eb62cbbSBarry Smith 
9001eb62cbbSBarry Smith      2) a) use only local smoothing updating outer values only once.
9011eb62cbbSBarry Smith         b) local smoothing updating outer values each inner iteration
902d6dfbf8fSBarry Smith      3) color updating out values betwen colors.
9031eb62cbbSBarry Smith */
9045615d1e5SSatish Balay #undef __FUNC__
9055615d1e5SSatish Balay #define __FUNC__ "MatRelax_MPIAIJ"
9068f6be9afSLois Curfman McInnes int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
907dbb450caSBarry Smith                            double fshift,int its,Vec xx)
9088a729477SBarry Smith {
90944a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
910d6dfbf8fSBarry Smith   Mat        AA = mat->A, BB = mat->B;
911ec8511deSBarry Smith   Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
912c16cb8f2SBarry Smith   Scalar     *b,*x,*xs,*ls,d,*v,sum;
9136abc6512SBarry Smith   int        ierr,*idx, *diag;
914416022c9SBarry Smith   int        n = mat->n, m = mat->m, i,shift = A->indexshift;
9158a729477SBarry Smith 
9163a40ed3dSBarry Smith   PetscFunctionBegin;
917d6dfbf8fSBarry Smith   VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
918dbb450caSBarry Smith   xs = x + shift; /* shift by one for index start of 1 */
919dbb450caSBarry Smith   ls = ls + shift;
92083e2fdc7SBarry Smith   if (!A->diag) {ierr = MatMarkDiag_SeqAIJ(AA); CHKERRQ(ierr);}
921d6dfbf8fSBarry Smith   diag = A->diag;
922c16cb8f2SBarry Smith   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
923da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
9243a40ed3dSBarry Smith       ierr = (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
9253a40ed3dSBarry Smith       PetscFunctionReturn(0);
926da3a660dSBarry Smith     }
9273a40ed3dSBarry Smith     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
9283a40ed3dSBarry Smith     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
929d6dfbf8fSBarry Smith     while (its--) {
930d6dfbf8fSBarry Smith       /* go down through the rows */
931d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
932d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
933dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
934dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
935d6dfbf8fSBarry Smith         sum  = b[i];
936d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
937dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
938d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
939dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
940dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
941d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
94255a1b374SBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
943d6dfbf8fSBarry Smith       }
944d6dfbf8fSBarry Smith       /* come up through the rows */
945d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
946d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
947dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
948dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
949d6dfbf8fSBarry Smith         sum  = b[i];
950d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
951dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
952d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
953dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
954dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
955d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
95655a1b374SBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
957d6dfbf8fSBarry Smith       }
958d6dfbf8fSBarry Smith     }
9593a40ed3dSBarry Smith   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
960da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
9613a40ed3dSBarry Smith       ierr = (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
9623a40ed3dSBarry Smith       PetscFunctionReturn(0);
963da3a660dSBarry Smith     }
9643a40ed3dSBarry Smith     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
9653a40ed3dSBarry Smith     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
966d6dfbf8fSBarry Smith     while (its--) {
967d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
968d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
969dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
970dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
971d6dfbf8fSBarry Smith         sum  = b[i];
972d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
973dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
974d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
975dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
976dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
977d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
97855a1b374SBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
979d6dfbf8fSBarry Smith       }
980d6dfbf8fSBarry Smith     }
9813a40ed3dSBarry Smith   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
982da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
9833a40ed3dSBarry Smith       ierr = (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
9843a40ed3dSBarry Smith       PetscFunctionReturn(0);
985da3a660dSBarry Smith     }
98643a90d84SBarry Smith     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,
98778b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
98843a90d84SBarry Smith     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,
98978b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
990d6dfbf8fSBarry Smith     while (its--) {
991d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
992d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
993dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
994dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
995d6dfbf8fSBarry Smith         sum  = b[i];
996d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
997dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
998d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
999dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
1000dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
1001d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
100255a1b374SBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
1003d6dfbf8fSBarry Smith       }
1004d6dfbf8fSBarry Smith     }
10053a40ed3dSBarry Smith   } else {
1006e3372554SBarry Smith     SETERRQ(1,0,"Option not supported");
1007c16cb8f2SBarry Smith   }
10083a40ed3dSBarry Smith   PetscFunctionReturn(0);
10098a729477SBarry Smith }
1010a66be287SLois Curfman McInnes 
10115615d1e5SSatish Balay #undef __FUNC__
1012d4bb536fSBarry Smith #define __FUNC__ "MatGetInfo_MPIAIJ"
10138f6be9afSLois Curfman McInnes int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1014a66be287SLois Curfman McInnes {
1015a66be287SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1016a66be287SLois Curfman McInnes   Mat        A = mat->A, B = mat->B;
10174e220ebcSLois Curfman McInnes   int        ierr;
10184e220ebcSLois Curfman McInnes   double     isend[5], irecv[5];
1019a66be287SLois Curfman McInnes 
10203a40ed3dSBarry Smith   PetscFunctionBegin;
10214e220ebcSLois Curfman McInnes   info->rows_global    = (double)mat->M;
10224e220ebcSLois Curfman McInnes   info->columns_global = (double)mat->N;
10234e220ebcSLois Curfman McInnes   info->rows_local     = (double)mat->m;
10244e220ebcSLois Curfman McInnes   info->columns_local  = (double)mat->N;
10254e220ebcSLois Curfman McInnes   info->block_size     = 1.0;
10264e220ebcSLois Curfman McInnes   ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
10274e220ebcSLois Curfman McInnes   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
10284e220ebcSLois Curfman McInnes   isend[3] = info->memory;  isend[4] = info->mallocs;
10294e220ebcSLois Curfman McInnes   ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
10304e220ebcSLois Curfman McInnes   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
10314e220ebcSLois Curfman McInnes   isend[3] += info->memory;  isend[4] += info->mallocs;
1032a66be287SLois Curfman McInnes   if (flag == MAT_LOCAL) {
10334e220ebcSLois Curfman McInnes     info->nz_used      = isend[0];
10344e220ebcSLois Curfman McInnes     info->nz_allocated = isend[1];
10354e220ebcSLois Curfman McInnes     info->nz_unneeded  = isend[2];
10364e220ebcSLois Curfman McInnes     info->memory       = isend[3];
10374e220ebcSLois Curfman McInnes     info->mallocs      = isend[4];
1038a66be287SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_MAX) {
1039ca161407SBarry Smith     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
10404e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
10414e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
10424e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
10434e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
10444e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
1045a66be287SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_SUM) {
1046ca161407SBarry Smith     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
10474e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
10484e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
10494e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
10504e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
10514e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
1052a66be287SLois Curfman McInnes   }
10534e220ebcSLois Curfman McInnes   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
10544e220ebcSLois Curfman McInnes   info->fill_ratio_needed = 0;
10554e220ebcSLois Curfman McInnes   info->factor_mallocs    = 0;
10564e220ebcSLois Curfman McInnes 
10573a40ed3dSBarry Smith   PetscFunctionReturn(0);
1058a66be287SLois Curfman McInnes }
1059a66be287SLois Curfman McInnes 
10605615d1e5SSatish Balay #undef __FUNC__
1061d4bb536fSBarry Smith #define __FUNC__ "MatSetOption_MPIAIJ"
10628f6be9afSLois Curfman McInnes int MatSetOption_MPIAIJ(Mat A,MatOption op)
1063c74985f6SBarry Smith {
1064c0bbcb79SLois Curfman McInnes   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1065c74985f6SBarry Smith 
10663a40ed3dSBarry Smith   PetscFunctionBegin;
10676d4a8577SBarry Smith   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
10686d4a8577SBarry Smith       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
10696da5968aSLois Curfman McInnes       op == MAT_COLUMNS_UNSORTED ||
1070c2653b3dSLois Curfman McInnes       op == MAT_COLUMNS_SORTED ||
107196854ed6SLois Curfman McInnes       op == MAT_NEW_NONZERO_ALLOCATION_ERROR ||
1072c2653b3dSLois Curfman McInnes       op == MAT_NEW_NONZERO_LOCATION_ERROR) {
1073b1fbbac0SLois Curfman McInnes         MatSetOption(a->A,op);
1074b1fbbac0SLois Curfman McInnes         MatSetOption(a->B,op);
1075b1fbbac0SLois Curfman McInnes   } else if (op == MAT_ROW_ORIENTED) {
1076aeafbbfcSLois Curfman McInnes     a->roworiented = 1;
1077c0bbcb79SLois Curfman McInnes     MatSetOption(a->A,op);
1078c0bbcb79SLois Curfman McInnes     MatSetOption(a->B,op);
1079b1fbbac0SLois Curfman McInnes   } else if (op == MAT_ROWS_SORTED ||
10806da5968aSLois Curfman McInnes              op == MAT_ROWS_UNSORTED ||
10816d4a8577SBarry Smith              op == MAT_SYMMETRIC ||
10826d4a8577SBarry Smith              op == MAT_STRUCTURALLY_SYMMETRIC ||
10836d4a8577SBarry Smith              op == MAT_YES_NEW_DIAGONALS)
108494a424c1SBarry Smith     PLogInfo(A,"Info:MatSetOption_MPIAIJ:Option ignored\n");
10856d4a8577SBarry Smith   else if (op == MAT_COLUMN_ORIENTED) {
10864b0e389bSBarry Smith     a->roworiented = 0;
10874b0e389bSBarry Smith     MatSetOption(a->A,op);
10884b0e389bSBarry Smith     MatSetOption(a->B,op);
10892b362799SSatish Balay   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
109090f02eecSBarry Smith     a->donotstash = 1;
10913a40ed3dSBarry Smith   } else if (op == MAT_NO_NEW_DIAGONALS){
10923a40ed3dSBarry Smith     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
10933a40ed3dSBarry Smith   } else {
10943a40ed3dSBarry Smith     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
10953a40ed3dSBarry Smith   }
10963a40ed3dSBarry Smith   PetscFunctionReturn(0);
1097c74985f6SBarry Smith }
1098c74985f6SBarry Smith 
10995615d1e5SSatish Balay #undef __FUNC__
1100d4bb536fSBarry Smith #define __FUNC__ "MatGetSize_MPIAIJ"
11018f6be9afSLois Curfman McInnes int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
1102c74985f6SBarry Smith {
110344a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
11043a40ed3dSBarry Smith 
11053a40ed3dSBarry Smith   PetscFunctionBegin;
11060752156aSBarry Smith   if (m) *m = mat->M;
11070752156aSBarry Smith   if (n) *n = mat->N;
11083a40ed3dSBarry Smith   PetscFunctionReturn(0);
1109c74985f6SBarry Smith }
1110c74985f6SBarry Smith 
11115615d1e5SSatish Balay #undef __FUNC__
1112d4bb536fSBarry Smith #define __FUNC__ "MatGetLocalSize_MPIAIJ"
11138f6be9afSLois Curfman McInnes int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
1114c74985f6SBarry Smith {
111544a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
11163a40ed3dSBarry Smith 
11173a40ed3dSBarry Smith   PetscFunctionBegin;
11180752156aSBarry Smith   if (m) *m = mat->m;
11190752156aSBarry Smith   if (n) *n = mat->N;
11203a40ed3dSBarry Smith   PetscFunctionReturn(0);
1121c74985f6SBarry Smith }
1122c74985f6SBarry Smith 
11235615d1e5SSatish Balay #undef __FUNC__
1124d4bb536fSBarry Smith #define __FUNC__ "MatGetOwnershipRange_MPIAIJ"
11258f6be9afSLois Curfman McInnes int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
1126c74985f6SBarry Smith {
112744a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
11283a40ed3dSBarry Smith 
11293a40ed3dSBarry Smith   PetscFunctionBegin;
1130c74985f6SBarry Smith   *m = mat->rstart; *n = mat->rend;
11313a40ed3dSBarry Smith   PetscFunctionReturn(0);
1132c74985f6SBarry Smith }
1133c74985f6SBarry Smith 
11346d84be18SBarry Smith extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
11356d84be18SBarry Smith extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
11366d84be18SBarry Smith 
11375615d1e5SSatish Balay #undef __FUNC__
11385615d1e5SSatish Balay #define __FUNC__ "MatGetRow_MPIAIJ"
11396d84be18SBarry Smith int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
114039e00950SLois Curfman McInnes {
1141154123eaSLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
114270f0671dSBarry Smith   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
1143154123eaSLois Curfman McInnes   int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
1144154123eaSLois Curfman McInnes   int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
114570f0671dSBarry Smith   int        *cmap, *idx_p;
114639e00950SLois Curfman McInnes 
11473a40ed3dSBarry Smith   PetscFunctionBegin;
1148e3372554SBarry Smith   if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,0,"Already active");
11497a0afa10SBarry Smith   mat->getrowactive = PETSC_TRUE;
11507a0afa10SBarry Smith 
115170f0671dSBarry Smith   if (!mat->rowvalues && (idx || v)) {
11527a0afa10SBarry Smith     /*
11537a0afa10SBarry Smith         allocate enough space to hold information from the longest row.
11547a0afa10SBarry Smith     */
11557a0afa10SBarry Smith     Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data;
1156c16cb8f2SBarry Smith     int     max = 1,m = mat->m,tmp;
1157c16cb8f2SBarry Smith     for ( i=0; i<m; i++ ) {
11587a0afa10SBarry Smith       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
11597a0afa10SBarry Smith       if (max < tmp) { max = tmp; }
11607a0afa10SBarry Smith     }
11617a0afa10SBarry Smith     mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar)));
11627a0afa10SBarry Smith     CHKPTRQ(mat->rowvalues);
11637a0afa10SBarry Smith     mat->rowindices = (int *) (mat->rowvalues + max);
11647a0afa10SBarry Smith   }
11657a0afa10SBarry Smith 
1166e3372554SBarry Smith   if (row < rstart || row >= rend) SETERRQ(1,0,"Only local rows")
1167abc0e9e4SLois Curfman McInnes   lrow = row - rstart;
116839e00950SLois Curfman McInnes 
1169154123eaSLois Curfman McInnes   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1170154123eaSLois Curfman McInnes   if (!v)   {pvA = 0; pvB = 0;}
1171154123eaSLois Curfman McInnes   if (!idx) {pcA = 0; if (!v) pcB = 0;}
117238597bd4SSatish Balay   ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
117338597bd4SSatish Balay   ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1174154123eaSLois Curfman McInnes   nztot = nzA + nzB;
1175154123eaSLois Curfman McInnes 
117670f0671dSBarry Smith   cmap  = mat->garray;
1177154123eaSLois Curfman McInnes   if (v  || idx) {
1178154123eaSLois Curfman McInnes     if (nztot) {
1179154123eaSLois Curfman McInnes       /* Sort by increasing column numbers, assuming A and B already sorted */
118070f0671dSBarry Smith       int imark = -1;
1181154123eaSLois Curfman McInnes       if (v) {
118270f0671dSBarry Smith         *v = v_p = mat->rowvalues;
118339e00950SLois Curfman McInnes         for ( i=0; i<nzB; i++ ) {
118470f0671dSBarry Smith           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1185154123eaSLois Curfman McInnes           else break;
1186154123eaSLois Curfman McInnes         }
1187154123eaSLois Curfman McInnes         imark = i;
118870f0671dSBarry Smith         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
118970f0671dSBarry Smith         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
1190154123eaSLois Curfman McInnes       }
1191154123eaSLois Curfman McInnes       if (idx) {
119270f0671dSBarry Smith         *idx = idx_p = mat->rowindices;
119370f0671dSBarry Smith         if (imark > -1) {
119470f0671dSBarry Smith           for ( i=0; i<imark; i++ ) {
119570f0671dSBarry Smith             idx_p[i] = cmap[cworkB[i]];
119670f0671dSBarry Smith           }
119770f0671dSBarry Smith         } else {
1198154123eaSLois Curfman McInnes           for ( i=0; i<nzB; i++ ) {
119970f0671dSBarry Smith             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1200154123eaSLois Curfman McInnes             else break;
1201154123eaSLois Curfman McInnes           }
1202154123eaSLois Curfman McInnes           imark = i;
120370f0671dSBarry Smith         }
120470f0671dSBarry Smith         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart + cworkA[i];
120570f0671dSBarry Smith         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]];
120639e00950SLois Curfman McInnes       }
120739e00950SLois Curfman McInnes     }
12081ca473b0SSatish Balay     else {
12091ca473b0SSatish Balay       if (idx) *idx = 0;
12101ca473b0SSatish Balay       if (v)   *v   = 0;
12111ca473b0SSatish Balay     }
1212154123eaSLois Curfman McInnes   }
121339e00950SLois Curfman McInnes   *nz = nztot;
121438597bd4SSatish Balay   ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
121538597bd4SSatish Balay   ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
12163a40ed3dSBarry Smith   PetscFunctionReturn(0);
121739e00950SLois Curfman McInnes }
121839e00950SLois Curfman McInnes 
12195615d1e5SSatish Balay #undef __FUNC__
1220d4bb536fSBarry Smith #define __FUNC__ "MatRestoreRow_MPIAIJ"
12216d84be18SBarry Smith int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
122239e00950SLois Curfman McInnes {
12237a0afa10SBarry Smith   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
12243a40ed3dSBarry Smith 
12253a40ed3dSBarry Smith   PetscFunctionBegin;
12267a0afa10SBarry Smith   if (aij->getrowactive == PETSC_FALSE) {
1227e3372554SBarry Smith     SETERRQ(1,0,"MatGetRow not called");
12287a0afa10SBarry Smith   }
12297a0afa10SBarry Smith   aij->getrowactive = PETSC_FALSE;
12303a40ed3dSBarry Smith   PetscFunctionReturn(0);
123139e00950SLois Curfman McInnes }
123239e00950SLois Curfman McInnes 
12335615d1e5SSatish Balay #undef __FUNC__
12345615d1e5SSatish Balay #define __FUNC__ "MatNorm_MPIAIJ"
12358f6be9afSLois Curfman McInnes int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
1236855ac2c5SLois Curfman McInnes {
1237855ac2c5SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1238ec8511deSBarry Smith   Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
1239416022c9SBarry Smith   int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
1240416022c9SBarry Smith   double     sum = 0.0;
124104ca555eSLois Curfman McInnes   Scalar     *v;
124204ca555eSLois Curfman McInnes 
12433a40ed3dSBarry Smith   PetscFunctionBegin;
124417699dbbSLois Curfman McInnes   if (aij->size == 1) {
124514183eadSLois Curfman McInnes     ierr =  MatNorm(aij->A,type,norm); CHKERRQ(ierr);
124637fa93a5SLois Curfman McInnes   } else {
124704ca555eSLois Curfman McInnes     if (type == NORM_FROBENIUS) {
124804ca555eSLois Curfman McInnes       v = amat->a;
124904ca555eSLois Curfman McInnes       for (i=0; i<amat->nz; i++ ) {
12503a40ed3dSBarry Smith #if defined(USE_PETSC_COMPLEX)
125104ca555eSLois Curfman McInnes         sum += real(conj(*v)*(*v)); v++;
125204ca555eSLois Curfman McInnes #else
125304ca555eSLois Curfman McInnes         sum += (*v)*(*v); v++;
125404ca555eSLois Curfman McInnes #endif
125504ca555eSLois Curfman McInnes       }
125604ca555eSLois Curfman McInnes       v = bmat->a;
125704ca555eSLois Curfman McInnes       for (i=0; i<bmat->nz; i++ ) {
12583a40ed3dSBarry Smith #if defined(USE_PETSC_COMPLEX)
125904ca555eSLois Curfman McInnes         sum += real(conj(*v)*(*v)); v++;
126004ca555eSLois Curfman McInnes #else
126104ca555eSLois Curfman McInnes         sum += (*v)*(*v); v++;
126204ca555eSLois Curfman McInnes #endif
126304ca555eSLois Curfman McInnes       }
1264ca161407SBarry Smith       ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
126504ca555eSLois Curfman McInnes       *norm = sqrt(*norm);
12663a40ed3dSBarry Smith     } else if (type == NORM_1) { /* max column norm */
126704ca555eSLois Curfman McInnes       double *tmp, *tmp2;
126804ca555eSLois Curfman McInnes       int    *jj, *garray = aij->garray;
1269758f045eSSatish Balay       tmp  = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp);
1270758f045eSSatish Balay       tmp2 = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp2);
1271cddf8d76SBarry Smith       PetscMemzero(tmp,aij->N*sizeof(double));
127204ca555eSLois Curfman McInnes       *norm = 0.0;
127304ca555eSLois Curfman McInnes       v = amat->a; jj = amat->j;
127404ca555eSLois Curfman McInnes       for ( j=0; j<amat->nz; j++ ) {
1275579c6b6fSBarry Smith         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
127604ca555eSLois Curfman McInnes       }
127704ca555eSLois Curfman McInnes       v = bmat->a; jj = bmat->j;
127804ca555eSLois Curfman McInnes       for ( j=0; j<bmat->nz; j++ ) {
1279579c6b6fSBarry Smith         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
128004ca555eSLois Curfman McInnes       }
1281ca161407SBarry Smith       ierr = MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
128204ca555eSLois Curfman McInnes       for ( j=0; j<aij->N; j++ ) {
128304ca555eSLois Curfman McInnes         if (tmp2[j] > *norm) *norm = tmp2[j];
128404ca555eSLois Curfman McInnes       }
12850452661fSBarry Smith       PetscFree(tmp); PetscFree(tmp2);
12863a40ed3dSBarry Smith     } else if (type == NORM_INFINITY) { /* max row norm */
1287515d9167SLois Curfman McInnes       double ntemp = 0.0;
128804ca555eSLois Curfman McInnes       for ( j=0; j<amat->m; j++ ) {
1289dbb450caSBarry Smith         v = amat->a + amat->i[j] + shift;
129004ca555eSLois Curfman McInnes         sum = 0.0;
129104ca555eSLois Curfman McInnes         for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
1292cddf8d76SBarry Smith           sum += PetscAbsScalar(*v); v++;
129304ca555eSLois Curfman McInnes         }
1294dbb450caSBarry Smith         v = bmat->a + bmat->i[j] + shift;
129504ca555eSLois Curfman McInnes         for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
1296cddf8d76SBarry Smith           sum += PetscAbsScalar(*v); v++;
129704ca555eSLois Curfman McInnes         }
1298515d9167SLois Curfman McInnes         if (sum > ntemp) ntemp = sum;
129904ca555eSLois Curfman McInnes       }
1300ca161407SBarry Smith       ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);CHKERRQ(ierr);
1301ca161407SBarry Smith     } else {
1302e3372554SBarry Smith       SETERRQ(1,0,"No support for two norm");
130304ca555eSLois Curfman McInnes     }
130437fa93a5SLois Curfman McInnes   }
13053a40ed3dSBarry Smith   PetscFunctionReturn(0);
1306855ac2c5SLois Curfman McInnes }
1307855ac2c5SLois Curfman McInnes 
13085615d1e5SSatish Balay #undef __FUNC__
13095615d1e5SSatish Balay #define __FUNC__ "MatTranspose_MPIAIJ"
13108f6be9afSLois Curfman McInnes int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1311b7c46309SBarry Smith {
1312b7c46309SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1313dbb450caSBarry Smith   Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
1314416022c9SBarry Smith   int        ierr,shift = Aloc->indexshift;
1315b7c46309SBarry Smith   int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
13163a40ed3dSBarry Smith   Mat        B;
1317b7c46309SBarry Smith   Scalar     *array;
1318b7c46309SBarry Smith 
13193a40ed3dSBarry Smith   PetscFunctionBegin;
1320d4bb536fSBarry Smith   if (matout == PETSC_NULL && M != N) {
1321e3372554SBarry Smith     SETERRQ(1,0,"Square matrix only for in-place");
1322d4bb536fSBarry Smith   }
1323d4bb536fSBarry Smith 
1324d4bb536fSBarry Smith   ierr = MatCreateMPIAIJ(A->comm,a->n,a->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);
1325b7c46309SBarry Smith 
1326b7c46309SBarry Smith   /* copy over the A part */
1327ec8511deSBarry Smith   Aloc = (Mat_SeqAIJ*) a->A->data;
1328b7c46309SBarry Smith   m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1329b7c46309SBarry Smith   row = a->rstart;
1330dbb450caSBarry Smith   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
1331b7c46309SBarry Smith   for ( i=0; i<m; i++ ) {
1332416022c9SBarry Smith     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1333b7c46309SBarry Smith     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1334b7c46309SBarry Smith   }
1335b7c46309SBarry Smith   aj = Aloc->j;
13364af08d9eSBarry Smith   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}
1337b7c46309SBarry Smith 
1338b7c46309SBarry Smith   /* copy over the B part */
1339ec8511deSBarry Smith   Aloc = (Mat_SeqAIJ*) a->B->data;
1340b7c46309SBarry Smith   m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1341b7c46309SBarry Smith   row = a->rstart;
13420452661fSBarry Smith   ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
1343dbb450caSBarry Smith   for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
1344b7c46309SBarry Smith   for ( i=0; i<m; i++ ) {
1345416022c9SBarry Smith     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1346b7c46309SBarry Smith     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1347b7c46309SBarry Smith   }
13480452661fSBarry Smith   PetscFree(ct);
13496d4a8577SBarry Smith   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
13506d4a8577SBarry Smith   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
13513638b69dSLois Curfman McInnes   if (matout != PETSC_NULL) {
13520de55854SLois Curfman McInnes     *matout = B;
13530de55854SLois Curfman McInnes   } else {
13540de55854SLois Curfman McInnes     /* This isn't really an in-place transpose .... but free data structures from a */
13550452661fSBarry Smith     PetscFree(a->rowners);
13560de55854SLois Curfman McInnes     ierr = MatDestroy(a->A); CHKERRQ(ierr);
13570de55854SLois Curfman McInnes     ierr = MatDestroy(a->B); CHKERRQ(ierr);
13580452661fSBarry Smith     if (a->colmap) PetscFree(a->colmap);
13590452661fSBarry Smith     if (a->garray) PetscFree(a->garray);
13600de55854SLois Curfman McInnes     if (a->lvec) VecDestroy(a->lvec);
1361a56f8943SBarry Smith     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
13620452661fSBarry Smith     PetscFree(a);
1363f09e8eb9SSatish Balay     PetscMemcpy(A,B,sizeof(struct _p_Mat));
13640452661fSBarry Smith     PetscHeaderDestroy(B);
13650de55854SLois Curfman McInnes   }
13663a40ed3dSBarry Smith   PetscFunctionReturn(0);
1367b7c46309SBarry Smith }
1368b7c46309SBarry Smith 
13695615d1e5SSatish Balay #undef __FUNC__
13705615d1e5SSatish Balay #define __FUNC__ "MatDiagonalScale_MPIAIJ"
13714b967eb1SSatish Balay int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1372a008b906SSatish Balay {
13734b967eb1SSatish Balay   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
13744b967eb1SSatish Balay   Mat a = aij->A, b = aij->B;
1375a008b906SSatish Balay   int ierr,s1,s2,s3;
1376a008b906SSatish Balay 
13773a40ed3dSBarry Smith   PetscFunctionBegin;
13784b967eb1SSatish Balay   ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr);
13794b967eb1SSatish Balay   if (rr) {
13804b967eb1SSatish Balay     s3 = aij->n;
13814b967eb1SSatish Balay     VecGetLocalSize_Fast(rr,s1);
1382e3372554SBarry Smith     if (s1!=s3) SETERRQ(1,0,"right vector non-conforming local size");
13834b967eb1SSatish Balay     /* Overlap communication with computation. */
138443a90d84SBarry Smith     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
1385a008b906SSatish Balay   }
13864b967eb1SSatish Balay   if (ll) {
13874b967eb1SSatish Balay     VecGetLocalSize_Fast(ll,s1);
1388e3372554SBarry Smith     if (s1!=s2) SETERRQ(1,0,"left vector non-conforming local size");
1389c351683dSSatish Balay     ierr = (*b->ops.diagonalscale)(b,ll,0); CHKERRQ(ierr);
13904b967eb1SSatish Balay   }
13914b967eb1SSatish Balay   /* scale  the diagonal block */
1392c351683dSSatish Balay   ierr = (*a->ops.diagonalscale)(a,ll,rr); CHKERRQ(ierr);
13934b967eb1SSatish Balay 
13944b967eb1SSatish Balay   if (rr) {
13954b967eb1SSatish Balay     /* Do a scatter end and then right scale the off-diagonal block */
139643a90d84SBarry Smith     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
1397c351683dSSatish Balay     ierr = (*b->ops.diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr);
13984b967eb1SSatish Balay   }
13994b967eb1SSatish Balay 
14003a40ed3dSBarry Smith   PetscFunctionReturn(0);
1401a008b906SSatish Balay }
1402a008b906SSatish Balay 
1403a008b906SSatish Balay 
1404682d7d0cSBarry Smith extern int MatPrintHelp_SeqAIJ(Mat);
14055615d1e5SSatish Balay #undef __FUNC__
1406d4bb536fSBarry Smith #define __FUNC__ "MatPrintHelp_MPIAIJ"
14078f6be9afSLois Curfman McInnes int MatPrintHelp_MPIAIJ(Mat A)
1408682d7d0cSBarry Smith {
1409682d7d0cSBarry Smith   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
14103a40ed3dSBarry Smith   int        ierr;
1411682d7d0cSBarry Smith 
14123a40ed3dSBarry Smith   PetscFunctionBegin;
14133a40ed3dSBarry Smith   if (!a->rank) {
14143a40ed3dSBarry Smith     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
14153a40ed3dSBarry Smith   }
14163a40ed3dSBarry Smith   PetscFunctionReturn(0);
1417682d7d0cSBarry Smith }
1418682d7d0cSBarry Smith 
14195615d1e5SSatish Balay #undef __FUNC__
1420d4bb536fSBarry Smith #define __FUNC__ "MatGetBlockSize_MPIAIJ"
14218f6be9afSLois Curfman McInnes int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
14225a838052SSatish Balay {
14233a40ed3dSBarry Smith   PetscFunctionBegin;
14245a838052SSatish Balay   *bs = 1;
14253a40ed3dSBarry Smith   PetscFunctionReturn(0);
14265a838052SSatish Balay }
14275615d1e5SSatish Balay #undef __FUNC__
1428d4bb536fSBarry Smith #define __FUNC__ "MatSetUnfactored_MPIAIJ"
14298f6be9afSLois Curfman McInnes int MatSetUnfactored_MPIAIJ(Mat A)
1430bb5a7306SBarry Smith {
1431bb5a7306SBarry Smith   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1432bb5a7306SBarry Smith   int        ierr;
14333a40ed3dSBarry Smith 
14343a40ed3dSBarry Smith   PetscFunctionBegin;
1435bb5a7306SBarry Smith   ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
14363a40ed3dSBarry Smith   PetscFunctionReturn(0);
1437bb5a7306SBarry Smith }
1438bb5a7306SBarry Smith 
1439d4bb536fSBarry Smith #undef __FUNC__
1440d4bb536fSBarry Smith #define __FUNC__ "MatEqual_MPIAIJ"
1441d4bb536fSBarry Smith int MatEqual_MPIAIJ(Mat A, Mat B, PetscTruth *flag)
1442d4bb536fSBarry Smith {
1443d4bb536fSBarry Smith   Mat_MPIAIJ *matB = (Mat_MPIAIJ *) B->data,*matA = (Mat_MPIAIJ *) A->data;
1444d4bb536fSBarry Smith   Mat        a, b, c, d;
1445d4bb536fSBarry Smith   PetscTruth flg;
1446d4bb536fSBarry Smith   int        ierr;
1447d4bb536fSBarry Smith 
14483a40ed3dSBarry Smith   PetscFunctionBegin;
1449d4bb536fSBarry Smith   if (B->type != MATMPIAIJ) SETERRQ(1,0,"Matrices must be same type");
1450d4bb536fSBarry Smith   a = matA->A; b = matA->B;
1451d4bb536fSBarry Smith   c = matB->A; d = matB->B;
1452d4bb536fSBarry Smith 
1453d4bb536fSBarry Smith   ierr = MatEqual(a, c, &flg); CHKERRQ(ierr);
1454d4bb536fSBarry Smith   if (flg == PETSC_TRUE) {
1455d4bb536fSBarry Smith     ierr = MatEqual(b, d, &flg); CHKERRQ(ierr);
1456d4bb536fSBarry Smith   }
1457ca161407SBarry Smith   ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr);
14583a40ed3dSBarry Smith   PetscFunctionReturn(0);
1459d4bb536fSBarry Smith }
1460d4bb536fSBarry Smith 
14618f6be9afSLois Curfman McInnes extern int MatConvertSameType_MPIAIJ(Mat,Mat *,int);
14622f86bd48SSatish Balay extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int);
14630a198c4cSBarry Smith extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
14640a198c4cSBarry Smith extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **);
146500e6dbe6SBarry Smith extern int MatGetSubMatrix_MPIAIJ (Mat ,IS,IS,MatGetSubMatrixCall,Mat *);
146600e6dbe6SBarry Smith 
14678a729477SBarry Smith /* -------------------------------------------------------------------*/
14682ee70a88SLois Curfman McInnes static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
146939e00950SLois Curfman McInnes        MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
147044a69424SLois Curfman McInnes        MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
147144a69424SLois Curfman McInnes        MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
147236ce4990SBarry Smith        0,0,
147336ce4990SBarry Smith        0,0,
147436ce4990SBarry Smith        0,0,
147544a69424SLois Curfman McInnes        MatRelax_MPIAIJ,
1476b7c46309SBarry Smith        MatTranspose_MPIAIJ,
1477d4bb536fSBarry Smith        MatGetInfo_MPIAIJ,MatEqual_MPIAIJ,
1478a008b906SSatish Balay        MatGetDiagonal_MPIAIJ,MatDiagonalScale_MPIAIJ,MatNorm_MPIAIJ,
1479ee50ffe9SBarry Smith        MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
14801eb62cbbSBarry Smith        0,
1481299609e3SLois Curfman McInnes        MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
148236ce4990SBarry Smith        0,0,0,0,
1483d1710a03SLois Curfman McInnes        MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
148436ce4990SBarry Smith        0,0,
148594a9d846SBarry Smith        0,0,MatConvertSameType_MPIAIJ,0,0,
1486b49de8d1SLois Curfman McInnes        0,0,0,
1487598137ffSSatish Balay        MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0,
1488052efed2SBarry Smith        MatPrintHelp_MPIAIJ,
14893b2fbd54SBarry Smith        MatScale_MPIAIJ,0,0,0,
14900a198c4cSBarry Smith        MatGetBlockSize_MPIAIJ,0,0,0,0,
149100e6dbe6SBarry Smith        MatFDColoringCreate_MPIAIJ,0,MatSetUnfactored_MPIAIJ,
1492ca161407SBarry Smith        0,0,MatGetSubMatrix_MPIAIJ};
149336ce4990SBarry Smith 
14948a729477SBarry Smith 
14955615d1e5SSatish Balay #undef __FUNC__
14965615d1e5SSatish Balay #define __FUNC__ "MatCreateMPIAIJ"
14971987afe7SBarry Smith /*@C
1498ff756334SLois Curfman McInnes    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
14993a511b96SLois Curfman McInnes    (the default parallel PETSc format).  For good matrix assembly performance
15003a511b96SLois Curfman McInnes    the user should preallocate the matrix storage by setting the parameters
15013a511b96SLois Curfman McInnes    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
15023a511b96SLois Curfman McInnes    performance can be increased by more than a factor of 50.
15038a729477SBarry Smith 
15048a729477SBarry Smith    Input Parameters:
15051eb62cbbSBarry Smith .  comm - MPI communicator
15067d3e4905SLois Curfman McInnes .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
150792e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
150892e8d321SLois Curfman McInnes            y vector for the matrix-vector product y = Ax.
15091a3896d6SBarry Smith .  n - This value should be the same as the local size used in creating the
15101a3896d6SBarry Smith        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
15111a3896d6SBarry Smith        calculated if N is given) For square matrices n is almost always m.
15127d3e4905SLois Curfman McInnes .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
151392e8d321SLois Curfman McInnes .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
1514ab693e5aSLois Curfman McInnes .  d_nz - number of nonzeros per row in diagonal portion of local submatrix
1515ff756334SLois Curfman McInnes            (same for all local rows)
15162bd5e0b2SLois Curfman McInnes .  d_nzz - array containing the number of nonzeros in the various rows of the
151792e8d321SLois Curfman McInnes            diagonal portion of the local submatrix (possibly different for each row)
15182bd5e0b2SLois Curfman McInnes            or PETSC_NULL. You must leave room for the diagonal entry even if
15192bd5e0b2SLois Curfman McInnes            it is zero.
15202bd5e0b2SLois Curfman McInnes .  o_nz - number of nonzeros per row in the off-diagonal portion of local
1521ab693e5aSLois Curfman McInnes            submatrix (same for all local rows).
15222bd5e0b2SLois Curfman McInnes .  o_nzz - array containing the number of nonzeros in the various rows of the
15232bd5e0b2SLois Curfman McInnes            off-diagonal portion of the local submatrix (possibly different for
15242bd5e0b2SLois Curfman McInnes            each row) or PETSC_NULL.
15258a729477SBarry Smith 
1526ff756334SLois Curfman McInnes    Output Parameter:
152744cd7ae7SLois Curfman McInnes .  A - the matrix
15288a729477SBarry Smith 
1529ff756334SLois Curfman McInnes    Notes:
1530ff756334SLois Curfman McInnes    The AIJ format (also called the Yale sparse matrix format or
1531ff756334SLois Curfman McInnes    compressed row storage), is fully compatible with standard Fortran 77
15320002213bSLois Curfman McInnes    storage.  That is, the stored row and column indices can begin at
15330002213bSLois Curfman McInnes    either one (as in Fortran) or zero.  See the users manual for details.
1534ff756334SLois Curfman McInnes 
1535ff756334SLois Curfman McInnes    The user MUST specify either the local or global matrix dimensions
1536ff756334SLois Curfman McInnes    (possibly both).
1537ff756334SLois Curfman McInnes 
15385511cfe3SLois Curfman McInnes    By default, this format uses inodes (identical nodes) when possible.
15395511cfe3SLois Curfman McInnes    We search for consecutive rows with the same nonzero structure, thereby
15405511cfe3SLois Curfman McInnes    reusing matrix information to achieve increased efficiency.
15415511cfe3SLois Curfman McInnes 
15425511cfe3SLois Curfman McInnes    Options Database Keys:
15435511cfe3SLois Curfman McInnes $    -mat_aij_no_inode  - Do not use inodes
15446e62573dSLois Curfman McInnes $    -mat_aij_inode_limit <limit> - Set inode limit.
15456e62573dSLois Curfman McInnes $        (max limit=5)
15466e62573dSLois Curfman McInnes $    -mat_aij_oneindex - Internally use indexing starting at 1
15476e62573dSLois Curfman McInnes $        rather than 0.  Note: When calling MatSetValues(),
15486e62573dSLois Curfman McInnes $        the user still MUST index entries starting at 0!
15495511cfe3SLois Curfman McInnes 
1550e0245417SLois Curfman McInnes    Storage Information:
1551e0245417SLois Curfman McInnes    For a square global matrix we define each processor's diagonal portion
1552e0245417SLois Curfman McInnes    to be its local rows and the corresponding columns (a square submatrix);
1553e0245417SLois Curfman McInnes    each processor's off-diagonal portion encompasses the remainder of the
1554e0245417SLois Curfman McInnes    local matrix (a rectangular submatrix).
1555e0245417SLois Curfman McInnes 
1556e0245417SLois Curfman McInnes    The user can specify preallocated storage for the diagonal part of
15575ace5be8SLois Curfman McInnes    the local submatrix with either d_nz or d_nnz (not both).  Set
15585ace5be8SLois Curfman McInnes    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
15595ace5be8SLois Curfman McInnes    memory allocation.  Likewise, specify preallocated storage for the
15605ace5be8SLois Curfman McInnes    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1561ff756334SLois Curfman McInnes 
15625511cfe3SLois Curfman McInnes    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
15635511cfe3SLois Curfman McInnes    the figure below we depict these three local rows and all columns (0-11).
15642191d07cSBarry Smith 
1565b810aeb4SBarry Smith $          0 1 2 3 4 5 6 7 8 9 10 11
1566b810aeb4SBarry Smith $         -------------------
15675511cfe3SLois Curfman McInnes $  row 3  |  o o o d d d o o o o o o
15685511cfe3SLois Curfman McInnes $  row 4  |  o o o d d d o o o o o o
15695511cfe3SLois Curfman McInnes $  row 5  |  o o o d d d o o o o o o
15705511cfe3SLois Curfman McInnes $         -------------------
1571b810aeb4SBarry Smith $
1572b810aeb4SBarry Smith 
15735511cfe3SLois Curfman McInnes    Thus, any entries in the d locations are stored in the d (diagonal)
15745511cfe3SLois Curfman McInnes    submatrix, and any entries in the o locations are stored in the
15755511cfe3SLois Curfman McInnes    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
15765511cfe3SLois Curfman McInnes    stored simply in the MATSEQAIJ format for compressed row storage.
15775511cfe3SLois Curfman McInnes 
15785511cfe3SLois Curfman McInnes    Now d_nz should indicate the number of nonzeros per row in the d matrix,
15795511cfe3SLois Curfman McInnes    and o_nz should indicate the number of nonzeros per row in the o matrix.
15805511cfe3SLois Curfman McInnes    In general, for PDE problems in which most nonzeros are near the diagonal,
15813d323bbdSBarry Smith    one expects d_nz >> o_nz. For large problems you MUST preallocate memory
158292e8d321SLois Curfman McInnes    or you will get TERRIBLE performance; see the users' manual chapter on
15836da5968aSLois Curfman McInnes    matrices.
15843a511b96SLois Curfman McInnes 
1585dbd7a890SLois Curfman McInnes .keywords: matrix, aij, compressed row, sparse, parallel
1586ff756334SLois Curfman McInnes 
1587fafbff53SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
15888a729477SBarry Smith @*/
15891eb62cbbSBarry Smith int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
159044cd7ae7SLois Curfman McInnes                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
15918a729477SBarry Smith {
159244cd7ae7SLois Curfman McInnes   Mat          B;
159344cd7ae7SLois Curfman McInnes   Mat_MPIAIJ   *b;
159436ce4990SBarry Smith   int          ierr, i,sum[2],work[2],size;
1595416022c9SBarry Smith 
15963a40ed3dSBarry Smith   PetscFunctionBegin;
15973914022bSBarry Smith   MPI_Comm_size(comm,&size);
15983914022bSBarry Smith   if (size == 1) {
15993914022bSBarry Smith     if (M == PETSC_DECIDE) M = m;
16003914022bSBarry Smith     if (N == PETSC_DECIDE) N = n;
16013914022bSBarry Smith     ierr = MatCreateSeqAIJ(comm,M,N,d_nz,d_nnz,A); CHKERRQ(ierr);
16023a40ed3dSBarry Smith     PetscFunctionReturn(0);
16033914022bSBarry Smith   }
16043914022bSBarry Smith 
160544cd7ae7SLois Curfman McInnes   *A = 0;
1606d4bb536fSBarry Smith   PetscHeaderCreate(B,_p_Mat,MAT_COOKIE,MATMPIAIJ,comm,MatDestroy,MatView);
160744cd7ae7SLois Curfman McInnes   PLogObjectCreate(B);
160844cd7ae7SLois Curfman McInnes   B->data       = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b);
160944cd7ae7SLois Curfman McInnes   PetscMemzero(b,sizeof(Mat_MPIAIJ));
161044cd7ae7SLois Curfman McInnes   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
161144cd7ae7SLois Curfman McInnes   B->destroy    = MatDestroy_MPIAIJ;
161244cd7ae7SLois Curfman McInnes   B->view       = MatView_MPIAIJ;
161344cd7ae7SLois Curfman McInnes   B->factor     = 0;
161444cd7ae7SLois Curfman McInnes   B->assembled  = PETSC_FALSE;
161590f02eecSBarry Smith   B->mapping    = 0;
1616d6dfbf8fSBarry Smith 
161747794344SBarry Smith   B->insertmode = NOT_SET_VALUES;
16189eb4d147SSatish Balay   b->size       = size;
161944cd7ae7SLois Curfman McInnes   MPI_Comm_rank(comm,&b->rank);
16201eb62cbbSBarry Smith 
16213a40ed3dSBarry Smith   if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) {
1622e3372554SBarry Smith     SETERRQ(1,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
16233a40ed3dSBarry Smith   }
16241987afe7SBarry Smith 
1625dbd7a890SLois Curfman McInnes   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
16261eb62cbbSBarry Smith     work[0] = m; work[1] = n;
1627ca161407SBarry Smith     ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr);
1628dbd7a890SLois Curfman McInnes     if (M == PETSC_DECIDE) M = sum[0];
1629dbd7a890SLois Curfman McInnes     if (N == PETSC_DECIDE) N = sum[1];
16301eb62cbbSBarry Smith   }
163144cd7ae7SLois Curfman McInnes   if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);}
163244cd7ae7SLois Curfman McInnes   if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);}
163344cd7ae7SLois Curfman McInnes   b->m = m; B->m = m;
163444cd7ae7SLois Curfman McInnes   b->n = n; B->n = n;
163544cd7ae7SLois Curfman McInnes   b->N = N; B->N = N;
163644cd7ae7SLois Curfman McInnes   b->M = M; B->M = M;
16371eb62cbbSBarry Smith 
16381eb62cbbSBarry Smith   /* build local table of row and column ownerships */
163944cd7ae7SLois Curfman McInnes   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
1640f09e8eb9SSatish Balay   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1641603f58a4SSatish Balay   b->cowners = b->rowners + b->size + 2;
1642ca161407SBarry Smith   ierr = MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
164344cd7ae7SLois Curfman McInnes   b->rowners[0] = 0;
164444cd7ae7SLois Curfman McInnes   for ( i=2; i<=b->size; i++ ) {
164544cd7ae7SLois Curfman McInnes     b->rowners[i] += b->rowners[i-1];
16468a729477SBarry Smith   }
164744cd7ae7SLois Curfman McInnes   b->rstart = b->rowners[b->rank];
164844cd7ae7SLois Curfman McInnes   b->rend   = b->rowners[b->rank+1];
1649ca161407SBarry Smith   ierr = MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
165044cd7ae7SLois Curfman McInnes   b->cowners[0] = 0;
165144cd7ae7SLois Curfman McInnes   for ( i=2; i<=b->size; i++ ) {
165244cd7ae7SLois Curfman McInnes     b->cowners[i] += b->cowners[i-1];
16538a729477SBarry Smith   }
165444cd7ae7SLois Curfman McInnes   b->cstart = b->cowners[b->rank];
165544cd7ae7SLois Curfman McInnes   b->cend   = b->cowners[b->rank+1];
16568a729477SBarry Smith 
16575ace5be8SLois Curfman McInnes   if (d_nz == PETSC_DEFAULT) d_nz = 5;
1658029af93fSBarry Smith   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
165944cd7ae7SLois Curfman McInnes   PLogObjectParent(B,b->A);
16607b8455f0SLois Curfman McInnes   if (o_nz == PETSC_DEFAULT) o_nz = 0;
1661029af93fSBarry Smith   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
166244cd7ae7SLois Curfman McInnes   PLogObjectParent(B,b->B);
16638a729477SBarry Smith 
16641eb62cbbSBarry Smith   /* build cache for off array entries formed */
166544cd7ae7SLois Curfman McInnes   ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
166690f02eecSBarry Smith   b->donotstash  = 0;
166744cd7ae7SLois Curfman McInnes   b->colmap      = 0;
166844cd7ae7SLois Curfman McInnes   b->garray      = 0;
166944cd7ae7SLois Curfman McInnes   b->roworiented = 1;
16708a729477SBarry Smith 
16711eb62cbbSBarry Smith   /* stuff used for matrix vector multiply */
167244cd7ae7SLois Curfman McInnes   b->lvec      = 0;
167344cd7ae7SLois Curfman McInnes   b->Mvctx     = 0;
16748a729477SBarry Smith 
16757a0afa10SBarry Smith   /* stuff for MatGetRow() */
167644cd7ae7SLois Curfman McInnes   b->rowindices   = 0;
167744cd7ae7SLois Curfman McInnes   b->rowvalues    = 0;
167844cd7ae7SLois Curfman McInnes   b->getrowactive = PETSC_FALSE;
16797a0afa10SBarry Smith 
168044cd7ae7SLois Curfman McInnes   *A = B;
16813a40ed3dSBarry Smith   PetscFunctionReturn(0);
1682d6dfbf8fSBarry Smith }
1683c74985f6SBarry Smith 
16845615d1e5SSatish Balay #undef __FUNC__
16855615d1e5SSatish Balay #define __FUNC__ "MatConvertSameType_MPIAIJ"
16868f6be9afSLois Curfman McInnes int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues)
1687d6dfbf8fSBarry Smith {
1688d6dfbf8fSBarry Smith   Mat        mat;
1689416022c9SBarry Smith   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
1690a1b97e82SLois Curfman McInnes   int        ierr, len=0, flg;
1691d6dfbf8fSBarry Smith 
16923a40ed3dSBarry Smith   PetscFunctionBegin;
1693416022c9SBarry Smith   *newmat       = 0;
1694d4bb536fSBarry Smith   PetscHeaderCreate(mat,_p_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm,MatDestroy,MatView);
1695a5a9c739SBarry Smith   PLogObjectCreate(mat);
16960452661fSBarry Smith   mat->data       = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
1697416022c9SBarry Smith   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
169844a69424SLois Curfman McInnes   mat->destroy    = MatDestroy_MPIAIJ;
169944a69424SLois Curfman McInnes   mat->view       = MatView_MPIAIJ;
1700d6dfbf8fSBarry Smith   mat->factor     = matin->factor;
1701c456f294SBarry Smith   mat->assembled  = PETSC_TRUE;
1702d6dfbf8fSBarry Smith 
170344cd7ae7SLois Curfman McInnes   a->m = mat->m   = oldmat->m;
170444cd7ae7SLois Curfman McInnes   a->n = mat->n   = oldmat->n;
170544cd7ae7SLois Curfman McInnes   a->M = mat->M   = oldmat->M;
170644cd7ae7SLois Curfman McInnes   a->N = mat->N   = oldmat->N;
1707d6dfbf8fSBarry Smith 
1708416022c9SBarry Smith   a->rstart       = oldmat->rstart;
1709416022c9SBarry Smith   a->rend         = oldmat->rend;
1710416022c9SBarry Smith   a->cstart       = oldmat->cstart;
1711416022c9SBarry Smith   a->cend         = oldmat->cend;
171217699dbbSLois Curfman McInnes   a->size         = oldmat->size;
171317699dbbSLois Curfman McInnes   a->rank         = oldmat->rank;
171447794344SBarry Smith   mat->insertmode = NOT_SET_VALUES;
1715bcd2baecSBarry Smith   a->rowvalues    = 0;
1716bcd2baecSBarry Smith   a->getrowactive = PETSC_FALSE;
1717d6dfbf8fSBarry Smith 
1718603f58a4SSatish Balay   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
1719f09e8eb9SSatish Balay   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1720603f58a4SSatish Balay   a->cowners = a->rowners + a->size + 2;
1721603f58a4SSatish Balay   PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
1722416022c9SBarry Smith   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
17232ee70a88SLois Curfman McInnes   if (oldmat->colmap) {
17240452661fSBarry Smith     a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
1725416022c9SBarry Smith     PLogObjectMemory(mat,(a->N)*sizeof(int));
1726416022c9SBarry Smith     PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
1727416022c9SBarry Smith   } else a->colmap = 0;
17283f41c07dSBarry Smith   if (oldmat->garray) {
17293f41c07dSBarry Smith     len = ((Mat_SeqAIJ *) (oldmat->B->data))->n;
17303f41c07dSBarry Smith     a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray);
1731464493b3SBarry Smith     PLogObjectMemory(mat,len*sizeof(int));
17323f41c07dSBarry Smith     if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
1733416022c9SBarry Smith   } else a->garray = 0;
1734d6dfbf8fSBarry Smith 
1735416022c9SBarry Smith   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
1736416022c9SBarry Smith   PLogObjectParent(mat,a->lvec);
1737a56f8943SBarry Smith   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
1738416022c9SBarry Smith   PLogObjectParent(mat,a->Mvctx);
1739416022c9SBarry Smith   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
1740416022c9SBarry Smith   PLogObjectParent(mat,a->A);
1741416022c9SBarry Smith   ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
1742416022c9SBarry Smith   PLogObjectParent(mat,a->B);
17435dd7a6c7SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
174425cdf11fSBarry Smith   if (flg) {
1745682d7d0cSBarry Smith     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
1746682d7d0cSBarry Smith   }
17478a729477SBarry Smith   *newmat = mat;
17483a40ed3dSBarry Smith   PetscFunctionReturn(0);
17498a729477SBarry Smith }
1750416022c9SBarry Smith 
175177c4ece6SBarry Smith #include "sys.h"
1752416022c9SBarry Smith 
17535615d1e5SSatish Balay #undef __FUNC__
17545615d1e5SSatish Balay #define __FUNC__ "MatLoad_MPIAIJ"
175519bcc07fSBarry Smith int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
1756416022c9SBarry Smith {
1757d65a2f8fSBarry Smith   Mat          A;
1758d65a2f8fSBarry Smith   Scalar       *vals,*svals;
175919bcc07fSBarry Smith   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1760416022c9SBarry Smith   MPI_Status   status;
17613a40ed3dSBarry Smith   int          i, nz, ierr, j,rstart, rend, fd;
176217699dbbSLois Curfman McInnes   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1763d65a2f8fSBarry Smith   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
176419bcc07fSBarry Smith   int          tag = ((PetscObject)viewer)->tag;
1765416022c9SBarry Smith 
17663a40ed3dSBarry Smith   PetscFunctionBegin;
176717699dbbSLois Curfman McInnes   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
176817699dbbSLois Curfman McInnes   if (!rank) {
176990ace30eSBarry Smith     ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
17700752156aSBarry Smith     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr);
1771e3372554SBarry Smith     if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not matrix object");
1772d64ed03dSBarry Smith     if (header[3] < 0) {
1773d64ed03dSBarry Smith       SETERRQ(1,1,"Matrix stored in special format on disk, cannot load as MPIAIJ");
1774d64ed03dSBarry Smith     }
17756c5fab8fSBarry Smith   }
17766c5fab8fSBarry Smith 
1777d64ed03dSBarry Smith 
1778ca161407SBarry Smith   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1779416022c9SBarry Smith   M = header[1]; N = header[2];
1780416022c9SBarry Smith   /* determine ownership of all rows */
178117699dbbSLois Curfman McInnes   m = M/size + ((M % size) > rank);
17820452661fSBarry Smith   rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
1783ca161407SBarry Smith   ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1784416022c9SBarry Smith   rowners[0] = 0;
178517699dbbSLois Curfman McInnes   for ( i=2; i<=size; i++ ) {
1786416022c9SBarry Smith     rowners[i] += rowners[i-1];
1787416022c9SBarry Smith   }
178817699dbbSLois Curfman McInnes   rstart = rowners[rank];
178917699dbbSLois Curfman McInnes   rend   = rowners[rank+1];
1790416022c9SBarry Smith 
1791416022c9SBarry Smith   /* distribute row lengths to all processors */
17920452661fSBarry Smith   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
1793416022c9SBarry Smith   offlens = ourlens + (rend-rstart);
179417699dbbSLois Curfman McInnes   if (!rank) {
17950452661fSBarry Smith     rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
17960752156aSBarry Smith     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr);
17970452661fSBarry Smith     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
179817699dbbSLois Curfman McInnes     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
1799ca161407SBarry Smith     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
18000452661fSBarry Smith     PetscFree(sndcounts);
18013a40ed3dSBarry Smith   } else {
1802ca161407SBarry Smith     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);CHKERRQ(ierr);
1803416022c9SBarry Smith   }
1804416022c9SBarry Smith 
180517699dbbSLois Curfman McInnes   if (!rank) {
1806416022c9SBarry Smith     /* calculate the number of nonzeros on each processor */
18070452661fSBarry Smith     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
1808cddf8d76SBarry Smith     PetscMemzero(procsnz,size*sizeof(int));
180917699dbbSLois Curfman McInnes     for ( i=0; i<size; i++ ) {
1810416022c9SBarry Smith       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
1811416022c9SBarry Smith         procsnz[i] += rowlengths[j];
1812416022c9SBarry Smith       }
1813416022c9SBarry Smith     }
18140452661fSBarry Smith     PetscFree(rowlengths);
1815416022c9SBarry Smith 
1816416022c9SBarry Smith     /* determine max buffer needed and allocate it */
1817416022c9SBarry Smith     maxnz = 0;
181817699dbbSLois Curfman McInnes     for ( i=0; i<size; i++ ) {
18190452661fSBarry Smith       maxnz = PetscMax(maxnz,procsnz[i]);
1820416022c9SBarry Smith     }
18210452661fSBarry Smith     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
1822416022c9SBarry Smith 
1823416022c9SBarry Smith     /* read in my part of the matrix column indices  */
1824416022c9SBarry Smith     nz     = procsnz[0];
18250452661fSBarry Smith     mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
18260752156aSBarry Smith     ierr   = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr);
1827d65a2f8fSBarry Smith 
1828d65a2f8fSBarry Smith     /* read in every one elses and ship off */
182917699dbbSLois Curfman McInnes     for ( i=1; i<size; i++ ) {
1830d65a2f8fSBarry Smith       nz   = procsnz[i];
18310752156aSBarry Smith       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
1832ca161407SBarry Smith       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
1833d65a2f8fSBarry Smith     }
18340452661fSBarry Smith     PetscFree(cols);
18353a40ed3dSBarry Smith   } else {
1836416022c9SBarry Smith     /* determine buffer space needed for message */
1837416022c9SBarry Smith     nz = 0;
1838416022c9SBarry Smith     for ( i=0; i<m; i++ ) {
1839416022c9SBarry Smith       nz += ourlens[i];
1840416022c9SBarry Smith     }
18410452661fSBarry Smith     mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1842416022c9SBarry Smith 
1843416022c9SBarry Smith     /* receive message of column indices*/
1844ca161407SBarry Smith     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
1845ca161407SBarry Smith     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
1846e3372554SBarry Smith     if (maxnz != nz) SETERRQ(1,0,"something is wrong with file");
1847416022c9SBarry Smith   }
1848416022c9SBarry Smith 
1849416022c9SBarry Smith   /* loop over local rows, determining number of off diagonal entries */
1850cddf8d76SBarry Smith   PetscMemzero(offlens,m*sizeof(int));
1851416022c9SBarry Smith   jj = 0;
1852416022c9SBarry Smith   for ( i=0; i<m; i++ ) {
1853416022c9SBarry Smith     for ( j=0; j<ourlens[i]; j++ ) {
1854d65a2f8fSBarry Smith       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1855416022c9SBarry Smith       jj++;
1856416022c9SBarry Smith     }
1857416022c9SBarry Smith   }
1858d65a2f8fSBarry Smith 
1859d65a2f8fSBarry Smith   /* create our matrix */
1860416022c9SBarry Smith   for ( i=0; i<m; i++ ) {
1861416022c9SBarry Smith     ourlens[i] -= offlens[i];
1862416022c9SBarry Smith   }
1863d65a2f8fSBarry Smith   ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1864d65a2f8fSBarry Smith   A = *newmat;
18656d4a8577SBarry Smith   MatSetOption(A,MAT_COLUMNS_SORTED);
1866d65a2f8fSBarry Smith   for ( i=0; i<m; i++ ) {
1867d65a2f8fSBarry Smith     ourlens[i] += offlens[i];
1868d65a2f8fSBarry Smith   }
1869416022c9SBarry Smith 
187017699dbbSLois Curfman McInnes   if (!rank) {
18710452661fSBarry Smith     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);
1872416022c9SBarry Smith 
1873416022c9SBarry Smith     /* read in my part of the matrix numerical values  */
1874416022c9SBarry Smith     nz = procsnz[0];
18750752156aSBarry Smith     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
1876d65a2f8fSBarry Smith 
1877d65a2f8fSBarry Smith     /* insert into matrix */
1878d65a2f8fSBarry Smith     jj      = rstart;
1879d65a2f8fSBarry Smith     smycols = mycols;
1880d65a2f8fSBarry Smith     svals   = vals;
1881d65a2f8fSBarry Smith     for ( i=0; i<m; i++ ) {
1882d65a2f8fSBarry Smith       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1883d65a2f8fSBarry Smith       smycols += ourlens[i];
1884d65a2f8fSBarry Smith       svals   += ourlens[i];
1885d65a2f8fSBarry Smith       jj++;
1886416022c9SBarry Smith     }
1887416022c9SBarry Smith 
1888d65a2f8fSBarry Smith     /* read in other processors and ship out */
188917699dbbSLois Curfman McInnes     for ( i=1; i<size; i++ ) {
1890416022c9SBarry Smith       nz   = procsnz[i];
18910752156aSBarry Smith       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
1892ca161407SBarry Smith       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
1893416022c9SBarry Smith     }
18940452661fSBarry Smith     PetscFree(procsnz);
18953a40ed3dSBarry Smith   } else {
1896d65a2f8fSBarry Smith     /* receive numeric values */
18970452661fSBarry Smith     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);
1898416022c9SBarry Smith 
1899d65a2f8fSBarry Smith     /* receive message of values*/
1900ca161407SBarry Smith     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
1901ca161407SBarry Smith     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1902e3372554SBarry Smith     if (maxnz != nz) SETERRQ(1,0,"something is wrong with file");
1903d65a2f8fSBarry Smith 
1904d65a2f8fSBarry Smith     /* insert into matrix */
1905d65a2f8fSBarry Smith     jj      = rstart;
1906d65a2f8fSBarry Smith     smycols = mycols;
1907d65a2f8fSBarry Smith     svals   = vals;
1908d65a2f8fSBarry Smith     for ( i=0; i<m; i++ ) {
1909d65a2f8fSBarry Smith       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1910d65a2f8fSBarry Smith       smycols += ourlens[i];
1911d65a2f8fSBarry Smith       svals   += ourlens[i];
1912d65a2f8fSBarry Smith       jj++;
1913d65a2f8fSBarry Smith     }
1914d65a2f8fSBarry Smith   }
19150452661fSBarry Smith   PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);
1916d65a2f8fSBarry Smith 
19176d4a8577SBarry Smith   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
19186d4a8577SBarry Smith   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
19193a40ed3dSBarry Smith   PetscFunctionReturn(0);
1920416022c9SBarry Smith }
1921a0ff6018SBarry Smith 
192229da9460SBarry Smith #undef __FUNC__
192329da9460SBarry Smith #define __FUNC__ "MatGetSubMatrix_MPIAIJ"
1924a0ff6018SBarry Smith /*
192529da9460SBarry Smith     Not great since it makes two copies of the submatrix, first an SeqAIJ
192629da9460SBarry Smith   in local and then by concatenating the local matrices the end result.
192729da9460SBarry Smith   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
1928a0ff6018SBarry Smith */
1929a0ff6018SBarry Smith int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatGetSubMatrixCall call,Mat *newmat)
1930a0ff6018SBarry Smith {
193100e6dbe6SBarry Smith   int        ierr, i, m,n,rstart,row,rend,nz,*cwork,size,rank,j;
1932a0ff6018SBarry Smith   Mat        *local,M;
193300e6dbe6SBarry Smith   Scalar     *vwork,*aa;
193400e6dbe6SBarry Smith   MPI_Comm   comm = mat->comm;
193500e6dbe6SBarry Smith   Mat_SeqAIJ *aij;
193600e6dbe6SBarry Smith   int        *ii, *jj,nlocal,*dlens,*olens,dlen,olen,jend;
1937a0ff6018SBarry Smith 
1938a0ff6018SBarry Smith   PetscFunctionBegin;
193900e6dbe6SBarry Smith   MPI_Comm_rank(comm,&rank);
194000e6dbe6SBarry Smith   MPI_Comm_size(comm,&size);
194100e6dbe6SBarry Smith 
1942a0ff6018SBarry Smith   ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
1943a0ff6018SBarry Smith 
1944a0ff6018SBarry Smith   /*
1945a0ff6018SBarry Smith       m - number of local rows
1946a0ff6018SBarry Smith       n - number of columns (same on all processors)
1947a0ff6018SBarry Smith       rstart - first row in new global matrix generated
1948a0ff6018SBarry Smith   */
1949a0ff6018SBarry Smith   ierr = MatGetSize(*local,&m,&n);CHKERRQ(ierr);
1950a0ff6018SBarry Smith   if (call == MAT_INITIAL_MATRIX) {
195100e6dbe6SBarry Smith     aij = (Mat_SeqAIJ *) (*local)->data;
195200e6dbe6SBarry Smith     if (aij->indexshift) SETERRQ(1,1,"No support for index shifted matrix");
195300e6dbe6SBarry Smith     ii  = aij->i;
195400e6dbe6SBarry Smith     jj  = aij->j;
195500e6dbe6SBarry Smith 
1956a0ff6018SBarry Smith     /*
195700e6dbe6SBarry Smith         Determine the number of non-zeros in the diagonal and off-diagonal
195800e6dbe6SBarry Smith         portions of the matrix in order to do correct preallocation
1959a0ff6018SBarry Smith     */
196000e6dbe6SBarry Smith 
196100e6dbe6SBarry Smith     /* first get start and end of "diagonal" columns */
196200e6dbe6SBarry Smith     nlocal = n/size + ((n % size) > rank);
1963ca161407SBarry Smith     ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
196400e6dbe6SBarry Smith     rstart = rend - nlocal;
196500e6dbe6SBarry Smith 
196600e6dbe6SBarry Smith     /* next, compute all the lengths */
196700e6dbe6SBarry Smith     dlens = (int *) PetscMalloc( (2*m+1)*sizeof(int) );CHKPTRQ(dlens);
196800e6dbe6SBarry Smith     olens = dlens + m;
196900e6dbe6SBarry Smith     for ( i=0; i<m; i++ ) {
197000e6dbe6SBarry Smith       jend = ii[i+1] - ii[i];
197100e6dbe6SBarry Smith       olen = 0;
197200e6dbe6SBarry Smith       dlen = 0;
197300e6dbe6SBarry Smith       for ( j=0; j<jend; j++ ) {
197400e6dbe6SBarry Smith         if ( *jj < rstart || *jj >= rend) olen++;
197500e6dbe6SBarry Smith         else dlen++;
197600e6dbe6SBarry Smith         jj++;
197700e6dbe6SBarry Smith       }
197800e6dbe6SBarry Smith       olens[i] = olen;
197900e6dbe6SBarry Smith       dlens[i] = dlen;
198000e6dbe6SBarry Smith     }
198100e6dbe6SBarry Smith     ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
198200e6dbe6SBarry Smith     PetscFree(dlens);
1983a0ff6018SBarry Smith   } else {
1984a0ff6018SBarry Smith     int ml,nl;
1985a0ff6018SBarry Smith 
1986a0ff6018SBarry Smith     M = *newmat;
1987a0ff6018SBarry Smith     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
1988a0ff6018SBarry Smith     if (ml != m) SETERRQ(1,1,"Previous matrix must be same size/layout as request");
1989a0ff6018SBarry Smith     ierr = MatZeroEntries(M);CHKERRQ(ierr);
1990a0ff6018SBarry Smith   }
1991a0ff6018SBarry Smith   ierr = MatGetOwnershipRange(M,&rstart,&rend); CHKERRQ(ierr);
199200e6dbe6SBarry Smith   aij = (Mat_SeqAIJ *) (*local)->data;
199300e6dbe6SBarry Smith   if (aij->indexshift) SETERRQ(1,1,"No support for index shifted matrix");
199400e6dbe6SBarry Smith   ii  = aij->i;
199500e6dbe6SBarry Smith   jj  = aij->j;
199600e6dbe6SBarry Smith   aa  = aij->a;
1997a0ff6018SBarry Smith   for (i=0; i<m; i++) {
1998a0ff6018SBarry Smith     row   = rstart + i;
199900e6dbe6SBarry Smith     nz    = ii[i+1] - ii[i];
200000e6dbe6SBarry Smith     cwork = jj;     jj += nz;
200100e6dbe6SBarry Smith     vwork = aa;     aa += nz;
20028c638d02SBarry Smith     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES); CHKERRQ(ierr);
2003a0ff6018SBarry Smith   }
2004a0ff6018SBarry Smith 
2005a0ff6018SBarry Smith   ierr = MatDestroyMatrices(1,&local);CHKERRQ(ierr);
2006a0ff6018SBarry Smith   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
2007a0ff6018SBarry Smith   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
2008a0ff6018SBarry Smith   *newmat = M;
2009a0ff6018SBarry Smith   PetscFunctionReturn(0);
2010a0ff6018SBarry Smith }
2011