xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 6831982abb6453c2f3e25efecb5d0951d333371e)
1a5eb4965SSatish Balay #ifdef PETSC_RCS_HEADER
2*6831982aSBarry Smith static char vcid[] = "$Id: mpiaij.c,v 1.304 1999/10/13 20:37:20 bsmith Exp bsmith $";
3cb512458SBarry Smith #endif
48a729477SBarry Smith 
570f55243SBarry Smith #include "src/mat/impls/aij/mpi/mpiaij.h"
6f5eb4b81SSatish Balay #include "src/vec/vecimpl.h"
7d9942c19SSatish Balay #include "src/inline/spops.h"
88a729477SBarry Smith 
9bc5ccf88SSatish Balay extern int MatSetUpMultiply_MPIAIJ(Mat);
10bc5ccf88SSatish Balay extern int DisAssemble_MPIAIJ(Mat);
11bc5ccf88SSatish Balay extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
12bc5ccf88SSatish Balay extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
13bc5ccf88SSatish Balay extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
14bc5ccf88SSatish Balay extern int MatPrintHelp_SeqAIJ(Mat);
15bc5ccf88SSatish Balay 
160f5bd95cSBarry Smith /*
170f5bd95cSBarry Smith   Local utility routine that creates a mapping from the global column
189e25ed09SBarry Smith number to the local number in the off-diagonal part of the local
190f5bd95cSBarry Smith storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
200f5bd95cSBarry Smith a slightly higher hash table cost; without it it is not scalable (each processor
210f5bd95cSBarry Smith has an order N integer array but is fast to acess.
229e25ed09SBarry Smith */
235615d1e5SSatish Balay #undef __FUNC__
24d4bb536fSBarry Smith #define __FUNC__ "CreateColmap_MPIAIJ_Private"
250a198c4cSBarry Smith int CreateColmap_MPIAIJ_Private(Mat mat)
269e25ed09SBarry Smith {
2744a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
28ec8511deSBarry Smith   Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
29dc2900e9SSatish Balay   int        n = B->n,i,ierr;
30dbb450caSBarry Smith 
313a40ed3dSBarry Smith   PetscFunctionBegin;
32aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
330f5bd95cSBarry Smith   ierr = PetscTableCreate(aij->n/5,&aij->colmap);CHKERRQ(ierr);
34b1fc9764SSatish Balay   for ( i=0; i<n; i++ ){
350f5bd95cSBarry Smith     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
36b1fc9764SSatish Balay   }
37b1fc9764SSatish Balay #else
38758f045eSSatish Balay   aij->colmap = (int *) PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap);
39464493b3SBarry Smith   PLogObjectMemory(mat,aij->N*sizeof(int));
40549d3d68SSatish Balay   ierr = PetscMemzero(aij->colmap,aij->N*sizeof(int));CHKERRQ(ierr);
41905e6a2fSBarry Smith   for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
42b1fc9764SSatish Balay #endif
433a40ed3dSBarry Smith   PetscFunctionReturn(0);
449e25ed09SBarry Smith }
459e25ed09SBarry Smith 
460520107fSSatish Balay #define CHUNKSIZE   15
4730770e4dSSatish Balay #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
480520107fSSatish Balay { \
490520107fSSatish Balay  \
500520107fSSatish Balay     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
5130770e4dSSatish Balay     rmax = aimax[row]; nrow = ailen[row];  \
52f5e9677aSSatish Balay     col1 = col - shift; \
53f5e9677aSSatish Balay      \
54ba4e3ef2SSatish Balay     low = 0; high = nrow; \
55ba4e3ef2SSatish Balay     while (high-low > 5) { \
56ba4e3ef2SSatish Balay       t = (low+high)/2; \
57ba4e3ef2SSatish Balay       if (rp[t] > col) high = t; \
58ba4e3ef2SSatish Balay       else             low  = t; \
59ba4e3ef2SSatish Balay     } \
600520107fSSatish Balay       for ( _i=0; _i<nrow; _i++ ) { \
61f5e9677aSSatish Balay         if (rp[_i] > col1) break; \
62f5e9677aSSatish Balay         if (rp[_i] == col1) { \
630520107fSSatish Balay           if (addv == ADD_VALUES) ap[_i] += value;   \
640520107fSSatish Balay           else                  ap[_i] = value; \
6530770e4dSSatish Balay           goto a_noinsert; \
660520107fSSatish Balay         } \
670520107fSSatish Balay       }  \
6889280ab3SLois Curfman McInnes       if (nonew == 1) goto a_noinsert; \
69a8c6a408SBarry Smith       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
700520107fSSatish Balay       if (nrow >= rmax) { \
710520107fSSatish Balay         /* there is no extra room in row, therefore enlarge */ \
720520107fSSatish Balay         int    new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \
730520107fSSatish Balay         Scalar *new_a; \
740520107fSSatish Balay  \
75a8c6a408SBarry Smith         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
7689280ab3SLois Curfman McInnes  \
770520107fSSatish Balay         /* malloc new storage space */ \
780520107fSSatish Balay         len     = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \
790520107fSSatish Balay         new_a   = (Scalar *) PetscMalloc( len );CHKPTRQ(new_a); \
800520107fSSatish Balay         new_j   = (int *) (new_a + new_nz); \
810520107fSSatish Balay         new_i   = new_j + new_nz; \
820520107fSSatish Balay  \
830520107fSSatish Balay         /* copy over old data into new slots */ \
840520107fSSatish Balay         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \
850520107fSSatish Balay         for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
86549d3d68SSatish Balay         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
870520107fSSatish Balay         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
88549d3d68SSatish Balay         ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
89549d3d68SSatish Balay                                                            len*sizeof(int));CHKERRQ(ierr); \
90549d3d68SSatish Balay         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar));CHKERRQ(ierr); \
91549d3d68SSatish Balay         ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
92549d3d68SSatish Balay                                                            len*sizeof(Scalar));CHKERRQ(ierr);  \
930520107fSSatish Balay         /* free up old matrix storage */ \
94f5e9677aSSatish Balay  \
95606d414cSSatish Balay         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
96606d414cSSatish Balay         if (!a->singlemalloc) { \
97606d414cSSatish Balay            ierr = PetscFree(a->i);CHKERRQ(ierr); \
98606d414cSSatish Balay            ierr = PetscFree(a->j);CHKERRQ(ierr); \
99606d414cSSatish Balay         } \
1000520107fSSatish Balay         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
1010520107fSSatish Balay         a->singlemalloc = 1; \
1020520107fSSatish Balay  \
1030520107fSSatish Balay         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
10430770e4dSSatish Balay         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
1050520107fSSatish Balay         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
1060520107fSSatish Balay         a->maxnz += CHUNKSIZE; \
1070520107fSSatish Balay         a->reallocs++; \
1080520107fSSatish Balay       } \
1090520107fSSatish Balay       N = nrow++ - 1; a->nz++; \
1100520107fSSatish Balay       /* shift up all the later entries in this row */ \
1110520107fSSatish Balay       for ( ii=N; ii>=_i; ii-- ) { \
1120520107fSSatish Balay         rp[ii+1] = rp[ii]; \
1130520107fSSatish Balay         ap[ii+1] = ap[ii]; \
1140520107fSSatish Balay       } \
115f5e9677aSSatish Balay       rp[_i] = col1;  \
1160520107fSSatish Balay       ap[_i] = value;  \
11730770e4dSSatish Balay       a_noinsert: ; \
1180520107fSSatish Balay       ailen[row] = nrow; \
1190520107fSSatish Balay }
1200a198c4cSBarry Smith 
12130770e4dSSatish Balay #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
12230770e4dSSatish Balay { \
12330770e4dSSatish Balay  \
12430770e4dSSatish Balay     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
12530770e4dSSatish Balay     rmax = bimax[row]; nrow = bilen[row];  \
12630770e4dSSatish Balay     col1 = col - shift; \
12730770e4dSSatish Balay      \
128ba4e3ef2SSatish Balay     low = 0; high = nrow; \
129ba4e3ef2SSatish Balay     while (high-low > 5) { \
130ba4e3ef2SSatish Balay       t = (low+high)/2; \
131ba4e3ef2SSatish Balay       if (rp[t] > col) high = t; \
132ba4e3ef2SSatish Balay       else             low  = t; \
133ba4e3ef2SSatish Balay     } \
13430770e4dSSatish Balay        for ( _i=0; _i<nrow; _i++ ) { \
13530770e4dSSatish Balay         if (rp[_i] > col1) break; \
13630770e4dSSatish Balay         if (rp[_i] == col1) { \
13730770e4dSSatish Balay           if (addv == ADD_VALUES) ap[_i] += value;   \
13830770e4dSSatish Balay           else                  ap[_i] = value; \
13930770e4dSSatish Balay           goto b_noinsert; \
14030770e4dSSatish Balay         } \
14130770e4dSSatish Balay       }  \
14289280ab3SLois Curfman McInnes       if (nonew == 1) goto b_noinsert; \
143a8c6a408SBarry Smith       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
14430770e4dSSatish Balay       if (nrow >= rmax) { \
14530770e4dSSatish Balay         /* there is no extra room in row, therefore enlarge */ \
14674c639caSSatish Balay         int    new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \
14730770e4dSSatish Balay         Scalar *new_a; \
14830770e4dSSatish Balay  \
149a8c6a408SBarry Smith         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
15089280ab3SLois Curfman McInnes  \
15130770e4dSSatish Balay         /* malloc new storage space */ \
15274c639caSSatish Balay         len     = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \
15330770e4dSSatish Balay         new_a   = (Scalar *) PetscMalloc( len );CHKPTRQ(new_a); \
15430770e4dSSatish Balay         new_j   = (int *) (new_a + new_nz); \
15530770e4dSSatish Balay         new_i   = new_j + new_nz; \
15630770e4dSSatish Balay  \
15730770e4dSSatish Balay         /* copy over old data into new slots */ \
15830770e4dSSatish Balay         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \
15974c639caSSatish Balay         for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
160549d3d68SSatish Balay         ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
16130770e4dSSatish Balay         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
162549d3d68SSatish Balay         ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
163549d3d68SSatish Balay                                                            len*sizeof(int));CHKERRQ(ierr); \
164549d3d68SSatish Balay         ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar));CHKERRQ(ierr); \
165549d3d68SSatish Balay         ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
166549d3d68SSatish Balay                                                            len*sizeof(Scalar));CHKERRQ(ierr);  \
16730770e4dSSatish Balay         /* free up old matrix storage */ \
16830770e4dSSatish Balay  \
169606d414cSSatish Balay         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
170606d414cSSatish Balay         if (!b->singlemalloc) { \
171606d414cSSatish Balay           ierr = PetscFree(b->i);CHKERRQ(ierr); \
172606d414cSSatish Balay           ierr = PetscFree(b->j);CHKERRQ(ierr); \
173606d414cSSatish Balay         } \
17474c639caSSatish Balay         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
17574c639caSSatish Balay         b->singlemalloc = 1; \
17630770e4dSSatish Balay  \
17730770e4dSSatish Balay         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
17830770e4dSSatish Balay         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
17974c639caSSatish Balay         PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
18074c639caSSatish Balay         b->maxnz += CHUNKSIZE; \
18174c639caSSatish Balay         b->reallocs++; \
18230770e4dSSatish Balay       } \
18374c639caSSatish Balay       N = nrow++ - 1; b->nz++; \
18430770e4dSSatish Balay       /* shift up all the later entries in this row */ \
18530770e4dSSatish Balay       for ( ii=N; ii>=_i; ii-- ) { \
18630770e4dSSatish Balay         rp[ii+1] = rp[ii]; \
18730770e4dSSatish Balay         ap[ii+1] = ap[ii]; \
18830770e4dSSatish Balay       } \
18930770e4dSSatish Balay       rp[_i] = col1;  \
19030770e4dSSatish Balay       ap[_i] = value;  \
19130770e4dSSatish Balay       b_noinsert: ; \
19230770e4dSSatish Balay       bilen[row] = nrow; \
19330770e4dSSatish Balay }
19430770e4dSSatish Balay 
1955615d1e5SSatish Balay #undef __FUNC__
1965615d1e5SSatish Balay #define __FUNC__ "MatSetValues_MPIAIJ"
1978f6be9afSLois Curfman McInnes int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
1988a729477SBarry Smith {
19944a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
2004b0e389bSBarry Smith   Scalar     value;
2011eb62cbbSBarry Smith   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
2021eb62cbbSBarry Smith   int        cstart = aij->cstart, cend = aij->cend,row,col;
203905e6a2fSBarry Smith   int        roworiented = aij->roworiented;
2048a729477SBarry Smith 
2050520107fSSatish Balay   /* Some Variables required in the macro */
2064ee7247eSSatish Balay   Mat        A = aij->A;
2074ee7247eSSatish Balay   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
20830770e4dSSatish Balay   int        *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j;
20930770e4dSSatish Balay   Scalar     *aa = a->a;
21030770e4dSSatish Balay 
21130770e4dSSatish Balay   Mat        B = aij->B;
21230770e4dSSatish Balay   Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data;
21330770e4dSSatish Balay   int        *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j;
21430770e4dSSatish Balay   Scalar     *ba = b->a;
21530770e4dSSatish Balay 
216ba4e3ef2SSatish Balay   int        *rp,ii,nrow,_i,rmax, N, col1,low,high,t;
21730770e4dSSatish Balay   int        nonew = a->nonew,shift = a->indexshift;
21830770e4dSSatish Balay   Scalar     *ap;
2194ee7247eSSatish Balay 
2203a40ed3dSBarry Smith   PetscFunctionBegin;
2218a729477SBarry Smith   for ( i=0; i<m; i++ ) {
2225ef9f2a5SBarry Smith     if (im[i] < 0) continue;
223aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
224a8c6a408SBarry Smith     if (im[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
2250a198c4cSBarry Smith #endif
2264b0e389bSBarry Smith     if (im[i] >= rstart && im[i] < rend) {
2274b0e389bSBarry Smith       row = im[i] - rstart;
2281eb62cbbSBarry Smith       for ( j=0; j<n; j++ ) {
2294b0e389bSBarry Smith         if (in[j] >= cstart && in[j] < cend){
2304b0e389bSBarry Smith           col = in[j] - cstart;
2314b0e389bSBarry Smith           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
23230770e4dSSatish Balay           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
2330520107fSSatish Balay           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
2341eb62cbbSBarry Smith         }
2355ef9f2a5SBarry Smith         else if (in[j] < 0) continue;
236aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
237a8c6a408SBarry Smith         else if (in[j] >= aij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");}
2380a198c4cSBarry Smith #endif
2391eb62cbbSBarry Smith         else {
240227d817aSBarry Smith           if (mat->was_assembled) {
241905e6a2fSBarry Smith             if (!aij->colmap) {
242905e6a2fSBarry Smith               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
243905e6a2fSBarry Smith             }
244aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
2450f5bd95cSBarry Smith             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
246fa46199cSSatish Balay 	    col--;
247b1fc9764SSatish Balay #else
248905e6a2fSBarry Smith             col = aij->colmap[in[j]] - 1;
249b1fc9764SSatish Balay #endif
250ec8511deSBarry Smith             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
2512493cbb0SBarry Smith               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
2524b0e389bSBarry Smith               col =  in[j];
2539bf004c3SSatish Balay               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
254f9508a3cSSatish Balay               B = aij->B;
255f9508a3cSSatish Balay               b = (Mat_SeqAIJ *) B->data;
256f9508a3cSSatish Balay               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
257f9508a3cSSatish Balay               ba = b->a;
258d6dfbf8fSBarry Smith             }
259c48de900SBarry Smith           } else col = in[j];
2604b0e389bSBarry Smith           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
26130770e4dSSatish Balay           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
26230770e4dSSatish Balay           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
2631eb62cbbSBarry Smith         }
2641eb62cbbSBarry Smith       }
2655ef9f2a5SBarry Smith     } else {
26690f02eecSBarry Smith       if (!aij->donotstash) {
267d36fbae8SSatish Balay         if (roworiented) {
2688798bf22SSatish Balay           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
269d36fbae8SSatish Balay         } else {
2708798bf22SSatish Balay           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
2714b0e389bSBarry Smith         }
2721eb62cbbSBarry Smith       }
2738a729477SBarry Smith     }
27490f02eecSBarry Smith   }
2753a40ed3dSBarry Smith   PetscFunctionReturn(0);
2768a729477SBarry Smith }
2778a729477SBarry Smith 
2785615d1e5SSatish Balay #undef __FUNC__
2795615d1e5SSatish Balay #define __FUNC__ "MatGetValues_MPIAIJ"
2808f6be9afSLois Curfman McInnes int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
281b49de8d1SLois Curfman McInnes {
282b49de8d1SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
283b49de8d1SLois Curfman McInnes   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
284b49de8d1SLois Curfman McInnes   int        cstart = aij->cstart, cend = aij->cend,row,col;
285b49de8d1SLois Curfman McInnes 
2863a40ed3dSBarry Smith   PetscFunctionBegin;
287b49de8d1SLois Curfman McInnes   for ( i=0; i<m; i++ ) {
288a8c6a408SBarry Smith     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
289a8c6a408SBarry Smith     if (idxm[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
290b49de8d1SLois Curfman McInnes     if (idxm[i] >= rstart && idxm[i] < rend) {
291b49de8d1SLois Curfman McInnes       row = idxm[i] - rstart;
292b49de8d1SLois Curfman McInnes       for ( j=0; j<n; j++ ) {
293a8c6a408SBarry Smith         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
294a8c6a408SBarry Smith         if (idxn[j] >= aij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
295b49de8d1SLois Curfman McInnes         if (idxn[j] >= cstart && idxn[j] < cend){
296b49de8d1SLois Curfman McInnes           col = idxn[j] - cstart;
297b49de8d1SLois Curfman McInnes           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
298fa852ad4SSatish Balay         } else {
299905e6a2fSBarry Smith           if (!aij->colmap) {
300905e6a2fSBarry Smith             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
301905e6a2fSBarry Smith           }
302aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
3030f5bd95cSBarry Smith           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
304fa46199cSSatish Balay           col --;
305b1fc9764SSatish Balay #else
306905e6a2fSBarry Smith           col = aij->colmap[idxn[j]] - 1;
307b1fc9764SSatish Balay #endif
308e60e1c95SSatish Balay           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
309d9d09a02SSatish Balay           else {
310b49de8d1SLois Curfman McInnes             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
311b49de8d1SLois Curfman McInnes           }
312b49de8d1SLois Curfman McInnes         }
313b49de8d1SLois Curfman McInnes       }
314a8c6a408SBarry Smith     } else {
315a8c6a408SBarry Smith       SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
316b49de8d1SLois Curfman McInnes     }
317b49de8d1SLois Curfman McInnes   }
3183a40ed3dSBarry Smith   PetscFunctionReturn(0);
319b49de8d1SLois Curfman McInnes }
320bc5ccf88SSatish Balay 
321bc5ccf88SSatish Balay #undef __FUNC__
322bc5ccf88SSatish Balay #define __FUNC__ "MatAssemblyBegin_MPIAIJ"
323bc5ccf88SSatish Balay int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
324bc5ccf88SSatish Balay {
325bc5ccf88SSatish Balay   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
326d36fbae8SSatish Balay   int         ierr,nstash,reallocs;
327bc5ccf88SSatish Balay   InsertMode  addv;
328bc5ccf88SSatish Balay 
329bc5ccf88SSatish Balay   PetscFunctionBegin;
330bc5ccf88SSatish Balay   if (aij->donotstash) {
331bc5ccf88SSatish Balay     PetscFunctionReturn(0);
332bc5ccf88SSatish Balay   }
333bc5ccf88SSatish Balay 
334bc5ccf88SSatish Balay   /* make sure all processors are either in INSERTMODE or ADDMODE */
335bc5ccf88SSatish Balay   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
336bc5ccf88SSatish Balay   if (addv == (ADD_VALUES|INSERT_VALUES)) {
337bc5ccf88SSatish Balay     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
338bc5ccf88SSatish Balay   }
339bc5ccf88SSatish Balay   mat->insertmode = addv; /* in case this processor had no cache */
340bc5ccf88SSatish Balay 
3418798bf22SSatish Balay   ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
3428798bf22SSatish Balay   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
3435a655dc6SBarry Smith   PLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
344bc5ccf88SSatish Balay   PetscFunctionReturn(0);
345bc5ccf88SSatish Balay }
346bc5ccf88SSatish Balay 
347bc5ccf88SSatish Balay 
348bc5ccf88SSatish Balay #undef __FUNC__
349bc5ccf88SSatish Balay #define __FUNC__ "MatAssemblyEnd_MPIAIJ"
350bc5ccf88SSatish Balay int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
351bc5ccf88SSatish Balay {
352bc5ccf88SSatish Balay   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
353a2d1c673SSatish Balay   int         i,j,rstart,ncols,n,ierr,flg;
354bc5ccf88SSatish Balay   int         *row,*col,other_disassembled;
355bc5ccf88SSatish Balay   Scalar      *val;
356bc5ccf88SSatish Balay   InsertMode  addv = mat->insertmode;
357bc5ccf88SSatish Balay 
358bc5ccf88SSatish Balay   PetscFunctionBegin;
359bc5ccf88SSatish Balay   if (!aij->donotstash) {
360a2d1c673SSatish Balay     while (1) {
3618798bf22SSatish Balay       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
362a2d1c673SSatish Balay       if (!flg) break;
363a2d1c673SSatish Balay 
364bc5ccf88SSatish Balay       for ( i=0; i<n; ) {
365bc5ccf88SSatish Balay         /* Now identify the consecutive vals belonging to the same row */
366bc5ccf88SSatish Balay         for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
367bc5ccf88SSatish Balay         if (j < n) ncols = j-i;
368bc5ccf88SSatish Balay         else       ncols = n-i;
369bc5ccf88SSatish Balay         /* Now assemble all these values with a single function call */
370bc5ccf88SSatish Balay         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
371bc5ccf88SSatish Balay         i = j;
372bc5ccf88SSatish Balay       }
373bc5ccf88SSatish Balay     }
3748798bf22SSatish Balay     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
375bc5ccf88SSatish Balay   }
376bc5ccf88SSatish Balay 
377bc5ccf88SSatish Balay   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
378bc5ccf88SSatish Balay   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
379bc5ccf88SSatish Balay 
380bc5ccf88SSatish Balay   /* determine if any processor has disassembled, if so we must
381bc5ccf88SSatish Balay      also disassemble ourselfs, in order that we may reassemble. */
382bc5ccf88SSatish Balay   /*
383bc5ccf88SSatish Balay      if nonzero structure of submatrix B cannot change then we know that
384bc5ccf88SSatish Balay      no processor disassembled thus we can skip this stuff
385bc5ccf88SSatish Balay   */
386bc5ccf88SSatish Balay   if (!((Mat_SeqAIJ*) aij->B->data)->nonew)  {
387bc5ccf88SSatish Balay     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
388bc5ccf88SSatish Balay     if (mat->was_assembled && !other_disassembled) {
389bc5ccf88SSatish Balay       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
390bc5ccf88SSatish Balay     }
391bc5ccf88SSatish Balay   }
392bc5ccf88SSatish Balay 
393bc5ccf88SSatish Balay   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
394bc5ccf88SSatish Balay     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
395bc5ccf88SSatish Balay   }
396bc5ccf88SSatish Balay   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
397bc5ccf88SSatish Balay   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
398bc5ccf88SSatish Balay 
399606d414cSSatish Balay   if (aij->rowvalues) {
400606d414cSSatish Balay     ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
401606d414cSSatish Balay     aij->rowvalues = 0;
402606d414cSSatish Balay   }
403bc5ccf88SSatish Balay   PetscFunctionReturn(0);
404bc5ccf88SSatish Balay }
405bc5ccf88SSatish Balay 
4065615d1e5SSatish Balay #undef __FUNC__
4075615d1e5SSatish Balay #define __FUNC__ "MatZeroEntries_MPIAIJ"
4088f6be9afSLois Curfman McInnes int MatZeroEntries_MPIAIJ(Mat A)
4091eb62cbbSBarry Smith {
41044a69424SLois Curfman McInnes   Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
411dbd7a890SLois Curfman McInnes   int        ierr;
4123a40ed3dSBarry Smith 
4133a40ed3dSBarry Smith   PetscFunctionBegin;
41478b31e54SBarry Smith   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
41578b31e54SBarry Smith   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
4163a40ed3dSBarry Smith   PetscFunctionReturn(0);
4171eb62cbbSBarry Smith }
4181eb62cbbSBarry Smith 
4195615d1e5SSatish Balay #undef __FUNC__
4205615d1e5SSatish Balay #define __FUNC__ "MatZeroRows_MPIAIJ"
4218f6be9afSLois Curfman McInnes int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
4221eb62cbbSBarry Smith {
42344a69424SLois Curfman McInnes   Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
42417699dbbSLois Curfman McInnes   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
4256a5c57faSSatish Balay   int            *procs,*nprocs,j,found,idx,nsends,*work,row;
42617699dbbSLois Curfman McInnes   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
4275392566eSBarry Smith   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
4286a5c57faSSatish Balay   int            *lens,imdex,*lrows,*values,rstart=l->rstart;
429d6dfbf8fSBarry Smith   MPI_Comm       comm = A->comm;
4301eb62cbbSBarry Smith   MPI_Request    *send_waits,*recv_waits;
4311eb62cbbSBarry Smith   MPI_Status     recv_status,*send_status;
4321eb62cbbSBarry Smith   IS             istmp;
4331eb62cbbSBarry Smith 
4343a40ed3dSBarry Smith   PetscFunctionBegin;
43577c4ece6SBarry Smith   ierr = ISGetSize(is,&N);CHKERRQ(ierr);
43678b31e54SBarry Smith   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
4371eb62cbbSBarry Smith 
4381eb62cbbSBarry Smith   /*  first count number of contributors to each processor */
4390452661fSBarry Smith   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(nprocs);
440549d3d68SSatish Balay   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
441549d3d68SSatish Balay   procs  = nprocs + size;
4420452661fSBarry Smith   owner  = (int *) PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/
4431eb62cbbSBarry Smith   for ( i=0; i<N; i++ ) {
4441eb62cbbSBarry Smith     idx = rows[i];
4451eb62cbbSBarry Smith     found = 0;
44617699dbbSLois Curfman McInnes     for ( j=0; j<size; j++ ) {
4471eb62cbbSBarry Smith       if (idx >= owners[j] && idx < owners[j+1]) {
4481eb62cbbSBarry Smith         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
4491eb62cbbSBarry Smith       }
4501eb62cbbSBarry Smith     }
451a8c6a408SBarry Smith     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
4521eb62cbbSBarry Smith   }
45317699dbbSLois Curfman McInnes   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
4541eb62cbbSBarry Smith 
4551eb62cbbSBarry Smith   /* inform other processors of number of messages and max length*/
456*6831982aSBarry Smith   work   = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(work);
457*6831982aSBarry Smith   ierr   = MPI_Allreduce( nprocs, work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
458*6831982aSBarry Smith   nrecvs = work[size+rank];
45917699dbbSLois Curfman McInnes   nmax   = work[rank];
460606d414cSSatish Balay   ierr   = PetscFree(work);CHKERRQ(ierr);
4611eb62cbbSBarry Smith 
4621eb62cbbSBarry Smith   /* post receives:   */
4633a40ed3dSBarry Smith   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
464ca161407SBarry Smith   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
4651eb62cbbSBarry Smith   for ( i=0; i<nrecvs; i++ ) {
466ca161407SBarry Smith     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
4671eb62cbbSBarry Smith   }
4681eb62cbbSBarry Smith 
4691eb62cbbSBarry Smith   /* do sends:
4701eb62cbbSBarry Smith       1) starts[i] gives the starting index in svalues for stuff going to
4711eb62cbbSBarry Smith          the ith processor
4721eb62cbbSBarry Smith   */
4730452661fSBarry Smith   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) );CHKPTRQ(svalues);
4743a40ed3dSBarry Smith   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
4750452661fSBarry Smith   starts = (int *) PetscMalloc( (size+1)*sizeof(int) );CHKPTRQ(starts);
4761eb62cbbSBarry Smith   starts[0] = 0;
47717699dbbSLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
4781eb62cbbSBarry Smith   for ( i=0; i<N; i++ ) {
4791eb62cbbSBarry Smith     svalues[starts[owner[i]]++] = rows[i];
4801eb62cbbSBarry Smith   }
481*6831982aSBarry Smith   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
4821eb62cbbSBarry Smith 
4831eb62cbbSBarry Smith   starts[0] = 0;
48417699dbbSLois Curfman McInnes   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
4851eb62cbbSBarry Smith   count = 0;
48617699dbbSLois Curfman McInnes   for ( i=0; i<size; i++ ) {
4871eb62cbbSBarry Smith     if (procs[i]) {
488ca161407SBarry Smith       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
4891eb62cbbSBarry Smith     }
4901eb62cbbSBarry Smith   }
491606d414cSSatish Balay   ierr = PetscFree(starts);CHKERRQ(ierr);
4921eb62cbbSBarry Smith 
49317699dbbSLois Curfman McInnes   base = owners[rank];
4941eb62cbbSBarry Smith 
4951eb62cbbSBarry Smith   /*  wait on receives */
4960452661fSBarry Smith   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) );CHKPTRQ(lens);
4971eb62cbbSBarry Smith   source = lens + nrecvs;
4981eb62cbbSBarry Smith   count  = nrecvs; slen = 0;
4991eb62cbbSBarry Smith   while (count) {
500ca161407SBarry Smith     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
5011eb62cbbSBarry Smith     /* unpack receives into our local space */
502ca161407SBarry Smith     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
503d6dfbf8fSBarry Smith     source[imdex]  = recv_status.MPI_SOURCE;
504d6dfbf8fSBarry Smith     lens[imdex]    = n;
5051eb62cbbSBarry Smith     slen          += n;
5061eb62cbbSBarry Smith     count--;
5071eb62cbbSBarry Smith   }
508606d414cSSatish Balay   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
5091eb62cbbSBarry Smith 
5101eb62cbbSBarry Smith   /* move the data into the send scatter */
5110452661fSBarry Smith   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) );CHKPTRQ(lrows);
5121eb62cbbSBarry Smith   count = 0;
5131eb62cbbSBarry Smith   for ( i=0; i<nrecvs; i++ ) {
5141eb62cbbSBarry Smith     values = rvalues + i*nmax;
5151eb62cbbSBarry Smith     for ( j=0; j<lens[i]; j++ ) {
5161eb62cbbSBarry Smith       lrows[count++] = values[j] - base;
5171eb62cbbSBarry Smith     }
5181eb62cbbSBarry Smith   }
519606d414cSSatish Balay   ierr = PetscFree(rvalues);CHKERRQ(ierr);
520606d414cSSatish Balay   ierr = PetscFree(lens);CHKERRQ(ierr);
521606d414cSSatish Balay   ierr = PetscFree(owner);CHKERRQ(ierr);
522606d414cSSatish Balay   ierr = PetscFree(nprocs);CHKERRQ(ierr);
5231eb62cbbSBarry Smith 
5241eb62cbbSBarry Smith   /* actually zap the local rows */
525029af93fSBarry Smith   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
526464493b3SBarry Smith   PLogObjectParent(A,istmp);
5276a5c57faSSatish Balay 
5286eb55b6aSBarry Smith   /*
5296eb55b6aSBarry Smith         Zero the required rows. If the "diagonal block" of the matrix
5306eb55b6aSBarry Smith      is square and the user wishes to set the diagonal we use seperate
5316eb55b6aSBarry Smith      code so that MatSetValues() is not called for each diagonal allocating
5326eb55b6aSBarry Smith      new memory, thus calling lots of mallocs and slowing things down.
5336eb55b6aSBarry Smith 
5346eb55b6aSBarry Smith        Contributed by: Mathew Knepley
5356eb55b6aSBarry Smith   */
536e2d53e46SBarry Smith   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
537e2d53e46SBarry Smith   ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
5386eb55b6aSBarry Smith   if (diag && (l->A->M == l->A->N)) {
5396eb55b6aSBarry Smith     ierr      = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
540e2d53e46SBarry Smith   } else if (diag) {
541e2d53e46SBarry Smith     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
542fa46199cSSatish Balay     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
543fa46199cSSatish Balay       SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
544fa46199cSSatish Balay MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
5456525c446SSatish Balay     }
546e2d53e46SBarry Smith     for ( i = 0; i < slen; i++ ) {
547e2d53e46SBarry Smith       row  = lrows[i] + rstart;
548e2d53e46SBarry Smith       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
549e2d53e46SBarry Smith     }
550e2d53e46SBarry Smith     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
551e2d53e46SBarry Smith     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5526eb55b6aSBarry Smith   } else {
5536a5c57faSSatish Balay     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
5546eb55b6aSBarry Smith   }
55578b31e54SBarry Smith   ierr = ISDestroy(istmp);CHKERRQ(ierr);
556606d414cSSatish Balay   ierr = PetscFree(lrows);CHKERRQ(ierr);
55772dacd9aSBarry Smith 
5581eb62cbbSBarry Smith   /* wait on sends */
5591eb62cbbSBarry Smith   if (nsends) {
560ca161407SBarry Smith     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
561ca161407SBarry Smith     ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
562606d414cSSatish Balay     ierr        = PetscFree(send_status);CHKERRQ(ierr);
5631eb62cbbSBarry Smith   }
564606d414cSSatish Balay   ierr = PetscFree(send_waits);CHKERRQ(ierr);
565606d414cSSatish Balay   ierr = PetscFree(svalues);CHKERRQ(ierr);
5661eb62cbbSBarry Smith 
5673a40ed3dSBarry Smith   PetscFunctionReturn(0);
5681eb62cbbSBarry Smith }
5691eb62cbbSBarry Smith 
5705615d1e5SSatish Balay #undef __FUNC__
5715615d1e5SSatish Balay #define __FUNC__ "MatMult_MPIAIJ"
5728f6be9afSLois Curfman McInnes int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
5731eb62cbbSBarry Smith {
574416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
575fbd6ef76SBarry Smith   int        ierr,nt;
576416022c9SBarry Smith 
5773a40ed3dSBarry Smith   PetscFunctionBegin;
578a2ce50c7SBarry Smith   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
579fbd6ef76SBarry Smith   if (nt != a->n) {
580a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx");
581fbd6ef76SBarry Smith   }
58243a90d84SBarry Smith   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
583f830108cSBarry Smith   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
58443a90d84SBarry Smith   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
585f830108cSBarry Smith   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
5863a40ed3dSBarry Smith   PetscFunctionReturn(0);
5871eb62cbbSBarry Smith }
5881eb62cbbSBarry Smith 
5895615d1e5SSatish Balay #undef __FUNC__
5905615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_MPIAIJ"
5918f6be9afSLois Curfman McInnes int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
592da3a660dSBarry Smith {
593416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
594da3a660dSBarry Smith   int        ierr;
5953a40ed3dSBarry Smith 
5963a40ed3dSBarry Smith   PetscFunctionBegin;
59743a90d84SBarry Smith   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
598f830108cSBarry Smith   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
59943a90d84SBarry Smith   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
600f830108cSBarry Smith   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
6013a40ed3dSBarry Smith   PetscFunctionReturn(0);
602da3a660dSBarry Smith }
603da3a660dSBarry Smith 
6045615d1e5SSatish Balay #undef __FUNC__
6055615d1e5SSatish Balay #define __FUNC__ "MatMultTrans_MPIAIJ"
6068f6be9afSLois Curfman McInnes int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
607da3a660dSBarry Smith {
608416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
609da3a660dSBarry Smith   int        ierr;
610da3a660dSBarry Smith 
6113a40ed3dSBarry Smith   PetscFunctionBegin;
612da3a660dSBarry Smith   /* do nondiagonal part */
613f830108cSBarry Smith   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec);CHKERRQ(ierr);
614da3a660dSBarry Smith   /* send it on its way */
615537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
616da3a660dSBarry Smith   /* do local part */
617f830108cSBarry Smith   ierr = (*a->A->ops->multtrans)(a->A,xx,yy);CHKERRQ(ierr);
618da3a660dSBarry Smith   /* receive remote parts: note this assumes the values are not actually */
619da3a660dSBarry Smith   /* inserted in yy until the next line, which is true for my implementation*/
620da3a660dSBarry Smith   /* but is not perhaps always true. */
621537820f0SBarry Smith   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
6223a40ed3dSBarry Smith   PetscFunctionReturn(0);
623da3a660dSBarry Smith }
624da3a660dSBarry Smith 
6255615d1e5SSatish Balay #undef __FUNC__
6265615d1e5SSatish Balay #define __FUNC__ "MatMultTransAdd_MPIAIJ"
6278f6be9afSLois Curfman McInnes int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
628da3a660dSBarry Smith {
629416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
630da3a660dSBarry Smith   int        ierr;
631da3a660dSBarry Smith 
6323a40ed3dSBarry Smith   PetscFunctionBegin;
633da3a660dSBarry Smith   /* do nondiagonal part */
634f830108cSBarry Smith   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec);CHKERRQ(ierr);
635da3a660dSBarry Smith   /* send it on its way */
636537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
637da3a660dSBarry Smith   /* do local part */
638f830108cSBarry Smith   ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
639da3a660dSBarry Smith   /* receive remote parts: note this assumes the values are not actually */
640da3a660dSBarry Smith   /* inserted in yy until the next line, which is true for my implementation*/
641da3a660dSBarry Smith   /* but is not perhaps always true. */
6420a198c4cSBarry Smith   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
6433a40ed3dSBarry Smith   PetscFunctionReturn(0);
644da3a660dSBarry Smith }
645da3a660dSBarry Smith 
6461eb62cbbSBarry Smith /*
6471eb62cbbSBarry Smith   This only works correctly for square matrices where the subblock A->A is the
6481eb62cbbSBarry Smith    diagonal block
6491eb62cbbSBarry Smith */
6505615d1e5SSatish Balay #undef __FUNC__
6515615d1e5SSatish Balay #define __FUNC__ "MatGetDiagonal_MPIAIJ"
6528f6be9afSLois Curfman McInnes int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
6531eb62cbbSBarry Smith {
6543a40ed3dSBarry Smith   int        ierr;
655416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
6563a40ed3dSBarry Smith 
6573a40ed3dSBarry Smith   PetscFunctionBegin;
658a8c6a408SBarry Smith   if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
6595baf8537SBarry Smith   if (a->rstart != a->cstart || a->rend != a->cend) {
660a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_SIZ,0,"row partition must equal col partition");
6613a40ed3dSBarry Smith   }
6623a40ed3dSBarry Smith   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
6633a40ed3dSBarry Smith   PetscFunctionReturn(0);
6641eb62cbbSBarry Smith }
6651eb62cbbSBarry Smith 
6665615d1e5SSatish Balay #undef __FUNC__
6675615d1e5SSatish Balay #define __FUNC__ "MatScale_MPIAIJ"
6688f6be9afSLois Curfman McInnes int MatScale_MPIAIJ(Scalar *aa,Mat A)
669052efed2SBarry Smith {
670052efed2SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
671052efed2SBarry Smith   int        ierr;
6723a40ed3dSBarry Smith 
6733a40ed3dSBarry Smith   PetscFunctionBegin;
674052efed2SBarry Smith   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
675052efed2SBarry Smith   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
6763a40ed3dSBarry Smith   PetscFunctionReturn(0);
677052efed2SBarry Smith }
678052efed2SBarry Smith 
6795615d1e5SSatish Balay #undef __FUNC__
680d4bb536fSBarry Smith #define __FUNC__ "MatDestroy_MPIAIJ"
681e1311b90SBarry Smith int MatDestroy_MPIAIJ(Mat mat)
6821eb62cbbSBarry Smith {
68344a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
6841eb62cbbSBarry Smith   int        ierr;
68583e2fdc7SBarry Smith 
6863a40ed3dSBarry Smith   PetscFunctionBegin;
68770429bc8SBarry Smith 
68870429bc8SBarry Smith   if (mat->mapping) {
68970429bc8SBarry Smith     ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr);
69070429bc8SBarry Smith   }
69170429bc8SBarry Smith   if (mat->bmapping) {
69270429bc8SBarry Smith     ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr);
69370429bc8SBarry Smith   }
69461b13de0SBarry Smith   if (mat->rmap) {
69561b13de0SBarry Smith     ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
69661b13de0SBarry Smith   }
69761b13de0SBarry Smith   if (mat->cmap) {
69861b13de0SBarry Smith     ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
69961b13de0SBarry Smith   }
700aa482453SBarry Smith #if defined(PETSC_USE_LOG)
701e1311b90SBarry Smith   PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",aij->M,aij->N);
702a5a9c739SBarry Smith #endif
7038798bf22SSatish Balay   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
704606d414cSSatish Balay   ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
70578b31e54SBarry Smith   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
70678b31e54SBarry Smith   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
707aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
7080f5bd95cSBarry Smith   if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
709b1fc9764SSatish Balay #else
710606d414cSSatish Balay   if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
711b1fc9764SSatish Balay #endif
712606d414cSSatish Balay   if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
7131eb62cbbSBarry Smith   if (aij->lvec)   VecDestroy(aij->lvec);
714a56f8943SBarry Smith   if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
715606d414cSSatish Balay   if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
716606d414cSSatish Balay   ierr = PetscFree(aij);CHKERRQ(ierr);
717a5a9c739SBarry Smith   PLogObjectDestroy(mat);
7180452661fSBarry Smith   PetscHeaderDestroy(mat);
7193a40ed3dSBarry Smith   PetscFunctionReturn(0);
7201eb62cbbSBarry Smith }
721ee50ffe9SBarry Smith 
7225615d1e5SSatish Balay #undef __FUNC__
723d4bb536fSBarry Smith #define __FUNC__ "MatView_MPIAIJ_Binary"
724bc5ccf88SSatish Balay int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
7251eb62cbbSBarry Smith {
726416022c9SBarry Smith   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
727416022c9SBarry Smith   int         ierr;
728416022c9SBarry Smith 
7293a40ed3dSBarry Smith   PetscFunctionBegin;
73017699dbbSLois Curfman McInnes   if (aij->size == 1) {
731416022c9SBarry Smith     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
732416022c9SBarry Smith   }
733a8c6a408SBarry Smith   else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
7343a40ed3dSBarry Smith   PetscFunctionReturn(0);
735416022c9SBarry Smith }
736416022c9SBarry Smith 
7375615d1e5SSatish Balay #undef __FUNC__
7387b2a1423SBarry Smith #define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworSocket"
739bc5ccf88SSatish Balay int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer)
740416022c9SBarry Smith {
74144a69424SLois Curfman McInnes   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
742dbb450caSBarry Smith   Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
743*6831982aSBarry Smith   int         ierr, format,shift = C->indexshift,rank = aij->rank,size = aij->size;
744*6831982aSBarry Smith   PetscTruth  isdraw,isascii;
745416022c9SBarry Smith 
7463a40ed3dSBarry Smith   PetscFunctionBegin;
747*6831982aSBarry Smith   ierr  = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
748*6831982aSBarry Smith   ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
7490f5bd95cSBarry Smith   if (isascii) {
750d8467735SBarry Smith     ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
7510a198c4cSBarry Smith     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
7524e220ebcSLois Curfman McInnes       MatInfo info;
7534e220ebcSLois Curfman McInnes       int     flg;
7541dab6e02SBarry Smith       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
755888f2ed8SSatish Balay       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
75695e01e2fSLois Curfman McInnes       ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
757*6831982aSBarry Smith       if (flg) {
758*6831982aSBarry Smith         ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
759*6831982aSBarry Smith 					      rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
760*6831982aSBarry Smith       } else {
761*6831982aSBarry Smith         ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
762*6831982aSBarry Smith 		    rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
763*6831982aSBarry Smith       }
764888f2ed8SSatish Balay       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
765*6831982aSBarry Smith       ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
766888f2ed8SSatish Balay       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
767*6831982aSBarry Smith       ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
768*6831982aSBarry Smith       ierr = ViewerFlush(viewer);CHKERRQ(ierr);
769a40aa06bSLois Curfman McInnes       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
7703a40ed3dSBarry Smith       PetscFunctionReturn(0);
7713a40ed3dSBarry Smith     } else if (format == VIEWER_FORMAT_ASCII_INFO) {
7723a40ed3dSBarry Smith       PetscFunctionReturn(0);
77308480c60SBarry Smith     }
7740f5bd95cSBarry Smith   } else if (isdraw) {
77519bcc07fSBarry Smith     Draw       draw;
77619bcc07fSBarry Smith     PetscTruth isnull;
77777ed5343SBarry Smith     ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
7783a40ed3dSBarry Smith     ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
77919bcc07fSBarry Smith   }
78019bcc07fSBarry Smith 
78117699dbbSLois Curfman McInnes   if (size == 1) {
78278b31e54SBarry Smith     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
7833a40ed3dSBarry Smith   } else {
78495373324SBarry Smith     /* assemble the entire matrix onto first processor. */
78595373324SBarry Smith     Mat         A;
786ec8511deSBarry Smith     Mat_SeqAIJ *Aloc;
7872eb8c8abSBarry Smith     int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
78895373324SBarry Smith     Scalar      *a;
7892ee70a88SLois Curfman McInnes 
79017699dbbSLois Curfman McInnes     if (!rank) {
79155843e3eSBarry Smith       ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
7923a40ed3dSBarry Smith     } else {
79355843e3eSBarry Smith       ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
79495373324SBarry Smith     }
795464493b3SBarry Smith     PLogObjectParent(mat,A);
796416022c9SBarry Smith 
79795373324SBarry Smith     /* copy over the A part */
798ec8511deSBarry Smith     Aloc = (Mat_SeqAIJ*) aij->A->data;
7992ee70a88SLois Curfman McInnes     m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
80095373324SBarry Smith     row = aij->rstart;
801dbb450caSBarry Smith     for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
80295373324SBarry Smith     for ( i=0; i<m; i++ ) {
803416022c9SBarry Smith       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
80495373324SBarry Smith       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
80595373324SBarry Smith     }
8062ee70a88SLois Curfman McInnes     aj = Aloc->j;
807dbb450caSBarry Smith     for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}
80895373324SBarry Smith 
80995373324SBarry Smith     /* copy over the B part */
810ec8511deSBarry Smith     Aloc = (Mat_SeqAIJ*) aij->B->data;
8112ee70a88SLois Curfman McInnes     m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
81295373324SBarry Smith     row = aij->rstart;
8130452661fSBarry Smith     ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) );CHKPTRQ(cols);
814dbb450caSBarry Smith     for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
81595373324SBarry Smith     for ( i=0; i<m; i++ ) {
816416022c9SBarry Smith       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
81795373324SBarry Smith       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
81895373324SBarry Smith     }
819606d414cSSatish Balay     ierr = PetscFree(ct);CHKERRQ(ierr);
8206d4a8577SBarry Smith     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
8216d4a8577SBarry Smith     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
82255843e3eSBarry Smith     /*
82355843e3eSBarry Smith        Everyone has to call to draw the matrix since the graphics waits are
82455843e3eSBarry Smith        synchronized across all processors that share the Draw object
82555843e3eSBarry Smith     */
826*6831982aSBarry Smith     if (!rank) {
827*6831982aSBarry Smith       Viewer sviewer;
828*6831982aSBarry Smith       ierr = ViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
829*6831982aSBarry Smith       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
830*6831982aSBarry Smith       ierr = ViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
83195373324SBarry Smith     }
83278b31e54SBarry Smith     ierr = MatDestroy(A);CHKERRQ(ierr);
83395373324SBarry Smith   }
8343a40ed3dSBarry Smith   PetscFunctionReturn(0);
8351eb62cbbSBarry Smith }
8361eb62cbbSBarry Smith 
8375615d1e5SSatish Balay #undef __FUNC__
838d4bb536fSBarry Smith #define __FUNC__ "MatView_MPIAIJ"
839e1311b90SBarry Smith int MatView_MPIAIJ(Mat mat,Viewer viewer)
840416022c9SBarry Smith {
841416022c9SBarry Smith   int        ierr;
842*6831982aSBarry Smith   PetscTruth isascii,isdraw,issocket,isbinary;
843416022c9SBarry Smith 
8443a40ed3dSBarry Smith   PetscFunctionBegin;
845*6831982aSBarry Smith   ierr  = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
846*6831982aSBarry Smith   ierr  = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
847*6831982aSBarry Smith   ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr);
848*6831982aSBarry Smith   ierr = PetscTypeCompare((PetscObject)viewer,SOCKET_VIEWER,&issocket);CHKERRQ(ierr);
8490f5bd95cSBarry Smith   if (isascii || isdraw || isbinary || issocket) {
8507b2a1423SBarry Smith     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
8515cd90555SBarry Smith   } else {
8520f5bd95cSBarry Smith     SETERRQ1(1,1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
853416022c9SBarry Smith   }
8543a40ed3dSBarry Smith   PetscFunctionReturn(0);
855416022c9SBarry Smith }
856416022c9SBarry Smith 
8571eb62cbbSBarry Smith /*
8581eb62cbbSBarry Smith     This has to provide several versions.
8591eb62cbbSBarry Smith 
8601eb62cbbSBarry Smith      2) a) use only local smoothing updating outer values only once.
8611eb62cbbSBarry Smith         b) local smoothing updating outer values each inner iteration
862d6dfbf8fSBarry Smith      3) color updating out values betwen colors.
8631eb62cbbSBarry Smith */
8645615d1e5SSatish Balay #undef __FUNC__
8655615d1e5SSatish Balay #define __FUNC__ "MatRelax_MPIAIJ"
8668f6be9afSLois Curfman McInnes int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
867dbb450caSBarry Smith                            double fshift,int its,Vec xx)
8688a729477SBarry Smith {
86944a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
870d6dfbf8fSBarry Smith   Mat        AA = mat->A, BB = mat->B;
871ec8511deSBarry Smith   Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
872c16cb8f2SBarry Smith   Scalar     *b,*x,*xs,*ls,d,*v,sum;
8736abc6512SBarry Smith   int        ierr,*idx, *diag;
874416022c9SBarry Smith   int        n = mat->n, m = mat->m, i,shift = A->indexshift;
8758a729477SBarry Smith 
8763a40ed3dSBarry Smith   PetscFunctionBegin;
87783e2fdc7SBarry Smith   if (!A->diag) {ierr = MatMarkDiag_SeqAIJ(AA);CHKERRQ(ierr);}
878d6dfbf8fSBarry Smith   diag = A->diag;
879c16cb8f2SBarry Smith   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
880da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
881f830108cSBarry Smith       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
8823a40ed3dSBarry Smith       PetscFunctionReturn(0);
883da3a660dSBarry Smith     }
8843a40ed3dSBarry Smith     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
8853a40ed3dSBarry Smith     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
886184914b5SBarry Smith     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
887184914b5SBarry Smith     if (xx != bb) {
888184914b5SBarry Smith       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
889184914b5SBarry Smith     } else {
890184914b5SBarry Smith       b = x;
891184914b5SBarry Smith     }
892184914b5SBarry Smith     ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr);
893184914b5SBarry Smith     xs = x + shift; /* shift by one for index start of 1 */
894184914b5SBarry Smith     ls = ls + shift;
895d6dfbf8fSBarry Smith     while (its--) {
896d6dfbf8fSBarry Smith       /* go down through the rows */
897d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
898d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
8994d197716SBarry Smith 	PLogFlops(4*n+3);
900dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
901dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
902d6dfbf8fSBarry Smith         sum  = b[i];
903d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
904dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
905d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
906dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
907dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
908d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
90955a1b374SBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
910d6dfbf8fSBarry Smith       }
911d6dfbf8fSBarry Smith       /* come up through the rows */
912d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
913d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
9144d197716SBarry Smith 	PLogFlops(4*n+3)
915dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
916dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
917d6dfbf8fSBarry Smith         sum  = b[i];
918d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
919dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
920d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
921dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
922dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
923d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
92455a1b374SBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
925d6dfbf8fSBarry Smith       }
926d6dfbf8fSBarry Smith     }
927184914b5SBarry Smith     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
928184914b5SBarry Smith     if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); }
929184914b5SBarry Smith     ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
9303a40ed3dSBarry Smith   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
931da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
932f830108cSBarry Smith       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
9333a40ed3dSBarry Smith       PetscFunctionReturn(0);
934da3a660dSBarry Smith     }
9353a40ed3dSBarry Smith     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
9363a40ed3dSBarry Smith     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
937184914b5SBarry Smith     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
938184914b5SBarry Smith     if (xx != bb) {
939184914b5SBarry Smith       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
940184914b5SBarry Smith     } else {
941184914b5SBarry Smith       b = x;
942184914b5SBarry Smith     }
943184914b5SBarry Smith     ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr);
944184914b5SBarry Smith     xs = x + shift; /* shift by one for index start of 1 */
945184914b5SBarry Smith     ls = ls + shift;
946d6dfbf8fSBarry Smith     while (its--) {
947d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
948d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
9494d197716SBarry Smith 	PLogFlops(4*n+3);
950dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
951dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
952d6dfbf8fSBarry Smith         sum  = b[i];
953d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
954dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
955d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
956dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
957dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
958d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
95955a1b374SBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
960d6dfbf8fSBarry Smith       }
961d6dfbf8fSBarry Smith     }
962184914b5SBarry Smith     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
963184914b5SBarry Smith     if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); }
964184914b5SBarry Smith     ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
9653a40ed3dSBarry Smith   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
966da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
967f830108cSBarry Smith       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
9683a40ed3dSBarry Smith       PetscFunctionReturn(0);
969da3a660dSBarry Smith     }
9702e8a6d31SBarry Smith     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
9712e8a6d31SBarry Smith     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
972184914b5SBarry Smith     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
973184914b5SBarry Smith     if (xx != bb) {
974184914b5SBarry Smith       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
975184914b5SBarry Smith     } else {
976184914b5SBarry Smith       b = x;
977184914b5SBarry Smith     }
978184914b5SBarry Smith     ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr);
979184914b5SBarry Smith     xs = x + shift; /* shift by one for index start of 1 */
980184914b5SBarry Smith     ls = ls + shift;
981d6dfbf8fSBarry Smith     while (its--) {
982d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
983d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
9844d197716SBarry Smith 	PLogFlops(4*n+3);
985dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
986dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
987d6dfbf8fSBarry Smith         sum  = b[i];
988d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
989dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
990d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
991dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
992dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
993d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
99455a1b374SBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
995d6dfbf8fSBarry Smith       }
996d6dfbf8fSBarry Smith     }
997184914b5SBarry Smith     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
998184914b5SBarry Smith     if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); }
999184914b5SBarry Smith     ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
10003a40ed3dSBarry Smith   } else {
1001a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_SUP,0,"Parallel SOR not supported");
1002c16cb8f2SBarry Smith   }
10033a40ed3dSBarry Smith   PetscFunctionReturn(0);
10048a729477SBarry Smith }
1005a66be287SLois Curfman McInnes 
10065615d1e5SSatish Balay #undef __FUNC__
1007d4bb536fSBarry Smith #define __FUNC__ "MatGetInfo_MPIAIJ"
10088f6be9afSLois Curfman McInnes int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1009a66be287SLois Curfman McInnes {
1010a66be287SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1011a66be287SLois Curfman McInnes   Mat        A = mat->A, B = mat->B;
10124e220ebcSLois Curfman McInnes   int        ierr;
10134e220ebcSLois Curfman McInnes   double     isend[5], irecv[5];
1014a66be287SLois Curfman McInnes 
10153a40ed3dSBarry Smith   PetscFunctionBegin;
10164e220ebcSLois Curfman McInnes   info->block_size     = 1.0;
10174e220ebcSLois Curfman McInnes   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
10184e220ebcSLois Curfman McInnes   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
10194e220ebcSLois Curfman McInnes   isend[3] = info->memory;  isend[4] = info->mallocs;
10204e220ebcSLois Curfman McInnes   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
10214e220ebcSLois Curfman McInnes   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
10224e220ebcSLois Curfman McInnes   isend[3] += info->memory;  isend[4] += info->mallocs;
1023a66be287SLois Curfman McInnes   if (flag == MAT_LOCAL) {
10244e220ebcSLois Curfman McInnes     info->nz_used      = isend[0];
10254e220ebcSLois Curfman McInnes     info->nz_allocated = isend[1];
10264e220ebcSLois Curfman McInnes     info->nz_unneeded  = isend[2];
10274e220ebcSLois Curfman McInnes     info->memory       = isend[3];
10284e220ebcSLois Curfman McInnes     info->mallocs      = isend[4];
1029a66be287SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_MAX) {
1030ca161407SBarry Smith     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
10314e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
10324e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
10334e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
10344e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
10354e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
1036a66be287SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_SUM) {
1037ca161407SBarry Smith     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
10384e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
10394e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
10404e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
10414e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
10424e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
1043a66be287SLois Curfman McInnes   }
10444e220ebcSLois Curfman McInnes   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
10454e220ebcSLois Curfman McInnes   info->fill_ratio_needed = 0;
10464e220ebcSLois Curfman McInnes   info->factor_mallocs    = 0;
10478d700155SBarry Smith   info->rows_global       = (double)mat->M;
10488d700155SBarry Smith   info->columns_global    = (double)mat->N;
10498d700155SBarry Smith   info->rows_local        = (double)mat->m;
10508d700155SBarry Smith   info->columns_local     = (double)mat->N;
10514e220ebcSLois Curfman McInnes 
10523a40ed3dSBarry Smith   PetscFunctionReturn(0);
1053a66be287SLois Curfman McInnes }
1054a66be287SLois Curfman McInnes 
10555615d1e5SSatish Balay #undef __FUNC__
1056d4bb536fSBarry Smith #define __FUNC__ "MatSetOption_MPIAIJ"
10578f6be9afSLois Curfman McInnes int MatSetOption_MPIAIJ(Mat A,MatOption op)
1058c74985f6SBarry Smith {
1059c0bbcb79SLois Curfman McInnes   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
10601dee9f54SBarry Smith   int        ierr;
1061c74985f6SBarry Smith 
10623a40ed3dSBarry Smith   PetscFunctionBegin;
10636d4a8577SBarry Smith   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
10646d4a8577SBarry Smith       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
10656da5968aSLois Curfman McInnes       op == MAT_COLUMNS_UNSORTED ||
1066c2653b3dSLois Curfman McInnes       op == MAT_COLUMNS_SORTED ||
10674787f768SSatish Balay       op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
10684787f768SSatish Balay       op == MAT_NEW_NONZERO_LOCATION_ERR) {
10691dee9f54SBarry Smith         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
10701dee9f54SBarry Smith         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1071b1fbbac0SLois Curfman McInnes   } else if (op == MAT_ROW_ORIENTED) {
1072aeafbbfcSLois Curfman McInnes     a->roworiented = 1;
10731dee9f54SBarry Smith     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
10741dee9f54SBarry Smith     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1075b1fbbac0SLois Curfman McInnes   } else if (op == MAT_ROWS_SORTED ||
10766da5968aSLois Curfman McInnes              op == MAT_ROWS_UNSORTED ||
10776d4a8577SBarry Smith              op == MAT_SYMMETRIC ||
10786d4a8577SBarry Smith              op == MAT_STRUCTURALLY_SYMMETRIC ||
10796d4a8577SBarry Smith              op == MAT_YES_NEW_DIAGONALS)
1080981c4779SBarry Smith     PLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
10816d4a8577SBarry Smith   else if (op == MAT_COLUMN_ORIENTED) {
10824b0e389bSBarry Smith     a->roworiented = 0;
10831dee9f54SBarry Smith     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
10841dee9f54SBarry Smith     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
10852b362799SSatish Balay   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
108690f02eecSBarry Smith     a->donotstash = 1;
10873a40ed3dSBarry Smith   } else if (op == MAT_NO_NEW_DIAGONALS){
10883a40ed3dSBarry Smith     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
10893a40ed3dSBarry Smith   } else {
10903a40ed3dSBarry Smith     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
10913a40ed3dSBarry Smith   }
10923a40ed3dSBarry Smith   PetscFunctionReturn(0);
1093c74985f6SBarry Smith }
1094c74985f6SBarry Smith 
10955615d1e5SSatish Balay #undef __FUNC__
1096d4bb536fSBarry Smith #define __FUNC__ "MatGetSize_MPIAIJ"
10978f6be9afSLois Curfman McInnes int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
1098c74985f6SBarry Smith {
109944a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
11003a40ed3dSBarry Smith 
11013a40ed3dSBarry Smith   PetscFunctionBegin;
11020752156aSBarry Smith   if (m) *m = mat->M;
11030752156aSBarry Smith   if (n) *n = mat->N;
11043a40ed3dSBarry Smith   PetscFunctionReturn(0);
1105c74985f6SBarry Smith }
1106c74985f6SBarry Smith 
11075615d1e5SSatish Balay #undef __FUNC__
1108d4bb536fSBarry Smith #define __FUNC__ "MatGetLocalSize_MPIAIJ"
11098f6be9afSLois Curfman McInnes int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
1110c74985f6SBarry Smith {
111144a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
11123a40ed3dSBarry Smith 
11133a40ed3dSBarry Smith   PetscFunctionBegin;
11140752156aSBarry Smith   if (m) *m = mat->m;
1115f830108cSBarry Smith   if (n) *n = mat->n;
11163a40ed3dSBarry Smith   PetscFunctionReturn(0);
1117c74985f6SBarry Smith }
1118c74985f6SBarry Smith 
11195615d1e5SSatish Balay #undef __FUNC__
1120d4bb536fSBarry Smith #define __FUNC__ "MatGetOwnershipRange_MPIAIJ"
11218f6be9afSLois Curfman McInnes int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
1122c74985f6SBarry Smith {
112344a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
11243a40ed3dSBarry Smith 
11253a40ed3dSBarry Smith   PetscFunctionBegin;
1126c74985f6SBarry Smith   *m = mat->rstart; *n = mat->rend;
11273a40ed3dSBarry Smith   PetscFunctionReturn(0);
1128c74985f6SBarry Smith }
1129c74985f6SBarry Smith 
11305615d1e5SSatish Balay #undef __FUNC__
11315615d1e5SSatish Balay #define __FUNC__ "MatGetRow_MPIAIJ"
11326d84be18SBarry Smith int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
113339e00950SLois Curfman McInnes {
1134154123eaSLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
113570f0671dSBarry Smith   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
1136154123eaSLois Curfman McInnes   int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
1137154123eaSLois Curfman McInnes   int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
113870f0671dSBarry Smith   int        *cmap, *idx_p;
113939e00950SLois Curfman McInnes 
11403a40ed3dSBarry Smith   PetscFunctionBegin;
1141a8c6a408SBarry Smith   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
11427a0afa10SBarry Smith   mat->getrowactive = PETSC_TRUE;
11437a0afa10SBarry Smith 
114470f0671dSBarry Smith   if (!mat->rowvalues && (idx || v)) {
11457a0afa10SBarry Smith     /*
11467a0afa10SBarry Smith         allocate enough space to hold information from the longest row.
11477a0afa10SBarry Smith     */
11487a0afa10SBarry Smith     Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data;
1149c16cb8f2SBarry Smith     int     max = 1,m = mat->m,tmp;
1150c16cb8f2SBarry Smith     for ( i=0; i<m; i++ ) {
11517a0afa10SBarry Smith       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
11527a0afa10SBarry Smith       if (max < tmp) { max = tmp; }
11537a0afa10SBarry Smith     }
11543f97c4b0SBarry Smith     mat->rowvalues  = (Scalar *) PetscMalloc(max*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues);
11557a0afa10SBarry Smith     mat->rowindices = (int *) (mat->rowvalues + max);
11567a0afa10SBarry Smith   }
11577a0afa10SBarry Smith 
1158a8c6a408SBarry Smith   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Only local rows")
1159abc0e9e4SLois Curfman McInnes   lrow = row - rstart;
116039e00950SLois Curfman McInnes 
1161154123eaSLois Curfman McInnes   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1162154123eaSLois Curfman McInnes   if (!v)   {pvA = 0; pvB = 0;}
1163154123eaSLois Curfman McInnes   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1164f830108cSBarry Smith   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1165f830108cSBarry Smith   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1166154123eaSLois Curfman McInnes   nztot = nzA + nzB;
1167154123eaSLois Curfman McInnes 
116870f0671dSBarry Smith   cmap  = mat->garray;
1169154123eaSLois Curfman McInnes   if (v  || idx) {
1170154123eaSLois Curfman McInnes     if (nztot) {
1171154123eaSLois Curfman McInnes       /* Sort by increasing column numbers, assuming A and B already sorted */
117270f0671dSBarry Smith       int imark = -1;
1173154123eaSLois Curfman McInnes       if (v) {
117470f0671dSBarry Smith         *v = v_p = mat->rowvalues;
117539e00950SLois Curfman McInnes         for ( i=0; i<nzB; i++ ) {
117670f0671dSBarry Smith           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1177154123eaSLois Curfman McInnes           else break;
1178154123eaSLois Curfman McInnes         }
1179154123eaSLois Curfman McInnes         imark = i;
118070f0671dSBarry Smith         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
118170f0671dSBarry Smith         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
1182154123eaSLois Curfman McInnes       }
1183154123eaSLois Curfman McInnes       if (idx) {
118470f0671dSBarry Smith         *idx = idx_p = mat->rowindices;
118570f0671dSBarry Smith         if (imark > -1) {
118670f0671dSBarry Smith           for ( i=0; i<imark; i++ ) {
118770f0671dSBarry Smith             idx_p[i] = cmap[cworkB[i]];
118870f0671dSBarry Smith           }
118970f0671dSBarry Smith         } else {
1190154123eaSLois Curfman McInnes           for ( i=0; i<nzB; i++ ) {
119170f0671dSBarry Smith             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1192154123eaSLois Curfman McInnes             else break;
1193154123eaSLois Curfman McInnes           }
1194154123eaSLois Curfman McInnes           imark = i;
119570f0671dSBarry Smith         }
119670f0671dSBarry Smith         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart + cworkA[i];
119770f0671dSBarry Smith         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]];
119839e00950SLois Curfman McInnes       }
11993f97c4b0SBarry Smith     } else {
12001ca473b0SSatish Balay       if (idx) *idx = 0;
12011ca473b0SSatish Balay       if (v)   *v   = 0;
12021ca473b0SSatish Balay     }
1203154123eaSLois Curfman McInnes   }
120439e00950SLois Curfman McInnes   *nz = nztot;
1205f830108cSBarry Smith   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1206f830108cSBarry Smith   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
12073a40ed3dSBarry Smith   PetscFunctionReturn(0);
120839e00950SLois Curfman McInnes }
120939e00950SLois Curfman McInnes 
12105615d1e5SSatish Balay #undef __FUNC__
1211d4bb536fSBarry Smith #define __FUNC__ "MatRestoreRow_MPIAIJ"
12126d84be18SBarry Smith int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
121339e00950SLois Curfman McInnes {
12147a0afa10SBarry Smith   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
12153a40ed3dSBarry Smith 
12163a40ed3dSBarry Smith   PetscFunctionBegin;
12177a0afa10SBarry Smith   if (aij->getrowactive == PETSC_FALSE) {
1218a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
12197a0afa10SBarry Smith   }
12207a0afa10SBarry Smith   aij->getrowactive = PETSC_FALSE;
12213a40ed3dSBarry Smith   PetscFunctionReturn(0);
122239e00950SLois Curfman McInnes }
122339e00950SLois Curfman McInnes 
12245615d1e5SSatish Balay #undef __FUNC__
12255615d1e5SSatish Balay #define __FUNC__ "MatNorm_MPIAIJ"
12268f6be9afSLois Curfman McInnes int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
1227855ac2c5SLois Curfman McInnes {
1228855ac2c5SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1229ec8511deSBarry Smith   Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
1230416022c9SBarry Smith   int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
1231416022c9SBarry Smith   double     sum = 0.0;
123204ca555eSLois Curfman McInnes   Scalar     *v;
123304ca555eSLois Curfman McInnes 
12343a40ed3dSBarry Smith   PetscFunctionBegin;
123517699dbbSLois Curfman McInnes   if (aij->size == 1) {
123614183eadSLois Curfman McInnes     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
123737fa93a5SLois Curfman McInnes   } else {
123804ca555eSLois Curfman McInnes     if (type == NORM_FROBENIUS) {
123904ca555eSLois Curfman McInnes       v = amat->a;
124004ca555eSLois Curfman McInnes       for (i=0; i<amat->nz; i++ ) {
1241aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX)
1242e20fef11SSatish Balay         sum += PetscReal(PetscConj(*v)*(*v)); v++;
124304ca555eSLois Curfman McInnes #else
124404ca555eSLois Curfman McInnes         sum += (*v)*(*v); v++;
124504ca555eSLois Curfman McInnes #endif
124604ca555eSLois Curfman McInnes       }
124704ca555eSLois Curfman McInnes       v = bmat->a;
124804ca555eSLois Curfman McInnes       for (i=0; i<bmat->nz; i++ ) {
1249aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX)
1250e20fef11SSatish Balay         sum += PetscReal(PetscConj(*v)*(*v)); v++;
125104ca555eSLois Curfman McInnes #else
125204ca555eSLois Curfman McInnes         sum += (*v)*(*v); v++;
125304ca555eSLois Curfman McInnes #endif
125404ca555eSLois Curfman McInnes       }
1255ca161407SBarry Smith       ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
125604ca555eSLois Curfman McInnes       *norm = sqrt(*norm);
12573a40ed3dSBarry Smith     } else if (type == NORM_1) { /* max column norm */
125804ca555eSLois Curfman McInnes       double *tmp, *tmp2;
125904ca555eSLois Curfman McInnes       int    *jj, *garray = aij->garray;
1260758f045eSSatish Balay       tmp  = (double *) PetscMalloc( (aij->N+1)*sizeof(double) );CHKPTRQ(tmp);
1261758f045eSSatish Balay       tmp2 = (double *) PetscMalloc( (aij->N+1)*sizeof(double) );CHKPTRQ(tmp2);
1262549d3d68SSatish Balay       ierr = PetscMemzero(tmp,aij->N*sizeof(double));CHKERRQ(ierr);
126304ca555eSLois Curfman McInnes       *norm = 0.0;
126404ca555eSLois Curfman McInnes       v = amat->a; jj = amat->j;
126504ca555eSLois Curfman McInnes       for ( j=0; j<amat->nz; j++ ) {
1266579c6b6fSBarry Smith         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
126704ca555eSLois Curfman McInnes       }
126804ca555eSLois Curfman McInnes       v = bmat->a; jj = bmat->j;
126904ca555eSLois Curfman McInnes       for ( j=0; j<bmat->nz; j++ ) {
1270579c6b6fSBarry Smith         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
127104ca555eSLois Curfman McInnes       }
1272ca161407SBarry Smith       ierr = MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
127304ca555eSLois Curfman McInnes       for ( j=0; j<aij->N; j++ ) {
127404ca555eSLois Curfman McInnes         if (tmp2[j] > *norm) *norm = tmp2[j];
127504ca555eSLois Curfman McInnes       }
1276606d414cSSatish Balay       ierr = PetscFree(tmp);CHKERRQ(ierr);
1277606d414cSSatish Balay       ierr = PetscFree(tmp2);CHKERRQ(ierr);
12783a40ed3dSBarry Smith     } else if (type == NORM_INFINITY) { /* max row norm */
1279515d9167SLois Curfman McInnes       double ntemp = 0.0;
128004ca555eSLois Curfman McInnes       for ( j=0; j<amat->m; j++ ) {
1281dbb450caSBarry Smith         v = amat->a + amat->i[j] + shift;
128204ca555eSLois Curfman McInnes         sum = 0.0;
128304ca555eSLois Curfman McInnes         for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
1284cddf8d76SBarry Smith           sum += PetscAbsScalar(*v); v++;
128504ca555eSLois Curfman McInnes         }
1286dbb450caSBarry Smith         v = bmat->a + bmat->i[j] + shift;
128704ca555eSLois Curfman McInnes         for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
1288cddf8d76SBarry Smith           sum += PetscAbsScalar(*v); v++;
128904ca555eSLois Curfman McInnes         }
1290515d9167SLois Curfman McInnes         if (sum > ntemp) ntemp = sum;
129104ca555eSLois Curfman McInnes       }
1292ca161407SBarry Smith       ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);CHKERRQ(ierr);
1293ca161407SBarry Smith     } else {
1294a8c6a408SBarry Smith       SETERRQ(PETSC_ERR_SUP,0,"No support for two norm");
129504ca555eSLois Curfman McInnes     }
129637fa93a5SLois Curfman McInnes   }
12973a40ed3dSBarry Smith   PetscFunctionReturn(0);
1298855ac2c5SLois Curfman McInnes }
1299855ac2c5SLois Curfman McInnes 
13005615d1e5SSatish Balay #undef __FUNC__
13015615d1e5SSatish Balay #define __FUNC__ "MatTranspose_MPIAIJ"
13028f6be9afSLois Curfman McInnes int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1303b7c46309SBarry Smith {
1304b7c46309SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1305dbb450caSBarry Smith   Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
1306416022c9SBarry Smith   int        ierr,shift = Aloc->indexshift;
1307b7c46309SBarry Smith   int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
13083a40ed3dSBarry Smith   Mat        B;
1309b7c46309SBarry Smith   Scalar     *array;
1310b7c46309SBarry Smith 
13113a40ed3dSBarry Smith   PetscFunctionBegin;
1312d4bb536fSBarry Smith   if (matout == PETSC_NULL && M != N) {
1313a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place");
1314d4bb536fSBarry Smith   }
1315d4bb536fSBarry Smith 
1316d4bb536fSBarry Smith   ierr = MatCreateMPIAIJ(A->comm,a->n,a->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);
1317b7c46309SBarry Smith 
1318b7c46309SBarry Smith   /* copy over the A part */
1319ec8511deSBarry Smith   Aloc = (Mat_SeqAIJ*) a->A->data;
1320b7c46309SBarry Smith   m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1321b7c46309SBarry Smith   row = a->rstart;
1322dbb450caSBarry Smith   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
1323b7c46309SBarry Smith   for ( i=0; i<m; i++ ) {
1324416022c9SBarry Smith     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1325b7c46309SBarry Smith     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1326b7c46309SBarry Smith   }
1327b7c46309SBarry Smith   aj = Aloc->j;
13284af08d9eSBarry Smith   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}
1329b7c46309SBarry Smith 
1330b7c46309SBarry Smith   /* copy over the B part */
1331ec8511deSBarry Smith   Aloc = (Mat_SeqAIJ*) a->B->data;
1332b7c46309SBarry Smith   m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1333b7c46309SBarry Smith   row = a->rstart;
13340452661fSBarry Smith   ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) );CHKPTRQ(cols);
1335dbb450caSBarry Smith   for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
1336b7c46309SBarry Smith   for ( i=0; i<m; i++ ) {
1337416022c9SBarry Smith     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1338b7c46309SBarry Smith     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1339b7c46309SBarry Smith   }
1340606d414cSSatish Balay   ierr = PetscFree(ct);CHKERRQ(ierr);
13416d4a8577SBarry Smith   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
13426d4a8577SBarry Smith   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
13433638b69dSLois Curfman McInnes   if (matout != PETSC_NULL) {
13440de55854SLois Curfman McInnes     *matout = B;
13450de55854SLois Curfman McInnes   } else {
1346f830108cSBarry Smith     PetscOps       *Abops;
1347f830108cSBarry Smith     struct _MatOps *Aops;
1348f830108cSBarry Smith 
13490de55854SLois Curfman McInnes     /* This isn't really an in-place transpose .... but free data structures from a */
1350606d414cSSatish Balay     ierr = PetscFree(a->rowners);CHKERRQ(ierr);
13510de55854SLois Curfman McInnes     ierr = MatDestroy(a->A);CHKERRQ(ierr);
13520de55854SLois Curfman McInnes     ierr = MatDestroy(a->B);CHKERRQ(ierr);
1353aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
13540f5bd95cSBarry Smith     if (a->colmap) {ierr = PetscTableDelete(a->colmap);CHKERRQ(ierr);}
1355b1fc9764SSatish Balay #else
1356606d414cSSatish Balay     if (a->colmap) {ierr = PetscFree(a->colmap);CHKERRQ(ierr);}
1357b1fc9764SSatish Balay #endif
1358606d414cSSatish Balay     if (a->garray) {ierr = PetscFree(a->garray);CHKERRQ(ierr);}
13590de55854SLois Curfman McInnes     if (a->lvec) VecDestroy(a->lvec);
1360a56f8943SBarry Smith     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
1361606d414cSSatish Balay     ierr = PetscFree(a);CHKERRQ(ierr);
1362f830108cSBarry Smith 
1363f830108cSBarry Smith     /*
1364f830108cSBarry Smith        This is horrible, horrible code. We need to keep the
1365f830108cSBarry Smith       A pointers for the bops and ops but copy everything
1366f830108cSBarry Smith       else from C.
1367f830108cSBarry Smith     */
1368f830108cSBarry Smith     Abops   = A->bops;
1369f830108cSBarry Smith     Aops    = A->ops;
1370549d3d68SSatish Balay     ierr    = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr);
1371f830108cSBarry Smith     A->bops = Abops;
1372f830108cSBarry Smith     A->ops  = Aops;
13730452661fSBarry Smith     PetscHeaderDestroy(B);
13740de55854SLois Curfman McInnes   }
13753a40ed3dSBarry Smith   PetscFunctionReturn(0);
1376b7c46309SBarry Smith }
1377b7c46309SBarry Smith 
13785615d1e5SSatish Balay #undef __FUNC__
13795615d1e5SSatish Balay #define __FUNC__ "MatDiagonalScale_MPIAIJ"
13804b967eb1SSatish Balay int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1381a008b906SSatish Balay {
13824b967eb1SSatish Balay   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
13834b967eb1SSatish Balay   Mat        a = aij->A, b = aij->B;
1384a008b906SSatish Balay   int        ierr,s1,s2,s3;
1385a008b906SSatish Balay 
13863a40ed3dSBarry Smith   PetscFunctionBegin;
13874b967eb1SSatish Balay   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
13884b967eb1SSatish Balay   if (rr) {
1389e1311b90SBarry Smith     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1390a8c6a408SBarry Smith     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size");
13914b967eb1SSatish Balay     /* Overlap communication with computation. */
139243a90d84SBarry Smith     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1393a008b906SSatish Balay   }
13944b967eb1SSatish Balay   if (ll) {
1395e1311b90SBarry Smith     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1396a8c6a408SBarry Smith     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size");
1397f830108cSBarry Smith     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
13984b967eb1SSatish Balay   }
13994b967eb1SSatish Balay   /* scale  the diagonal block */
1400f830108cSBarry Smith   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
14014b967eb1SSatish Balay 
14024b967eb1SSatish Balay   if (rr) {
14034b967eb1SSatish Balay     /* Do a scatter end and then right scale the off-diagonal block */
140443a90d84SBarry Smith     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1405f830108cSBarry Smith     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
14064b967eb1SSatish Balay   }
14074b967eb1SSatish Balay 
14083a40ed3dSBarry Smith   PetscFunctionReturn(0);
1409a008b906SSatish Balay }
1410a008b906SSatish Balay 
1411a008b906SSatish Balay 
14125615d1e5SSatish Balay #undef __FUNC__
1413d4bb536fSBarry Smith #define __FUNC__ "MatPrintHelp_MPIAIJ"
14148f6be9afSLois Curfman McInnes int MatPrintHelp_MPIAIJ(Mat A)
1415682d7d0cSBarry Smith {
1416682d7d0cSBarry Smith   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
14173a40ed3dSBarry Smith   int        ierr;
1418682d7d0cSBarry Smith 
14193a40ed3dSBarry Smith   PetscFunctionBegin;
14203a40ed3dSBarry Smith   if (!a->rank) {
14213a40ed3dSBarry Smith     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
14223a40ed3dSBarry Smith   }
14233a40ed3dSBarry Smith   PetscFunctionReturn(0);
1424682d7d0cSBarry Smith }
1425682d7d0cSBarry Smith 
14265615d1e5SSatish Balay #undef __FUNC__
1427d4bb536fSBarry Smith #define __FUNC__ "MatGetBlockSize_MPIAIJ"
14288f6be9afSLois Curfman McInnes int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
14295a838052SSatish Balay {
14303a40ed3dSBarry Smith   PetscFunctionBegin;
14315a838052SSatish Balay   *bs = 1;
14323a40ed3dSBarry Smith   PetscFunctionReturn(0);
14335a838052SSatish Balay }
14345615d1e5SSatish Balay #undef __FUNC__
1435d4bb536fSBarry Smith #define __FUNC__ "MatSetUnfactored_MPIAIJ"
14368f6be9afSLois Curfman McInnes int MatSetUnfactored_MPIAIJ(Mat A)
1437bb5a7306SBarry Smith {
1438bb5a7306SBarry Smith   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1439bb5a7306SBarry Smith   int        ierr;
14403a40ed3dSBarry Smith 
14413a40ed3dSBarry Smith   PetscFunctionBegin;
1442bb5a7306SBarry Smith   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
14433a40ed3dSBarry Smith   PetscFunctionReturn(0);
1444bb5a7306SBarry Smith }
1445bb5a7306SBarry Smith 
1446d4bb536fSBarry Smith #undef __FUNC__
1447d4bb536fSBarry Smith #define __FUNC__ "MatEqual_MPIAIJ"
1448d4bb536fSBarry Smith int MatEqual_MPIAIJ(Mat A, Mat B, PetscTruth *flag)
1449d4bb536fSBarry Smith {
1450d4bb536fSBarry Smith   Mat_MPIAIJ *matB = (Mat_MPIAIJ *) B->data,*matA = (Mat_MPIAIJ *) A->data;
1451d4bb536fSBarry Smith   Mat        a, b, c, d;
1452d4bb536fSBarry Smith   PetscTruth flg;
1453d4bb536fSBarry Smith   int        ierr;
1454d4bb536fSBarry Smith 
14553a40ed3dSBarry Smith   PetscFunctionBegin;
1456a8c6a408SBarry Smith   if (B->type != MATMPIAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type");
1457d4bb536fSBarry Smith   a = matA->A; b = matA->B;
1458d4bb536fSBarry Smith   c = matB->A; d = matB->B;
1459d4bb536fSBarry Smith 
1460d4bb536fSBarry Smith   ierr = MatEqual(a, c, &flg);CHKERRQ(ierr);
1461d4bb536fSBarry Smith   if (flg == PETSC_TRUE) {
1462d4bb536fSBarry Smith     ierr = MatEqual(b, d, &flg);CHKERRQ(ierr);
1463d4bb536fSBarry Smith   }
1464ca161407SBarry Smith   ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr);
14653a40ed3dSBarry Smith   PetscFunctionReturn(0);
1466d4bb536fSBarry Smith }
1467d4bb536fSBarry Smith 
1468cb5b572fSBarry Smith #undef __FUNC__
1469cb5b572fSBarry Smith #define __FUNC__ "MatCopy_MPIAIJ"
1470cb5b572fSBarry Smith int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1471cb5b572fSBarry Smith {
1472cb5b572fSBarry Smith   int        ierr;
1473cb5b572fSBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1474cb5b572fSBarry Smith   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
1475cb5b572fSBarry Smith 
1476cb5b572fSBarry Smith   PetscFunctionBegin;
1477cb5b572fSBarry Smith   if (str != SAME_NONZERO_PATTERN || B->type != MATMPIAIJ) {
1478cb5b572fSBarry Smith     /* because of the column compression in the off-processor part of the matrix a->B,
1479cb5b572fSBarry Smith        the number of columns in a->B and b->B may be different, hence we cannot call
1480cb5b572fSBarry Smith        the MatCopy() directly on the two parts. If need be, we can provide a more
1481cb5b572fSBarry Smith        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
1482cb5b572fSBarry Smith        then copying the submatrices */
1483cb5b572fSBarry Smith     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1484cb5b572fSBarry Smith   } else {
1485cb5b572fSBarry Smith     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1486cb5b572fSBarry Smith     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1487cb5b572fSBarry Smith   }
1488cb5b572fSBarry Smith   PetscFunctionReturn(0);
1489cb5b572fSBarry Smith }
1490cb5b572fSBarry Smith 
14912e8a6d31SBarry Smith extern int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *);
14922f86bd48SSatish Balay extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int);
14930a198c4cSBarry Smith extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
14947b2a1423SBarry Smith extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatReuse,Mat **);
14957b2a1423SBarry Smith extern int MatGetSubMatrix_MPIAIJ (Mat ,IS,IS,int,MatReuse,Mat *);
149600e6dbe6SBarry Smith 
14978a729477SBarry Smith /* -------------------------------------------------------------------*/
1498cda55fadSBarry Smith static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1499cda55fadSBarry Smith        MatGetRow_MPIAIJ,
1500cda55fadSBarry Smith        MatRestoreRow_MPIAIJ,
1501cda55fadSBarry Smith        MatMult_MPIAIJ,
1502cda55fadSBarry Smith        MatMultAdd_MPIAIJ,
1503cda55fadSBarry Smith        MatMultTrans_MPIAIJ,
1504cda55fadSBarry Smith        MatMultTransAdd_MPIAIJ,
1505cda55fadSBarry Smith        0,
1506cda55fadSBarry Smith        0,
1507cda55fadSBarry Smith        0,
1508cda55fadSBarry Smith        0,
1509cda55fadSBarry Smith        0,
1510cda55fadSBarry Smith        0,
151144a69424SLois Curfman McInnes        MatRelax_MPIAIJ,
1512b7c46309SBarry Smith        MatTranspose_MPIAIJ,
1513cda55fadSBarry Smith        MatGetInfo_MPIAIJ,
1514cda55fadSBarry Smith        MatEqual_MPIAIJ,
1515cda55fadSBarry Smith        MatGetDiagonal_MPIAIJ,
1516cda55fadSBarry Smith        MatDiagonalScale_MPIAIJ,
1517cda55fadSBarry Smith        MatNorm_MPIAIJ,
1518cda55fadSBarry Smith        MatAssemblyBegin_MPIAIJ,
1519cda55fadSBarry Smith        MatAssemblyEnd_MPIAIJ,
15201eb62cbbSBarry Smith        0,
1521cda55fadSBarry Smith        MatSetOption_MPIAIJ,
1522cda55fadSBarry Smith        MatZeroEntries_MPIAIJ,
1523cda55fadSBarry Smith        MatZeroRows_MPIAIJ,
1524cda55fadSBarry Smith        0,
1525cda55fadSBarry Smith        0,
1526cda55fadSBarry Smith        0,
1527cda55fadSBarry Smith        0,
1528cda55fadSBarry Smith        MatGetSize_MPIAIJ,
1529cda55fadSBarry Smith        MatGetLocalSize_MPIAIJ,
1530cda55fadSBarry Smith        MatGetOwnershipRange_MPIAIJ,
1531cda55fadSBarry Smith        0,
1532cda55fadSBarry Smith        0,
1533cda55fadSBarry Smith        0,
1534cda55fadSBarry Smith        0,
15352e8a6d31SBarry Smith        MatDuplicate_MPIAIJ,
1536cda55fadSBarry Smith        0,
1537cda55fadSBarry Smith        0,
1538cda55fadSBarry Smith        0,
1539cda55fadSBarry Smith        0,
1540cda55fadSBarry Smith        0,
1541cda55fadSBarry Smith        MatGetSubMatrices_MPIAIJ,
1542cda55fadSBarry Smith        MatIncreaseOverlap_MPIAIJ,
1543cda55fadSBarry Smith        MatGetValues_MPIAIJ,
1544cb5b572fSBarry Smith        MatCopy_MPIAIJ,
1545052efed2SBarry Smith        MatPrintHelp_MPIAIJ,
1546cda55fadSBarry Smith        MatScale_MPIAIJ,
1547cda55fadSBarry Smith        0,
1548cda55fadSBarry Smith        0,
1549cda55fadSBarry Smith        0,
1550cda55fadSBarry Smith        MatGetBlockSize_MPIAIJ,
1551cda55fadSBarry Smith        0,
1552cda55fadSBarry Smith        0,
1553cda55fadSBarry Smith        0,
1554cda55fadSBarry Smith        0,
1555cda55fadSBarry Smith        MatFDColoringCreate_MPIAIJ,
1556cda55fadSBarry Smith        0,
1557cda55fadSBarry Smith        MatSetUnfactored_MPIAIJ,
1558cda55fadSBarry Smith        0,
1559cda55fadSBarry Smith        0,
1560cda55fadSBarry Smith        MatGetSubMatrix_MPIAIJ,
1561cda55fadSBarry Smith        0,
1562cda55fadSBarry Smith        0,
1563cda55fadSBarry Smith        MatGetMaps_Petsc};
156436ce4990SBarry Smith 
15652e8a6d31SBarry Smith /* ----------------------------------------------------------------------------------------*/
15662e8a6d31SBarry Smith 
1567fb2e594dSBarry Smith EXTERN_C_BEGIN
15682e8a6d31SBarry Smith #undef __FUNC__
15692e8a6d31SBarry Smith #define __FUNC__ "MatStoreValues_MPIAIJ"
15702e8a6d31SBarry Smith int MatStoreValues_MPIAIJ(Mat mat)
15712e8a6d31SBarry Smith {
15722e8a6d31SBarry Smith   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
15732e8a6d31SBarry Smith   int        ierr;
15742e8a6d31SBarry Smith 
15752e8a6d31SBarry Smith   PetscFunctionBegin;
15762e8a6d31SBarry Smith   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
15772e8a6d31SBarry Smith   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
15782e8a6d31SBarry Smith   PetscFunctionReturn(0);
15792e8a6d31SBarry Smith }
1580fb2e594dSBarry Smith EXTERN_C_END
15812e8a6d31SBarry Smith 
1582fb2e594dSBarry Smith EXTERN_C_BEGIN
15832e8a6d31SBarry Smith #undef __FUNC__
15842e8a6d31SBarry Smith #define __FUNC__ "MatRetrieveValues_MPIAIJ"
15852e8a6d31SBarry Smith int MatRetrieveValues_MPIAIJ(Mat mat)
15862e8a6d31SBarry Smith {
15872e8a6d31SBarry Smith   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
15882e8a6d31SBarry Smith   int        ierr;
15892e8a6d31SBarry Smith 
15902e8a6d31SBarry Smith   PetscFunctionBegin;
15912e8a6d31SBarry Smith   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
15922e8a6d31SBarry Smith   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
15932e8a6d31SBarry Smith   PetscFunctionReturn(0);
15942e8a6d31SBarry Smith }
1595fb2e594dSBarry Smith EXTERN_C_END
15968a729477SBarry Smith 
159727508adbSBarry Smith #include "pc.h"
159827508adbSBarry Smith EXTERN_C_BEGIN
15995ef9f2a5SBarry Smith extern int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *);
160027508adbSBarry Smith EXTERN_C_END
160127508adbSBarry Smith 
16025615d1e5SSatish Balay #undef __FUNC__
16035615d1e5SSatish Balay #define __FUNC__ "MatCreateMPIAIJ"
16041987afe7SBarry Smith /*@C
1605ff756334SLois Curfman McInnes    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
16063a511b96SLois Curfman McInnes    (the default parallel PETSc format).  For good matrix assembly performance
16073a511b96SLois Curfman McInnes    the user should preallocate the matrix storage by setting the parameters
16083a511b96SLois Curfman McInnes    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
16093a511b96SLois Curfman McInnes    performance can be increased by more than a factor of 50.
16108a729477SBarry Smith 
1611db81eaa0SLois Curfman McInnes    Collective on MPI_Comm
1612db81eaa0SLois Curfman McInnes 
16138a729477SBarry Smith    Input Parameters:
1614db81eaa0SLois Curfman McInnes +  comm - MPI communicator
16157d3e4905SLois Curfman McInnes .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
161692e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
161792e8d321SLois Curfman McInnes            y vector for the matrix-vector product y = Ax.
16181a3896d6SBarry Smith .  n - This value should be the same as the local size used in creating the
16191a3896d6SBarry Smith        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
16201a3896d6SBarry Smith        calculated if N is given) For square matrices n is almost always m.
162160d380a7SBarry Smith .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
162260d380a7SBarry Smith .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1623af1d9917SSatish Balay .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
1624af1d9917SSatish Balay            (same value is used for all local rows)
1625af1d9917SSatish Balay .  d_nnz - array containing the number of nonzeros in the various rows of the
1626af1d9917SSatish Balay            DIAGONAL portion of the local submatrix (possibly different for each row)
1627af1d9917SSatish Balay            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
1628af1d9917SSatish Balay            The size of this array is equal to the number of local rows, i.e 'm'.
1629af1d9917SSatish Balay            You must leave room for the diagonal entry even if it is zero.
1630af1d9917SSatish Balay .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
1631af1d9917SSatish Balay            submatrix (same value is used for all local rows).
1632af1d9917SSatish Balay -  o_nnz - array containing the number of nonzeros in the various rows of the
1633af1d9917SSatish Balay            OFF-DIAGONAL portion of the local submatrix (possibly different for
1634af1d9917SSatish Balay            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
1635af1d9917SSatish Balay            structure. The size of this array is equal to the number
1636af1d9917SSatish Balay            of local rows, i.e 'm'.
16378a729477SBarry Smith 
1638ff756334SLois Curfman McInnes    Output Parameter:
163944cd7ae7SLois Curfman McInnes .  A - the matrix
16408a729477SBarry Smith 
1641b259b22eSLois Curfman McInnes    Notes:
1642af1d9917SSatish Balay    m,n,M,N parameters specify the size of the matrix, and its partitioning across
1643af1d9917SSatish Balay    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
1644af1d9917SSatish Balay    storage requirements for this matrix.
1645af1d9917SSatish Balay 
1646af1d9917SSatish Balay    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
1647af1d9917SSatish Balay    processor than it must be used on all processors that share the object for
1648af1d9917SSatish Balay    that argument.
1649be79a94dSBarry Smith 
1650ff756334SLois Curfman McInnes    The AIJ format (also called the Yale sparse matrix format or
1651ff756334SLois Curfman McInnes    compressed row storage), is fully compatible with standard Fortran 77
16520002213bSLois Curfman McInnes    storage.  That is, the stored row and column indices can begin at
16530002213bSLois Curfman McInnes    either one (as in Fortran) or zero.  See the users manual for details.
1654ff756334SLois Curfman McInnes 
1655ff756334SLois Curfman McInnes    The user MUST specify either the local or global matrix dimensions
1656ff756334SLois Curfman McInnes    (possibly both).
1657ff756334SLois Curfman McInnes 
1658af1d9917SSatish Balay    The parallel matrix is partitioned such that the first m0 rows belong to
1659af1d9917SSatish Balay    process 0, the next m1 rows belong to process 1, the next m2 rows belong
1660af1d9917SSatish Balay    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
1661af1d9917SSatish Balay 
1662af1d9917SSatish Balay    The DIAGONAL portion of the local submatrix of a processor can be defined
1663af1d9917SSatish Balay    as the submatrix which is obtained by extraction the part corresponding
1664af1d9917SSatish Balay    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
1665af1d9917SSatish Balay    first row that belongs to the processor, and r2 is the last row belonging
1666af1d9917SSatish Balay    to the this processor. This is a square mxm matrix. The remaining portion
1667af1d9917SSatish Balay    of the local submatrix (mxN) constitute the OFF-DIAGONAL portion.
1668af1d9917SSatish Balay 
1669af1d9917SSatish Balay    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
1670af1d9917SSatish Balay 
16715511cfe3SLois Curfman McInnes    By default, this format uses inodes (identical nodes) when possible.
16725511cfe3SLois Curfman McInnes    We search for consecutive rows with the same nonzero structure, thereby
16735511cfe3SLois Curfman McInnes    reusing matrix information to achieve increased efficiency.
16745511cfe3SLois Curfman McInnes 
16755511cfe3SLois Curfman McInnes    Options Database Keys:
1676db81eaa0SLois Curfman McInnes +  -mat_aij_no_inode  - Do not use inodes
1677db81eaa0SLois Curfman McInnes .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
1678db81eaa0SLois Curfman McInnes -  -mat_aij_oneindex - Internally use indexing starting at 1
1679db81eaa0SLois Curfman McInnes         rather than 0.  Note that when calling MatSetValues(),
1680db81eaa0SLois Curfman McInnes         the user still MUST index entries starting at 0!
1681494eafd4SBarry Smith .   -mat_mpi - use the parallel matrix data structures even on one processor
1682494eafd4SBarry Smith                (defaults to using SeqBAIJ format on one processor)
1683494eafd4SBarry Smith .   -mat_mpi - use the parallel matrix data structures even on one processor
1684494eafd4SBarry Smith                (defaults to using SeqAIJ format on one processor)
1685494eafd4SBarry Smith 
16865511cfe3SLois Curfman McInnes 
1687af1d9917SSatish Balay    Example usage:
1688e0245417SLois Curfman McInnes 
1689af1d9917SSatish Balay    Consider the following 8x8 matrix with 34 non-zero values, that is
1690af1d9917SSatish Balay    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
1691af1d9917SSatish Balay    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
1692af1d9917SSatish Balay    as follows:
16932191d07cSBarry Smith 
1694db81eaa0SLois Curfman McInnes .vb
1695af1d9917SSatish Balay             1  2  0  |  0  3  0  |  0  4
1696af1d9917SSatish Balay     Proc0   0  5  6  |  7  0  0  |  8  0
1697af1d9917SSatish Balay             9  0 10  | 11  0  0  | 12  0
1698af1d9917SSatish Balay     -------------------------------------
1699af1d9917SSatish Balay            13  0 14  | 15 16 17  |  0  0
1700af1d9917SSatish Balay     Proc1   0 18  0  | 19 20 21  |  0  0
1701af1d9917SSatish Balay             0  0  0  | 22 23  0  | 24  0
1702af1d9917SSatish Balay     -------------------------------------
1703af1d9917SSatish Balay     Proc2  25 26 27  |  0  0 28  | 29  0
1704af1d9917SSatish Balay            30  0  0  | 31 32 33  |  0 34
1705db81eaa0SLois Curfman McInnes .ve
1706b810aeb4SBarry Smith 
1707af1d9917SSatish Balay    This can be represented as a collection of submatrices as:
17085511cfe3SLois Curfman McInnes 
1709af1d9917SSatish Balay .vb
1710af1d9917SSatish Balay       A B C
1711af1d9917SSatish Balay       D E F
1712af1d9917SSatish Balay       G H I
1713af1d9917SSatish Balay .ve
1714af1d9917SSatish Balay 
1715af1d9917SSatish Balay    Where the submatrices A,B,C are owned by proc0, D,E,F are
1716af1d9917SSatish Balay    owned by proc1, G,H,I are owned by proc2.
1717af1d9917SSatish Balay 
1718af1d9917SSatish Balay    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
1719af1d9917SSatish Balay    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
1720af1d9917SSatish Balay    The 'M','N' parameters are 8,8, and have the same values on all procs.
1721af1d9917SSatish Balay 
1722af1d9917SSatish Balay    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
1723af1d9917SSatish Balay    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
1724af1d9917SSatish Balay    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
1725af1d9917SSatish Balay    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
1726af1d9917SSatish Balay    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
1727af1d9917SSatish Balay    matrix, ans [DF] as another SeqAIJ matrix.
1728af1d9917SSatish Balay 
1729af1d9917SSatish Balay    When d_nz, o_nz parameters are specified, d_nz storage elements are
1730af1d9917SSatish Balay    allocated for every row of the local diagonal submatrix, and o_nz
1731af1d9917SSatish Balay    storage locations are allocated for every row of the OFF-DIAGONAL submat.
1732af1d9917SSatish Balay    One way to choose d_nz and o_nz is to use the max nonzerors per local
1733af1d9917SSatish Balay    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
1734af1d9917SSatish Balay    In this case, the values of d_nz,o_nz are:
1735af1d9917SSatish Balay .vb
1736af1d9917SSatish Balay      proc0 : dnz = 2, o_nz = 2
1737af1d9917SSatish Balay      proc1 : dnz = 3, o_nz = 2
1738af1d9917SSatish Balay      proc2 : dnz = 1, o_nz = 4
1739af1d9917SSatish Balay .ve
1740af1d9917SSatish Balay    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
1741af1d9917SSatish Balay    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
1742af1d9917SSatish Balay    for proc3. i.e we are using 12+15+10=37 storage locations to store
1743af1d9917SSatish Balay    34 values.
1744af1d9917SSatish Balay 
1745af1d9917SSatish Balay    When d_nnz, o_nnz parameters are specified, the storage is specified
1746af1d9917SSatish Balay    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
1747af1d9917SSatish Balay    In the above case the values for d_nnz,o_nnz are:
1748af1d9917SSatish Balay .vb
1749af1d9917SSatish Balay      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
1750af1d9917SSatish Balay      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
1751af1d9917SSatish Balay      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
1752af1d9917SSatish Balay .ve
1753af1d9917SSatish Balay    Here the space allocated is sum of all the above values i.e 34, and
1754af1d9917SSatish Balay    hence pre-allocation is perfect.
17553a511b96SLois Curfman McInnes 
1756027ccd11SLois Curfman McInnes    Level: intermediate
1757027ccd11SLois Curfman McInnes 
1758dbd7a890SLois Curfman McInnes .keywords: matrix, aij, compressed row, sparse, parallel
1759ff756334SLois Curfman McInnes 
1760fafbff53SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
17618a729477SBarry Smith @*/
1762e1311b90SBarry Smith int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
17638a729477SBarry Smith {
176444cd7ae7SLois Curfman McInnes   Mat          B;
176544cd7ae7SLois Curfman McInnes   Mat_MPIAIJ   *b;
1766eac7125bSBarry Smith   int          ierr, i,size,flag1 = 0, flag2 = 0;
1767416022c9SBarry Smith 
17683a40ed3dSBarry Smith   PetscFunctionBegin;
17691d55c564SBarry Smith 
1770b73539f3SBarry Smith   if (d_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nz cannot be less than -2: value %d",d_nz);
1771b73539f3SBarry Smith   if (o_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nz cannot be less than -2: value %d",o_nz);
17721d55c564SBarry Smith   if (d_nnz) {
17731d55c564SBarry Smith     for (i=0; i<m; i++) {
1774b73539f3SBarry Smith       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
17751d55c564SBarry Smith     }
17761d55c564SBarry Smith   }
17771d55c564SBarry Smith   if (o_nnz) {
17781d55c564SBarry Smith     for (i=0; i<m; i++) {
1779b73539f3SBarry Smith       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
17801d55c564SBarry Smith     }
17811d55c564SBarry Smith   }
17821d55c564SBarry Smith 
17831dab6e02SBarry Smith   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
17847be0e774SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_mpiaij",&flag1);CHKERRQ(ierr);
17857be0e774SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2);CHKERRQ(ierr);
17867be0e774SBarry Smith   if (!flag1 && !flag2 && size == 1) {
17873914022bSBarry Smith     if (M == PETSC_DECIDE) M = m;
17883914022bSBarry Smith     if (N == PETSC_DECIDE) N = n;
17893914022bSBarry Smith     ierr = MatCreateSeqAIJ(comm,M,N,d_nz,d_nnz,A);CHKERRQ(ierr);
17903a40ed3dSBarry Smith     PetscFunctionReturn(0);
17913914022bSBarry Smith   }
17923914022bSBarry Smith 
179344cd7ae7SLois Curfman McInnes   *A = 0;
17943f1db9ecSBarry Smith   PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,"Mat",comm,MatDestroy,MatView);
179544cd7ae7SLois Curfman McInnes   PLogObjectCreate(B);
179644cd7ae7SLois Curfman McInnes   B->data         = (void *) (b = PetscNew(Mat_MPIAIJ));CHKPTRQ(b);
1797549d3d68SSatish Balay   ierr            = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
1798549d3d68SSatish Balay   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1799e1311b90SBarry Smith   B->ops->destroy = MatDestroy_MPIAIJ;
1800e1311b90SBarry Smith   B->ops->view    = MatView_MPIAIJ;
180144cd7ae7SLois Curfman McInnes   B->factor       = 0;
180244cd7ae7SLois Curfman McInnes   B->assembled    = PETSC_FALSE;
180390f02eecSBarry Smith   B->mapping      = 0;
1804d6dfbf8fSBarry Smith 
180547794344SBarry Smith   B->insertmode      = NOT_SET_VALUES;
18069eb4d147SSatish Balay   b->size            = size;
18071dab6e02SBarry Smith   ierr = MPI_Comm_rank(comm,&b->rank);CHKERRQ(ierr);
18081eb62cbbSBarry Smith 
18093a40ed3dSBarry Smith   if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) {
1810a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_WRONG,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
18113a40ed3dSBarry Smith   }
18121987afe7SBarry Smith 
1813eac7125bSBarry Smith   ierr = PetscSplitOwnership(comm,&m,&M);CHKERRQ(ierr);
1814eac7125bSBarry Smith   ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
181544cd7ae7SLois Curfman McInnes   b->m = m; B->m = m;
181644cd7ae7SLois Curfman McInnes   b->n = n; B->n = n;
181744cd7ae7SLois Curfman McInnes   b->N = N; B->N = N;
181844cd7ae7SLois Curfman McInnes   b->M = M; B->M = M;
18191eb62cbbSBarry Smith 
1820c7fcc2eaSBarry Smith   /* the information in the maps duplicates the information computed below, eventually
1821c7fcc2eaSBarry Smith      we should remove the duplicate information that is not contained in the maps */
1822253a16ddSBarry Smith   ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr);
1823253a16ddSBarry Smith   ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr);
1824c7fcc2eaSBarry Smith 
18251eb62cbbSBarry Smith   /* build local table of row and column ownerships */
182644cd7ae7SLois Curfman McInnes   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int));CHKPTRQ(b->rowners);
1827f09e8eb9SSatish Balay   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1828603f58a4SSatish Balay   b->cowners = b->rowners + b->size + 2;
1829ca161407SBarry Smith   ierr = MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
183044cd7ae7SLois Curfman McInnes   b->rowners[0] = 0;
183144cd7ae7SLois Curfman McInnes   for ( i=2; i<=b->size; i++ ) {
183244cd7ae7SLois Curfman McInnes     b->rowners[i] += b->rowners[i-1];
18338a729477SBarry Smith   }
183444cd7ae7SLois Curfman McInnes   b->rstart = b->rowners[b->rank];
183544cd7ae7SLois Curfman McInnes   b->rend   = b->rowners[b->rank+1];
1836ca161407SBarry Smith   ierr = MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
183744cd7ae7SLois Curfman McInnes   b->cowners[0] = 0;
183844cd7ae7SLois Curfman McInnes   for ( i=2; i<=b->size; i++ ) {
183944cd7ae7SLois Curfman McInnes     b->cowners[i] += b->cowners[i-1];
18408a729477SBarry Smith   }
184144cd7ae7SLois Curfman McInnes   b->cstart = b->cowners[b->rank];
184244cd7ae7SLois Curfman McInnes   b->cend   = b->cowners[b->rank+1];
18438a729477SBarry Smith 
18445ace5be8SLois Curfman McInnes   if (d_nz == PETSC_DEFAULT) d_nz = 5;
1845029af93fSBarry Smith   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
184644cd7ae7SLois Curfman McInnes   PLogObjectParent(B,b->A);
18477b8455f0SLois Curfman McInnes   if (o_nz == PETSC_DEFAULT) o_nz = 0;
1848029af93fSBarry Smith   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
184944cd7ae7SLois Curfman McInnes   PLogObjectParent(B,b->B);
18508a729477SBarry Smith 
18511eb62cbbSBarry Smith   /* build cache for off array entries formed */
18528798bf22SSatish Balay   ierr = MatStashCreate_Private(comm,1,&B->stash);CHKERRQ(ierr);
185390f02eecSBarry Smith   b->donotstash  = 0;
185444cd7ae7SLois Curfman McInnes   b->colmap      = 0;
185544cd7ae7SLois Curfman McInnes   b->garray      = 0;
185644cd7ae7SLois Curfman McInnes   b->roworiented = 1;
18578a729477SBarry Smith 
18581eb62cbbSBarry Smith   /* stuff used for matrix vector multiply */
185944cd7ae7SLois Curfman McInnes   b->lvec      = 0;
186044cd7ae7SLois Curfman McInnes   b->Mvctx     = 0;
18618a729477SBarry Smith 
18627a0afa10SBarry Smith   /* stuff for MatGetRow() */
186344cd7ae7SLois Curfman McInnes   b->rowindices   = 0;
186444cd7ae7SLois Curfman McInnes   b->rowvalues    = 0;
186544cd7ae7SLois Curfman McInnes   b->getrowactive = PETSC_FALSE;
18667a0afa10SBarry Smith 
18672e8a6d31SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",
18682e8a6d31SBarry Smith                                      "MatStoreValues_MPIAIJ",
18692e8a6d31SBarry Smith                                      (void*)MatStoreValues_MPIAIJ);CHKERRQ(ierr);
18702e8a6d31SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",
18712e8a6d31SBarry Smith                                      "MatRetrieveValues_MPIAIJ",
18722e8a6d31SBarry Smith                                      (void*)MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
18735ef9f2a5SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",
18745ef9f2a5SBarry Smith                                      "MatGetDiagonalBlock_MPIAIJ",
18755ef9f2a5SBarry Smith                                      (void*)MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
187644cd7ae7SLois Curfman McInnes   *A = B;
18773a40ed3dSBarry Smith   PetscFunctionReturn(0);
1878d6dfbf8fSBarry Smith }
1879c74985f6SBarry Smith 
18805615d1e5SSatish Balay #undef __FUNC__
18812e8a6d31SBarry Smith #define __FUNC__ "MatDuplicate_MPIAIJ"
18822e8a6d31SBarry Smith int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1883d6dfbf8fSBarry Smith {
1884d6dfbf8fSBarry Smith   Mat        mat;
1885416022c9SBarry Smith   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
1886a1b97e82SLois Curfman McInnes   int        ierr, len=0, flg;
1887d6dfbf8fSBarry Smith 
18883a40ed3dSBarry Smith   PetscFunctionBegin;
1889416022c9SBarry Smith   *newmat       = 0;
18903f1db9ecSBarry Smith   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,"Mat",matin->comm,MatDestroy,MatView);
1891a5a9c739SBarry Smith   PLogObjectCreate(mat);
18920452661fSBarry Smith   mat->data         = (void *) (a = PetscNew(Mat_MPIAIJ));CHKPTRQ(a);
1893549d3d68SSatish Balay   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1894e1311b90SBarry Smith   mat->ops->destroy = MatDestroy_MPIAIJ;
1895e1311b90SBarry Smith   mat->ops->view    = MatView_MPIAIJ;
1896d6dfbf8fSBarry Smith   mat->factor       = matin->factor;
1897c456f294SBarry Smith   mat->assembled    = PETSC_TRUE;
1898e7641de0SSatish Balay   mat->insertmode   = NOT_SET_VALUES;
1899d6dfbf8fSBarry Smith 
190044cd7ae7SLois Curfman McInnes   a->m = mat->m   = oldmat->m;
190144cd7ae7SLois Curfman McInnes   a->n = mat->n   = oldmat->n;
190244cd7ae7SLois Curfman McInnes   a->M = mat->M   = oldmat->M;
190344cd7ae7SLois Curfman McInnes   a->N = mat->N   = oldmat->N;
1904d6dfbf8fSBarry Smith 
1905416022c9SBarry Smith   a->rstart       = oldmat->rstart;
1906416022c9SBarry Smith   a->rend         = oldmat->rend;
1907416022c9SBarry Smith   a->cstart       = oldmat->cstart;
1908416022c9SBarry Smith   a->cend         = oldmat->cend;
190917699dbbSLois Curfman McInnes   a->size         = oldmat->size;
191017699dbbSLois Curfman McInnes   a->rank         = oldmat->rank;
1911e7641de0SSatish Balay   a->donotstash   = oldmat->donotstash;
1912e7641de0SSatish Balay   a->roworiented  = oldmat->roworiented;
1913e7641de0SSatish Balay   a->rowindices   = 0;
1914bcd2baecSBarry Smith   a->rowvalues    = 0;
1915bcd2baecSBarry Smith   a->getrowactive = PETSC_FALSE;
1916d6dfbf8fSBarry Smith 
1917603f58a4SSatish Balay   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners);
1918f09e8eb9SSatish Balay   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1919603f58a4SSatish Balay   a->cowners = a->rowners + a->size + 2;
1920549d3d68SSatish Balay   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr);
19218798bf22SSatish Balay   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
19222ee70a88SLois Curfman McInnes   if (oldmat->colmap) {
1923aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
19240f5bd95cSBarry Smith     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1925b1fc9764SSatish Balay #else
19260452661fSBarry Smith     a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
1927416022c9SBarry Smith     PLogObjectMemory(mat,(a->N)*sizeof(int));
1928549d3d68SSatish Balay     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));CHKERRQ(ierr);
1929b1fc9764SSatish Balay #endif
1930416022c9SBarry Smith   } else a->colmap = 0;
19313f41c07dSBarry Smith   if (oldmat->garray) {
19323f41c07dSBarry Smith     len = ((Mat_SeqAIJ *) (oldmat->B->data))->n;
19333f41c07dSBarry Smith     a->garray = (int *) PetscMalloc((len+1)*sizeof(int));CHKPTRQ(a->garray);
1934464493b3SBarry Smith     PLogObjectMemory(mat,len*sizeof(int));
1935549d3d68SSatish Balay     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); }
1936416022c9SBarry Smith   } else a->garray = 0;
1937d6dfbf8fSBarry Smith 
1938416022c9SBarry Smith   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1939416022c9SBarry Smith   PLogObjectParent(mat,a->lvec);
1940a56f8943SBarry Smith   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1941416022c9SBarry Smith   PLogObjectParent(mat,a->Mvctx);
19422e8a6d31SBarry Smith   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1943416022c9SBarry Smith   PLogObjectParent(mat,a->A);
19442e8a6d31SBarry Smith   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1945416022c9SBarry Smith   PLogObjectParent(mat,a->B);
19465dd7a6c7SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr);
194725cdf11fSBarry Smith   if (flg) {
1948682d7d0cSBarry Smith     ierr = MatPrintHelp(mat);CHKERRQ(ierr);
1949682d7d0cSBarry Smith   }
195027508adbSBarry Smith   ierr = FListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
19518a729477SBarry Smith   *newmat = mat;
19523a40ed3dSBarry Smith   PetscFunctionReturn(0);
19538a729477SBarry Smith }
1954416022c9SBarry Smith 
195577c4ece6SBarry Smith #include "sys.h"
1956416022c9SBarry Smith 
19575615d1e5SSatish Balay #undef __FUNC__
19585615d1e5SSatish Balay #define __FUNC__ "MatLoad_MPIAIJ"
195919bcc07fSBarry Smith int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
1960416022c9SBarry Smith {
1961d65a2f8fSBarry Smith   Mat          A;
1962d65a2f8fSBarry Smith   Scalar       *vals,*svals;
196319bcc07fSBarry Smith   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1964416022c9SBarry Smith   MPI_Status   status;
19653a40ed3dSBarry Smith   int          i, nz, ierr, j,rstart, rend, fd;
196617699dbbSLois Curfman McInnes   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1967d65a2f8fSBarry Smith   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
1968b362ba68SBarry Smith   int          tag = ((PetscObject)viewer)->tag,cend,cstart,n;
1969416022c9SBarry Smith 
19703a40ed3dSBarry Smith   PetscFunctionBegin;
19711dab6e02SBarry Smith   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
19721dab6e02SBarry Smith   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
197317699dbbSLois Curfman McInnes   if (!rank) {
197490ace30eSBarry Smith     ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
19750752156aSBarry Smith     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1976a8c6a408SBarry Smith     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
1977d64ed03dSBarry Smith     if (header[3] < 0) {
1978a8c6a408SBarry Smith       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix in special format on disk, cannot load as MPIAIJ");
1979d64ed03dSBarry Smith     }
19806c5fab8fSBarry Smith   }
19816c5fab8fSBarry Smith 
1982ca161407SBarry Smith   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1983416022c9SBarry Smith   M = header[1]; N = header[2];
1984416022c9SBarry Smith   /* determine ownership of all rows */
198517699dbbSLois Curfman McInnes   m = M/size + ((M % size) > rank);
19860452661fSBarry Smith   rowners = (int *) PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners);
1987ca161407SBarry Smith   ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1988416022c9SBarry Smith   rowners[0] = 0;
198917699dbbSLois Curfman McInnes   for ( i=2; i<=size; i++ ) {
1990416022c9SBarry Smith     rowners[i] += rowners[i-1];
1991416022c9SBarry Smith   }
199217699dbbSLois Curfman McInnes   rstart = rowners[rank];
199317699dbbSLois Curfman McInnes   rend   = rowners[rank+1];
1994416022c9SBarry Smith 
1995416022c9SBarry Smith   /* distribute row lengths to all processors */
19960452661fSBarry Smith   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) );CHKPTRQ(ourlens);
1997416022c9SBarry Smith   offlens = ourlens + (rend-rstart);
199817699dbbSLois Curfman McInnes   if (!rank) {
19990452661fSBarry Smith     rowlengths = (int*) PetscMalloc( M*sizeof(int) );CHKPTRQ(rowlengths);
20000752156aSBarry Smith     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
20010452661fSBarry Smith     sndcounts = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(sndcounts);
200217699dbbSLois Curfman McInnes     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
2003ca161407SBarry Smith     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
2004606d414cSSatish Balay     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
20053a40ed3dSBarry Smith   } else {
2006ca161407SBarry Smith     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);CHKERRQ(ierr);
2007416022c9SBarry Smith   }
2008416022c9SBarry Smith 
200917699dbbSLois Curfman McInnes   if (!rank) {
2010416022c9SBarry Smith     /* calculate the number of nonzeros on each processor */
20110452661fSBarry Smith     procsnz = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(procsnz);
2012549d3d68SSatish Balay     ierr    = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
201317699dbbSLois Curfman McInnes     for ( i=0; i<size; i++ ) {
2014416022c9SBarry Smith       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
2015416022c9SBarry Smith         procsnz[i] += rowlengths[j];
2016416022c9SBarry Smith       }
2017416022c9SBarry Smith     }
2018606d414cSSatish Balay     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2019416022c9SBarry Smith 
2020416022c9SBarry Smith     /* determine max buffer needed and allocate it */
2021416022c9SBarry Smith     maxnz = 0;
202217699dbbSLois Curfman McInnes     for ( i=0; i<size; i++ ) {
20230452661fSBarry Smith       maxnz = PetscMax(maxnz,procsnz[i]);
2024416022c9SBarry Smith     }
20250452661fSBarry Smith     cols = (int *) PetscMalloc( maxnz*sizeof(int) );CHKPTRQ(cols);
2026416022c9SBarry Smith 
2027416022c9SBarry Smith     /* read in my part of the matrix column indices  */
2028416022c9SBarry Smith     nz     = procsnz[0];
20290452661fSBarry Smith     mycols = (int *) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols);
20300752156aSBarry Smith     ierr   = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2031d65a2f8fSBarry Smith 
2032d65a2f8fSBarry Smith     /* read in every one elses and ship off */
203317699dbbSLois Curfman McInnes     for ( i=1; i<size; i++ ) {
2034d65a2f8fSBarry Smith       nz   = procsnz[i];
20350752156aSBarry Smith       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2036ca161407SBarry Smith       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2037d65a2f8fSBarry Smith     }
2038606d414cSSatish Balay     ierr = PetscFree(cols);CHKERRQ(ierr);
20393a40ed3dSBarry Smith   } else {
2040416022c9SBarry Smith     /* determine buffer space needed for message */
2041416022c9SBarry Smith     nz = 0;
2042416022c9SBarry Smith     for ( i=0; i<m; i++ ) {
2043416022c9SBarry Smith       nz += ourlens[i];
2044416022c9SBarry Smith     }
20450452661fSBarry Smith     mycols = (int*) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols);
2046416022c9SBarry Smith 
2047416022c9SBarry Smith     /* receive message of column indices*/
2048ca161407SBarry Smith     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2049ca161407SBarry Smith     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2050a8c6a408SBarry Smith     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
2051416022c9SBarry Smith   }
2052416022c9SBarry Smith 
2053b362ba68SBarry Smith   /* determine column ownership if matrix is not square */
2054b362ba68SBarry Smith   if (N != M) {
2055b362ba68SBarry Smith     n      = N/size + ((N % size) > rank);
2056b362ba68SBarry Smith     ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2057b362ba68SBarry Smith     cstart = cend - n;
2058b362ba68SBarry Smith   } else {
2059b362ba68SBarry Smith     cstart = rstart;
2060b362ba68SBarry Smith     cend   = rend;
2061fb2e594dSBarry Smith     n      = cend - cstart;
2062b362ba68SBarry Smith   }
2063b362ba68SBarry Smith 
2064416022c9SBarry Smith   /* loop over local rows, determining number of off diagonal entries */
2065549d3d68SSatish Balay   ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
2066416022c9SBarry Smith   jj = 0;
2067416022c9SBarry Smith   for ( i=0; i<m; i++ ) {
2068416022c9SBarry Smith     for ( j=0; j<ourlens[i]; j++ ) {
2069b362ba68SBarry Smith       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2070416022c9SBarry Smith       jj++;
2071416022c9SBarry Smith     }
2072416022c9SBarry Smith   }
2073d65a2f8fSBarry Smith 
2074d65a2f8fSBarry Smith   /* create our matrix */
2075416022c9SBarry Smith   for ( i=0; i<m; i++ ) {
2076416022c9SBarry Smith     ourlens[i] -= offlens[i];
2077416022c9SBarry Smith   }
2078b362ba68SBarry Smith   ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
2079d65a2f8fSBarry Smith   A = *newmat;
2080fb2e594dSBarry Smith   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2081d65a2f8fSBarry Smith   for ( i=0; i<m; i++ ) {
2082d65a2f8fSBarry Smith     ourlens[i] += offlens[i];
2083d65a2f8fSBarry Smith   }
2084416022c9SBarry Smith 
208517699dbbSLois Curfman McInnes   if (!rank) {
20860452661fSBarry Smith     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) );CHKPTRQ(vals);
2087416022c9SBarry Smith 
2088416022c9SBarry Smith     /* read in my part of the matrix numerical values  */
2089416022c9SBarry Smith     nz   = procsnz[0];
20900752156aSBarry Smith     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2091d65a2f8fSBarry Smith 
2092d65a2f8fSBarry Smith     /* insert into matrix */
2093d65a2f8fSBarry Smith     jj      = rstart;
2094d65a2f8fSBarry Smith     smycols = mycols;
2095d65a2f8fSBarry Smith     svals   = vals;
2096d65a2f8fSBarry Smith     for ( i=0; i<m; i++ ) {
2097d65a2f8fSBarry Smith       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2098d65a2f8fSBarry Smith       smycols += ourlens[i];
2099d65a2f8fSBarry Smith       svals   += ourlens[i];
2100d65a2f8fSBarry Smith       jj++;
2101416022c9SBarry Smith     }
2102416022c9SBarry Smith 
2103d65a2f8fSBarry Smith     /* read in other processors and ship out */
210417699dbbSLois Curfman McInnes     for ( i=1; i<size; i++ ) {
2105416022c9SBarry Smith       nz   = procsnz[i];
21060752156aSBarry Smith       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2107ca161407SBarry Smith       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2108416022c9SBarry Smith     }
2109606d414cSSatish Balay     ierr = PetscFree(procsnz);CHKERRQ(ierr);
21103a40ed3dSBarry Smith   } else {
2111d65a2f8fSBarry Smith     /* receive numeric values */
21120452661fSBarry Smith     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) );CHKPTRQ(vals);
2113416022c9SBarry Smith 
2114d65a2f8fSBarry Smith     /* receive message of values*/
2115ca161407SBarry Smith     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2116ca161407SBarry Smith     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2117a8c6a408SBarry Smith     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
2118d65a2f8fSBarry Smith 
2119d65a2f8fSBarry Smith     /* insert into matrix */
2120d65a2f8fSBarry Smith     jj      = rstart;
2121d65a2f8fSBarry Smith     smycols = mycols;
2122d65a2f8fSBarry Smith     svals   = vals;
2123d65a2f8fSBarry Smith     for ( i=0; i<m; i++ ) {
2124d65a2f8fSBarry Smith       ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2125d65a2f8fSBarry Smith       smycols += ourlens[i];
2126d65a2f8fSBarry Smith       svals   += ourlens[i];
2127d65a2f8fSBarry Smith       jj++;
2128d65a2f8fSBarry Smith     }
2129d65a2f8fSBarry Smith   }
2130606d414cSSatish Balay   ierr = PetscFree(ourlens);CHKERRQ(ierr);
2131606d414cSSatish Balay   ierr = PetscFree(vals);CHKERRQ(ierr);
2132606d414cSSatish Balay   ierr = PetscFree(mycols);CHKERRQ(ierr);
2133606d414cSSatish Balay   ierr = PetscFree(rowners);CHKERRQ(ierr);
2134d65a2f8fSBarry Smith 
21356d4a8577SBarry Smith   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
21366d4a8577SBarry Smith   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
21373a40ed3dSBarry Smith   PetscFunctionReturn(0);
2138416022c9SBarry Smith }
2139a0ff6018SBarry Smith 
214029da9460SBarry Smith #undef __FUNC__
214129da9460SBarry Smith #define __FUNC__ "MatGetSubMatrix_MPIAIJ"
2142a0ff6018SBarry Smith /*
214329da9460SBarry Smith     Not great since it makes two copies of the submatrix, first an SeqAIJ
214429da9460SBarry Smith   in local and then by concatenating the local matrices the end result.
214529da9460SBarry Smith   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
2146a0ff6018SBarry Smith */
21477b2a1423SBarry Smith int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
2148a0ff6018SBarry Smith {
214900e6dbe6SBarry Smith   int        ierr, i, m,n,rstart,row,rend,nz,*cwork,size,rank,j;
2150fee21e36SBarry Smith   Mat        *local,M, Mreuse;
215100e6dbe6SBarry Smith   Scalar     *vwork,*aa;
215200e6dbe6SBarry Smith   MPI_Comm   comm = mat->comm;
215300e6dbe6SBarry Smith   Mat_SeqAIJ *aij;
215400e6dbe6SBarry Smith   int        *ii, *jj,nlocal,*dlens,*olens,dlen,olen,jend;
2155a0ff6018SBarry Smith 
2156a0ff6018SBarry Smith   PetscFunctionBegin;
21571dab6e02SBarry Smith   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
21581dab6e02SBarry Smith   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
215900e6dbe6SBarry Smith 
2160fee21e36SBarry Smith   if (call ==  MAT_REUSE_MATRIX) {
2161fee21e36SBarry Smith     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2162fee21e36SBarry Smith     if (!Mreuse) SETERRQ(1,1,"Submatrix passed in was not used before, cannot reuse");
2163fee21e36SBarry Smith     local = &Mreuse;
2164fee21e36SBarry Smith     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2165fee21e36SBarry Smith   } else {
2166a0ff6018SBarry Smith     ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2167fee21e36SBarry Smith     Mreuse = *local;
2168606d414cSSatish Balay     ierr = PetscFree(local);CHKERRQ(ierr);
2169fee21e36SBarry Smith   }
2170a0ff6018SBarry Smith 
2171a0ff6018SBarry Smith   /*
2172a0ff6018SBarry Smith       m - number of local rows
2173a0ff6018SBarry Smith       n - number of columns (same on all processors)
2174a0ff6018SBarry Smith       rstart - first row in new global matrix generated
2175a0ff6018SBarry Smith   */
2176fee21e36SBarry Smith   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2177a0ff6018SBarry Smith   if (call == MAT_INITIAL_MATRIX) {
2178fee21e36SBarry Smith     aij = (Mat_SeqAIJ *) (Mreuse)->data;
2179a8c6a408SBarry Smith     if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
218000e6dbe6SBarry Smith     ii  = aij->i;
218100e6dbe6SBarry Smith     jj  = aij->j;
218200e6dbe6SBarry Smith 
2183a0ff6018SBarry Smith     /*
218400e6dbe6SBarry Smith         Determine the number of non-zeros in the diagonal and off-diagonal
218500e6dbe6SBarry Smith         portions of the matrix in order to do correct preallocation
2186a0ff6018SBarry Smith     */
218700e6dbe6SBarry Smith 
218800e6dbe6SBarry Smith     /* first get start and end of "diagonal" columns */
21896a6a5d1dSBarry Smith     if (csize == PETSC_DECIDE) {
219000e6dbe6SBarry Smith       nlocal = n/size + ((n % size) > rank);
21916a6a5d1dSBarry Smith     } else {
21926a6a5d1dSBarry Smith       nlocal = csize;
21936a6a5d1dSBarry Smith     }
2194ca161407SBarry Smith     ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
219500e6dbe6SBarry Smith     rstart = rend - nlocal;
21966a6a5d1dSBarry Smith     if (rank == size - 1 && rend != n) {
21976a6a5d1dSBarry Smith       SETERRQ(1,1,"Local column sizes do not add up to total number of columns");
21986a6a5d1dSBarry Smith     }
219900e6dbe6SBarry Smith 
220000e6dbe6SBarry Smith     /* next, compute all the lengths */
220100e6dbe6SBarry Smith     dlens = (int *) PetscMalloc( (2*m+1)*sizeof(int) );CHKPTRQ(dlens);
220200e6dbe6SBarry Smith     olens = dlens + m;
220300e6dbe6SBarry Smith     for ( i=0; i<m; i++ ) {
220400e6dbe6SBarry Smith       jend = ii[i+1] - ii[i];
220500e6dbe6SBarry Smith       olen = 0;
220600e6dbe6SBarry Smith       dlen = 0;
220700e6dbe6SBarry Smith       for ( j=0; j<jend; j++ ) {
220800e6dbe6SBarry Smith         if ( *jj < rstart || *jj >= rend) olen++;
220900e6dbe6SBarry Smith         else dlen++;
221000e6dbe6SBarry Smith         jj++;
221100e6dbe6SBarry Smith       }
221200e6dbe6SBarry Smith       olens[i] = olen;
221300e6dbe6SBarry Smith       dlens[i] = dlen;
221400e6dbe6SBarry Smith     }
22152d207970SBarry Smith     ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
2216606d414cSSatish Balay     ierr = PetscFree(dlens);CHKERRQ(ierr);
2217a0ff6018SBarry Smith   } else {
2218a0ff6018SBarry Smith     int ml,nl;
2219a0ff6018SBarry Smith 
2220a0ff6018SBarry Smith     M = *newmat;
2221a0ff6018SBarry Smith     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2222a8c6a408SBarry Smith     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,1,"Previous matrix must be same size/layout as request");
2223a0ff6018SBarry Smith     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2224c48de900SBarry Smith     /*
2225c48de900SBarry Smith          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2226c48de900SBarry Smith        rather than the slower MatSetValues().
2227c48de900SBarry Smith     */
2228c48de900SBarry Smith     M->was_assembled = PETSC_TRUE;
2229c48de900SBarry Smith     M->assembled     = PETSC_FALSE;
2230a0ff6018SBarry Smith   }
2231a0ff6018SBarry Smith   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2232fee21e36SBarry Smith   aij = (Mat_SeqAIJ *) (Mreuse)->data;
2233a8c6a408SBarry Smith   if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
223400e6dbe6SBarry Smith   ii  = aij->i;
223500e6dbe6SBarry Smith   jj  = aij->j;
223600e6dbe6SBarry Smith   aa  = aij->a;
2237a0ff6018SBarry Smith   for (i=0; i<m; i++) {
2238a0ff6018SBarry Smith     row   = rstart + i;
223900e6dbe6SBarry Smith     nz    = ii[i+1] - ii[i];
224000e6dbe6SBarry Smith     cwork = jj;     jj += nz;
224100e6dbe6SBarry Smith     vwork = aa;     aa += nz;
22428c638d02SBarry Smith     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2243a0ff6018SBarry Smith   }
2244a0ff6018SBarry Smith 
2245a0ff6018SBarry Smith   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2246a0ff6018SBarry Smith   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2247a0ff6018SBarry Smith   *newmat = M;
2248fee21e36SBarry Smith 
2249fee21e36SBarry Smith   /* save submatrix used in processor for next request */
2250fee21e36SBarry Smith   if (call ==  MAT_INITIAL_MATRIX) {
2251fee21e36SBarry Smith     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2252fee21e36SBarry Smith     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2253fee21e36SBarry Smith   }
2254fee21e36SBarry Smith 
2255a0ff6018SBarry Smith   PetscFunctionReturn(0);
2256a0ff6018SBarry Smith }
2257