xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision ba4e3ef2db0ec5ace479b1ffc67b6207dc0fbe6f)
1a5eb4965SSatish Balay #ifdef PETSC_RCS_HEADER
2*ba4e3ef2SSatish Balay static char vcid[] = "$Id: mpiaij.c,v 1.209 1997/07/09 20:54:04 balay Exp balay $";
3cb512458SBarry Smith #endif
48a729477SBarry Smith 
53369ce9aSBarry Smith #include "pinclude/pviewer.h"
670f55243SBarry Smith #include "src/mat/impls/aij/mpi/mpiaij.h"
7f5eb4b81SSatish Balay #include "src/vec/vecimpl.h"
8d9942c19SSatish Balay #include "src/inline/spops.h"
98a729477SBarry Smith 
109e25ed09SBarry Smith /* local utility routine that creates a mapping from the global column
119e25ed09SBarry Smith number to the local number in the off-diagonal part of the local
129e25ed09SBarry Smith storage of the matrix.  This is done in a non scable way since the
139e25ed09SBarry Smith length of colmap equals the global matrix length.
149e25ed09SBarry Smith */
155615d1e5SSatish Balay #undef __FUNC__
165eea60f9SBarry Smith #define __FUNC__ "CreateColmap_MPIAIJ_Private" /* ADIC Ignore */
170a198c4cSBarry Smith int CreateColmap_MPIAIJ_Private(Mat mat)
189e25ed09SBarry Smith {
1944a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
20ec8511deSBarry Smith   Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
21905e6a2fSBarry Smith   int        n = B->n,i;
22dbb450caSBarry Smith 
23758f045eSSatish Balay   aij->colmap = (int *) PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap);
24464493b3SBarry Smith   PLogObjectMemory(mat,aij->N*sizeof(int));
25cddf8d76SBarry Smith   PetscMemzero(aij->colmap,aij->N*sizeof(int));
26905e6a2fSBarry Smith   for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
279e25ed09SBarry Smith   return 0;
289e25ed09SBarry Smith }
299e25ed09SBarry Smith 
302493cbb0SBarry Smith extern int DisAssemble_MPIAIJ(Mat);
312493cbb0SBarry Smith 
325615d1e5SSatish Balay #undef __FUNC__
335615d1e5SSatish Balay #define __FUNC__ "MatGetRowIJ_MPIAIJ"
348f6be9afSLois Curfman McInnes int MatGetRowIJ_MPIAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
353b2fbd54SBarry Smith                            PetscTruth *done)
36299609e3SLois Curfman McInnes {
37299609e3SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
38299609e3SLois Curfman McInnes   int        ierr;
3917699dbbSLois Curfman McInnes   if (aij->size == 1) {
403b2fbd54SBarry Smith     ierr = MatGetRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
41e3372554SBarry Smith   } else SETERRQ(1,0,"not supported in parallel");
423b2fbd54SBarry Smith   return 0;
433b2fbd54SBarry Smith }
443b2fbd54SBarry Smith 
455615d1e5SSatish Balay #undef __FUNC__
465eea60f9SBarry Smith #define __FUNC__ "MatRestoreRowIJ_MPIAIJ" /* ADIC Ignore */
478f6be9afSLois Curfman McInnes int MatRestoreRowIJ_MPIAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
483b2fbd54SBarry Smith                                PetscTruth *done)
493b2fbd54SBarry Smith {
503b2fbd54SBarry Smith   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
513b2fbd54SBarry Smith   int        ierr;
523b2fbd54SBarry Smith   if (aij->size == 1) {
533b2fbd54SBarry Smith     ierr = MatRestoreRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
54e3372554SBarry Smith   } else SETERRQ(1,0,"not supported in parallel");
55299609e3SLois Curfman McInnes   return 0;
56299609e3SLois Curfman McInnes }
57299609e3SLois Curfman McInnes 
580520107fSSatish Balay #define CHUNKSIZE   15
5930770e4dSSatish Balay #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
600520107fSSatish Balay { \
610520107fSSatish Balay  \
620520107fSSatish Balay     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
6330770e4dSSatish Balay     rmax = aimax[row]; nrow = ailen[row];  \
64f5e9677aSSatish Balay     col1 = col - shift; \
65f5e9677aSSatish Balay      \
66*ba4e3ef2SSatish Balay     low = 0; high = nrow; \
67*ba4e3ef2SSatish Balay     while (high-low > 5) { \
68*ba4e3ef2SSatish Balay       t = (low+high)/2; \
69*ba4e3ef2SSatish Balay       if (rp[t] > col) high = t; \
70*ba4e3ef2SSatish Balay       else             low  = t; \
71*ba4e3ef2SSatish Balay     } \
720520107fSSatish Balay       for ( _i=0; _i<nrow; _i++ ) { \
73f5e9677aSSatish Balay         if (rp[_i] > col1) break; \
74f5e9677aSSatish Balay         if (rp[_i] == col1) { \
750520107fSSatish Balay           if (addv == ADD_VALUES) ap[_i] += value;   \
760520107fSSatish Balay           else                  ap[_i] = value; \
7730770e4dSSatish Balay           goto a_noinsert; \
780520107fSSatish Balay         } \
790520107fSSatish Balay       }  \
8089280ab3SLois Curfman McInnes       if (nonew == 1) goto a_noinsert; \
8111ebbc71SLois Curfman McInnes       else if (nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \
820520107fSSatish Balay       if (nrow >= rmax) { \
830520107fSSatish Balay         /* there is no extra room in row, therefore enlarge */ \
840520107fSSatish Balay         int    new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \
850520107fSSatish Balay         Scalar *new_a; \
860520107fSSatish Balay  \
8711ebbc71SLois Curfman McInnes         if (nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \
8889280ab3SLois Curfman McInnes  \
890520107fSSatish Balay         /* malloc new storage space */ \
900520107fSSatish Balay         len     = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \
910520107fSSatish Balay         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
920520107fSSatish Balay         new_j   = (int *) (new_a + new_nz); \
930520107fSSatish Balay         new_i   = new_j + new_nz; \
940520107fSSatish Balay  \
950520107fSSatish Balay         /* copy over old data into new slots */ \
960520107fSSatish Balay         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \
970520107fSSatish Balay         for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
980520107fSSatish Balay         PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int)); \
990520107fSSatish Balay         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
1000520107fSSatish Balay         PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
1010520107fSSatish Balay                                                            len*sizeof(int)); \
1020520107fSSatish Balay         PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar)); \
1030520107fSSatish Balay         PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
1040520107fSSatish Balay                                                            len*sizeof(Scalar));  \
1050520107fSSatish Balay         /* free up old matrix storage */ \
106f5e9677aSSatish Balay  \
1070520107fSSatish Balay         PetscFree(a->a);  \
1080520107fSSatish Balay         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
1090520107fSSatish Balay         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
1100520107fSSatish Balay         a->singlemalloc = 1; \
1110520107fSSatish Balay  \
1120520107fSSatish Balay         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
11330770e4dSSatish Balay         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
1140520107fSSatish Balay         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
1150520107fSSatish Balay         a->maxnz += CHUNKSIZE; \
1160520107fSSatish Balay         a->reallocs++; \
1170520107fSSatish Balay       } \
1180520107fSSatish Balay       N = nrow++ - 1; a->nz++; \
1190520107fSSatish Balay       /* shift up all the later entries in this row */ \
1200520107fSSatish Balay       for ( ii=N; ii>=_i; ii-- ) { \
1210520107fSSatish Balay         rp[ii+1] = rp[ii]; \
1220520107fSSatish Balay         ap[ii+1] = ap[ii]; \
1230520107fSSatish Balay       } \
124f5e9677aSSatish Balay       rp[_i] = col1;  \
1250520107fSSatish Balay       ap[_i] = value;  \
12630770e4dSSatish Balay       a_noinsert: ; \
1270520107fSSatish Balay       ailen[row] = nrow; \
1280520107fSSatish Balay }
1290a198c4cSBarry Smith 
13030770e4dSSatish Balay #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
13130770e4dSSatish Balay { \
13230770e4dSSatish Balay  \
13330770e4dSSatish Balay     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
13430770e4dSSatish Balay     rmax = bimax[row]; nrow = bilen[row];  \
13530770e4dSSatish Balay     col1 = col - shift; \
13630770e4dSSatish Balay      \
137*ba4e3ef2SSatish Balay     low = 0; high = nrow; \
138*ba4e3ef2SSatish Balay     while (high-low > 5) { \
139*ba4e3ef2SSatish Balay       t = (low+high)/2; \
140*ba4e3ef2SSatish Balay       if (rp[t] > col) high = t; \
141*ba4e3ef2SSatish Balay       else             low  = t; \
142*ba4e3ef2SSatish Balay     } \
14330770e4dSSatish Balay        for ( _i=0; _i<nrow; _i++ ) { \
14430770e4dSSatish Balay         if (rp[_i] > col1) break; \
14530770e4dSSatish Balay         if (rp[_i] == col1) { \
14630770e4dSSatish Balay           if (addv == ADD_VALUES) ap[_i] += value;   \
14730770e4dSSatish Balay           else                  ap[_i] = value; \
14830770e4dSSatish Balay           goto b_noinsert; \
14930770e4dSSatish Balay         } \
15030770e4dSSatish Balay       }  \
15189280ab3SLois Curfman McInnes       if (nonew == 1) goto b_noinsert; \
15211ebbc71SLois Curfman McInnes       else if (nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \
15330770e4dSSatish Balay       if (nrow >= rmax) { \
15430770e4dSSatish Balay         /* there is no extra room in row, therefore enlarge */ \
15574c639caSSatish Balay         int    new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \
15630770e4dSSatish Balay         Scalar *new_a; \
15730770e4dSSatish Balay  \
15811ebbc71SLois Curfman McInnes         if (nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \
15989280ab3SLois Curfman McInnes  \
16030770e4dSSatish Balay         /* malloc new storage space */ \
16174c639caSSatish Balay         len     = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \
16230770e4dSSatish Balay         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
16330770e4dSSatish Balay         new_j   = (int *) (new_a + new_nz); \
16430770e4dSSatish Balay         new_i   = new_j + new_nz; \
16530770e4dSSatish Balay  \
16630770e4dSSatish Balay         /* copy over old data into new slots */ \
16730770e4dSSatish Balay         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \
16874c639caSSatish Balay         for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
16930770e4dSSatish Balay         PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int)); \
17030770e4dSSatish Balay         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
17130770e4dSSatish Balay         PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
17230770e4dSSatish Balay                                                            len*sizeof(int)); \
17330770e4dSSatish Balay         PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar)); \
17430770e4dSSatish Balay         PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
17530770e4dSSatish Balay                                                            len*sizeof(Scalar));  \
17630770e4dSSatish Balay         /* free up old matrix storage */ \
17730770e4dSSatish Balay  \
17874c639caSSatish Balay         PetscFree(b->a);  \
17974c639caSSatish Balay         if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \
18074c639caSSatish Balay         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
18174c639caSSatish Balay         b->singlemalloc = 1; \
18230770e4dSSatish Balay  \
18330770e4dSSatish Balay         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
18430770e4dSSatish Balay         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
18574c639caSSatish Balay         PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
18674c639caSSatish Balay         b->maxnz += CHUNKSIZE; \
18774c639caSSatish Balay         b->reallocs++; \
18830770e4dSSatish Balay       } \
18974c639caSSatish Balay       N = nrow++ - 1; b->nz++; \
19030770e4dSSatish Balay       /* shift up all the later entries in this row */ \
19130770e4dSSatish Balay       for ( ii=N; ii>=_i; ii-- ) { \
19230770e4dSSatish Balay         rp[ii+1] = rp[ii]; \
19330770e4dSSatish Balay         ap[ii+1] = ap[ii]; \
19430770e4dSSatish Balay       } \
19530770e4dSSatish Balay       rp[_i] = col1;  \
19630770e4dSSatish Balay       ap[_i] = value;  \
19730770e4dSSatish Balay       b_noinsert: ; \
19830770e4dSSatish Balay       bilen[row] = nrow; \
19930770e4dSSatish Balay }
20030770e4dSSatish Balay 
2010520107fSSatish Balay extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
2025615d1e5SSatish Balay #undef __FUNC__
2035615d1e5SSatish Balay #define __FUNC__ "MatSetValues_MPIAIJ"
2048f6be9afSLois Curfman McInnes int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
2058a729477SBarry Smith {
20644a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
2074b0e389bSBarry Smith   Scalar     value;
2081eb62cbbSBarry Smith   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
2091eb62cbbSBarry Smith   int        cstart = aij->cstart, cend = aij->cend,row,col;
210905e6a2fSBarry Smith   int        roworiented = aij->roworiented;
2118a729477SBarry Smith 
2120520107fSSatish Balay   /* Some Variables required in the macro */
2134ee7247eSSatish Balay   Mat        A = aij->A;
2144ee7247eSSatish Balay   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
21530770e4dSSatish Balay   int        *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j;
21630770e4dSSatish Balay   Scalar     *aa = a->a;
21730770e4dSSatish Balay 
21830770e4dSSatish Balay   Mat        B = aij->B;
21930770e4dSSatish Balay   Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data;
22030770e4dSSatish Balay   int        *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j;
22130770e4dSSatish Balay   Scalar     *ba = b->a;
22230770e4dSSatish Balay 
223*ba4e3ef2SSatish Balay   int        *rp,ii,nrow,_i,rmax, N, col1,low,high,t;
22430770e4dSSatish Balay   int        nonew = a->nonew,shift = a->indexshift;
22530770e4dSSatish Balay   Scalar     *ap;
2264ee7247eSSatish Balay 
2278a729477SBarry Smith   for ( i=0; i<m; i++ ) {
2280a198c4cSBarry Smith #if defined(PETSC_BOPT_g)
229e3372554SBarry Smith     if (im[i] < 0) SETERRQ(1,0,"Negative row");
230e3372554SBarry Smith     if (im[i] >= aij->M) SETERRQ(1,0,"Row too large");
2310a198c4cSBarry Smith #endif
2324b0e389bSBarry Smith     if (im[i] >= rstart && im[i] < rend) {
2334b0e389bSBarry Smith       row = im[i] - rstart;
2341eb62cbbSBarry Smith       for ( j=0; j<n; j++ ) {
2354b0e389bSBarry Smith         if (in[j] >= cstart && in[j] < cend){
2364b0e389bSBarry Smith           col = in[j] - cstart;
2374b0e389bSBarry Smith           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
23830770e4dSSatish Balay           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
2390520107fSSatish Balay           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
2401eb62cbbSBarry Smith         }
2410a198c4cSBarry Smith #if defined(PETSC_BOPT_g)
242e3372554SBarry Smith         else if (in[j] < 0) {SETERRQ(1,0,"Negative column");}
243e3372554SBarry Smith         else if (in[j] >= aij->N) {SETERRQ(1,0,"Col too large");}
2440a198c4cSBarry Smith #endif
2451eb62cbbSBarry Smith         else {
246227d817aSBarry Smith           if (mat->was_assembled) {
247905e6a2fSBarry Smith             if (!aij->colmap) {
248905e6a2fSBarry Smith               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
249905e6a2fSBarry Smith             }
250905e6a2fSBarry Smith             col = aij->colmap[in[j]] - 1;
251ec8511deSBarry Smith             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
2522493cbb0SBarry Smith               ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
2534b0e389bSBarry Smith               col =  in[j];
2549bf004c3SSatish Balay               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
255f9508a3cSSatish Balay               B = aij->B;
256f9508a3cSSatish Balay               b = (Mat_SeqAIJ *) B->data;
257f9508a3cSSatish Balay               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
258f9508a3cSSatish Balay               ba = b->a;
259d6dfbf8fSBarry Smith             }
2609e25ed09SBarry Smith           }
2614b0e389bSBarry Smith           else col = in[j];
2624b0e389bSBarry Smith           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
26330770e4dSSatish Balay           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
26430770e4dSSatish Balay           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
2651eb62cbbSBarry Smith         }
2661eb62cbbSBarry Smith       }
2671eb62cbbSBarry Smith     }
2681eb62cbbSBarry Smith     else {
26990f02eecSBarry Smith       if (roworiented && !aij->donotstash) {
2704b0e389bSBarry Smith         ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
2714b0e389bSBarry Smith       }
2724b0e389bSBarry Smith       else {
27390f02eecSBarry Smith         if (!aij->donotstash) {
2744b0e389bSBarry Smith           row = im[i];
2754b0e389bSBarry Smith           for ( j=0; j<n; j++ ) {
2764b0e389bSBarry Smith             ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
2774b0e389bSBarry Smith           }
2784b0e389bSBarry Smith         }
2791eb62cbbSBarry Smith       }
2808a729477SBarry Smith     }
28190f02eecSBarry Smith   }
2828a729477SBarry Smith   return 0;
2838a729477SBarry Smith }
2848a729477SBarry Smith 
2855615d1e5SSatish Balay #undef __FUNC__
2865615d1e5SSatish Balay #define __FUNC__ "MatGetValues_MPIAIJ"
2878f6be9afSLois Curfman McInnes int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
288b49de8d1SLois Curfman McInnes {
289b49de8d1SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
290b49de8d1SLois Curfman McInnes   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
291b49de8d1SLois Curfman McInnes   int        cstart = aij->cstart, cend = aij->cend,row,col;
292b49de8d1SLois Curfman McInnes 
293b49de8d1SLois Curfman McInnes   for ( i=0; i<m; i++ ) {
294e3372554SBarry Smith     if (idxm[i] < 0) SETERRQ(1,0,"Negative row");
295e3372554SBarry Smith     if (idxm[i] >= aij->M) SETERRQ(1,0,"Row too large");
296b49de8d1SLois Curfman McInnes     if (idxm[i] >= rstart && idxm[i] < rend) {
297b49de8d1SLois Curfman McInnes       row = idxm[i] - rstart;
298b49de8d1SLois Curfman McInnes       for ( j=0; j<n; j++ ) {
299e3372554SBarry Smith         if (idxn[j] < 0) SETERRQ(1,0,"Negative column");
300e3372554SBarry Smith         if (idxn[j] >= aij->N) SETERRQ(1,0,"Col too large");
301b49de8d1SLois Curfman McInnes         if (idxn[j] >= cstart && idxn[j] < cend){
302b49de8d1SLois Curfman McInnes           col = idxn[j] - cstart;
303b49de8d1SLois Curfman McInnes           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
304b49de8d1SLois Curfman McInnes         }
305b49de8d1SLois Curfman McInnes         else {
306905e6a2fSBarry Smith           if (!aij->colmap) {
307905e6a2fSBarry Smith             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
308905e6a2fSBarry Smith           }
309905e6a2fSBarry Smith           col = aij->colmap[idxn[j]] - 1;
310e60e1c95SSatish Balay           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
311d9d09a02SSatish Balay           else {
312b49de8d1SLois Curfman McInnes             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
313b49de8d1SLois Curfman McInnes           }
314b49de8d1SLois Curfman McInnes         }
315b49de8d1SLois Curfman McInnes       }
316d9d09a02SSatish Balay     }
317b49de8d1SLois Curfman McInnes     else {
318e3372554SBarry Smith       SETERRQ(1,0,"Only local values currently supported");
319b49de8d1SLois Curfman McInnes     }
320b49de8d1SLois Curfman McInnes   }
321b49de8d1SLois Curfman McInnes   return 0;
322b49de8d1SLois Curfman McInnes }
323b49de8d1SLois Curfman McInnes 
3245615d1e5SSatish Balay #undef __FUNC__
3255615d1e5SSatish Balay #define __FUNC__ "MatAssemblyBegin_MPIAIJ"
3268f6be9afSLois Curfman McInnes int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
3278a729477SBarry Smith {
32844a69424SLois Curfman McInnes   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
329d6dfbf8fSBarry Smith   MPI_Comm    comm = mat->comm;
33017699dbbSLois Curfman McInnes   int         size = aij->size, *owners = aij->rowners;
33117699dbbSLois Curfman McInnes   int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
3321eb62cbbSBarry Smith   MPI_Request *send_waits,*recv_waits;
3336abc6512SBarry Smith   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
3341eb62cbbSBarry Smith   InsertMode  addv;
3351eb62cbbSBarry Smith   Scalar      *rvalues,*svalues;
3361eb62cbbSBarry Smith 
3371eb62cbbSBarry Smith   /* make sure all processors are either in INSERTMODE or ADDMODE */
33847794344SBarry Smith   MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
339dbb450caSBarry Smith   if (addv == (ADD_VALUES|INSERT_VALUES)) {
340e3372554SBarry Smith     SETERRQ(1,0,"Some processors inserted others added");
3411eb62cbbSBarry Smith   }
34247794344SBarry Smith   mat->insertmode = addv; /* in case this processor had no cache */
3431eb62cbbSBarry Smith 
3441eb62cbbSBarry Smith   /*  first count number of contributors to each processor */
3450452661fSBarry Smith   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
346cddf8d76SBarry Smith   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
3470452661fSBarry Smith   owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
3481eb62cbbSBarry Smith   for ( i=0; i<aij->stash.n; i++ ) {
3491eb62cbbSBarry Smith     idx = aij->stash.idx[i];
35017699dbbSLois Curfman McInnes     for ( j=0; j<size; j++ ) {
3511eb62cbbSBarry Smith       if (idx >= owners[j] && idx < owners[j+1]) {
3521eb62cbbSBarry Smith         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
3538a729477SBarry Smith       }
3548a729477SBarry Smith     }
3558a729477SBarry Smith   }
35617699dbbSLois Curfman McInnes   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
3571eb62cbbSBarry Smith 
3581eb62cbbSBarry Smith   /* inform other processors of number of messages and max length*/
3590452661fSBarry Smith   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
36017699dbbSLois Curfman McInnes   MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);
36117699dbbSLois Curfman McInnes   nreceives = work[rank];
36217699dbbSLois Curfman McInnes   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
36317699dbbSLois Curfman McInnes   nmax = work[rank];
3640452661fSBarry Smith   PetscFree(work);
3651eb62cbbSBarry Smith 
3661eb62cbbSBarry Smith   /* post receives:
3671eb62cbbSBarry Smith        1) each message will consist of ordered pairs
3681eb62cbbSBarry Smith      (global index,value) we store the global index as a double
369d6dfbf8fSBarry Smith      to simplify the message passing.
3701eb62cbbSBarry Smith        2) since we don't know how long each individual message is we
3711eb62cbbSBarry Smith      allocate the largest needed buffer for each receive. Potentially
3721eb62cbbSBarry Smith      this is a lot of wasted space.
3731eb62cbbSBarry Smith 
3741eb62cbbSBarry Smith 
3751eb62cbbSBarry Smith        This could be done better.
3761eb62cbbSBarry Smith   */
3770452661fSBarry Smith   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
37878b31e54SBarry Smith   CHKPTRQ(rvalues);
3790452661fSBarry Smith   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
38078b31e54SBarry Smith   CHKPTRQ(recv_waits);
3811eb62cbbSBarry Smith   for ( i=0; i<nreceives; i++ ) {
382416022c9SBarry Smith     MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
3831eb62cbbSBarry Smith               comm,recv_waits+i);
3841eb62cbbSBarry Smith   }
3851eb62cbbSBarry Smith 
3861eb62cbbSBarry Smith   /* do sends:
3871eb62cbbSBarry Smith       1) starts[i] gives the starting index in svalues for stuff going to
3881eb62cbbSBarry Smith          the ith processor
3891eb62cbbSBarry Smith   */
3900452661fSBarry Smith   svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
3910452661fSBarry Smith   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
39278b31e54SBarry Smith   CHKPTRQ(send_waits);
3930452661fSBarry Smith   starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
3941eb62cbbSBarry Smith   starts[0] = 0;
39517699dbbSLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
3961eb62cbbSBarry Smith   for ( i=0; i<aij->stash.n; i++ ) {
3971eb62cbbSBarry Smith     svalues[3*starts[owner[i]]]       = (Scalar)  aij->stash.idx[i];
3981eb62cbbSBarry Smith     svalues[3*starts[owner[i]]+1]     = (Scalar)  aij->stash.idy[i];
3991eb62cbbSBarry Smith     svalues[3*(starts[owner[i]]++)+2] =  aij->stash.array[i];
4001eb62cbbSBarry Smith   }
4010452661fSBarry Smith   PetscFree(owner);
4021eb62cbbSBarry Smith   starts[0] = 0;
40317699dbbSLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
4041eb62cbbSBarry Smith   count = 0;
40517699dbbSLois Curfman McInnes   for ( i=0; i<size; i++ ) {
4061eb62cbbSBarry Smith     if (procs[i]) {
407416022c9SBarry Smith       MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
4081eb62cbbSBarry Smith                 comm,send_waits+count++);
4091eb62cbbSBarry Smith     }
4101eb62cbbSBarry Smith   }
4110452661fSBarry Smith   PetscFree(starts); PetscFree(nprocs);
4121eb62cbbSBarry Smith 
4131eb62cbbSBarry Smith   /* Free cache space */
41455908cccSLois Curfman McInnes   PLogInfo(mat,"MatAssemblyBegin_MPIAIJ:Number of off-processor values %d\n",aij->stash.n);
41578b31e54SBarry Smith   ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
4161eb62cbbSBarry Smith 
4171eb62cbbSBarry Smith   aij->svalues    = svalues;    aij->rvalues    = rvalues;
4181eb62cbbSBarry Smith   aij->nsends     = nsends;     aij->nrecvs     = nreceives;
4191eb62cbbSBarry Smith   aij->send_waits = send_waits; aij->recv_waits = recv_waits;
4201eb62cbbSBarry Smith   aij->rmax       = nmax;
4211eb62cbbSBarry Smith 
4221eb62cbbSBarry Smith   return 0;
4231eb62cbbSBarry Smith }
42444a69424SLois Curfman McInnes extern int MatSetUpMultiply_MPIAIJ(Mat);
4251eb62cbbSBarry Smith 
4265615d1e5SSatish Balay #undef __FUNC__
4275615d1e5SSatish Balay #define __FUNC__ "MatAssemblyEnd_MPIAIJ"
4288f6be9afSLois Curfman McInnes int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
4291eb62cbbSBarry Smith {
43044a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
4311eb62cbbSBarry Smith   MPI_Status  *send_status,recv_status;
432416022c9SBarry Smith   int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
433905e6a2fSBarry Smith   int         row,col,other_disassembled;
4341eb62cbbSBarry Smith   Scalar      *values,val;
43547794344SBarry Smith   InsertMode  addv = mat->insertmode;
4361eb62cbbSBarry Smith 
4371eb62cbbSBarry Smith   /*  wait on receives */
4381eb62cbbSBarry Smith   while (count) {
439d6dfbf8fSBarry Smith     MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
4401eb62cbbSBarry Smith     /* unpack receives into our local space */
441d6dfbf8fSBarry Smith     values = aij->rvalues + 3*imdex*aij->rmax;
442c60448a5SBarry Smith     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
4431eb62cbbSBarry Smith     n = n/3;
4441eb62cbbSBarry Smith     for ( i=0; i<n; i++ ) {
445227d817aSBarry Smith       row = (int) PetscReal(values[3*i]) - aij->rstart;
446227d817aSBarry Smith       col = (int) PetscReal(values[3*i+1]);
4471eb62cbbSBarry Smith       val = values[3*i+2];
4481eb62cbbSBarry Smith       if (col >= aij->cstart && col < aij->cend) {
4491eb62cbbSBarry Smith         col -= aij->cstart;
4506fd7127cSSatish Balay         ierr = MatSetValues(aij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr);
4511eb62cbbSBarry Smith       }
4521eb62cbbSBarry Smith       else {
45355a1b374SBarry Smith         if (mat->was_assembled || mat->assembled) {
454905e6a2fSBarry Smith           if (!aij->colmap) {
455905e6a2fSBarry Smith             ierr = CreateColmap_MPIAIJ_Private(mat); CHKERRQ(ierr);
456905e6a2fSBarry Smith           }
457905e6a2fSBarry Smith           col = aij->colmap[col] - 1;
458ec8511deSBarry Smith           if (col < 0  && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
4592493cbb0SBarry Smith             ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
460227d817aSBarry Smith             col = (int) PetscReal(values[3*i+1]);
461d6dfbf8fSBarry Smith           }
4629e25ed09SBarry Smith         }
4636fd7127cSSatish Balay         ierr = MatSetValues(aij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr);
4641eb62cbbSBarry Smith       }
4651eb62cbbSBarry Smith     }
4661eb62cbbSBarry Smith     count--;
4671eb62cbbSBarry Smith   }
4680452661fSBarry Smith   PetscFree(aij->recv_waits); PetscFree(aij->rvalues);
4691eb62cbbSBarry Smith 
4701eb62cbbSBarry Smith   /* wait on sends */
4711eb62cbbSBarry Smith   if (aij->nsends) {
4720a198c4cSBarry Smith     send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
4731eb62cbbSBarry Smith     MPI_Waitall(aij->nsends,aij->send_waits,send_status);
4740452661fSBarry Smith     PetscFree(send_status);
4751eb62cbbSBarry Smith   }
4760452661fSBarry Smith   PetscFree(aij->send_waits); PetscFree(aij->svalues);
4771eb62cbbSBarry Smith 
47878b31e54SBarry Smith   ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
47978b31e54SBarry Smith   ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);
4801eb62cbbSBarry Smith 
4812493cbb0SBarry Smith   /* determine if any processor has disassembled, if so we must
4822493cbb0SBarry Smith      also disassemble ourselfs, in order that we may reassemble. */
483227d817aSBarry Smith   MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
484227d817aSBarry Smith   if (mat->was_assembled && !other_disassembled) {
4852493cbb0SBarry Smith     ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
4862493cbb0SBarry Smith   }
4872493cbb0SBarry Smith 
4886d4a8577SBarry Smith   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
48978b31e54SBarry Smith     ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
4905e42470aSBarry Smith   }
49178b31e54SBarry Smith   ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
49278b31e54SBarry Smith   ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);
4935e42470aSBarry Smith 
4947a0afa10SBarry Smith   if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;}
4958a729477SBarry Smith   return 0;
4968a729477SBarry Smith }
4978a729477SBarry Smith 
4985615d1e5SSatish Balay #undef __FUNC__
4995615d1e5SSatish Balay #define __FUNC__ "MatZeroEntries_MPIAIJ"
5008f6be9afSLois Curfman McInnes int MatZeroEntries_MPIAIJ(Mat A)
5011eb62cbbSBarry Smith {
50244a69424SLois Curfman McInnes   Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
503dbd7a890SLois Curfman McInnes   int        ierr;
50478b31e54SBarry Smith   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
50578b31e54SBarry Smith   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
5061eb62cbbSBarry Smith   return 0;
5071eb62cbbSBarry Smith }
5081eb62cbbSBarry Smith 
5091eb62cbbSBarry Smith /* the code does not do the diagonal entries correctly unless the
5101eb62cbbSBarry Smith    matrix is square and the column and row owerships are identical.
5111eb62cbbSBarry Smith    This is a BUG. The only way to fix it seems to be to access
5121eb62cbbSBarry Smith    aij->A and aij->B directly and not through the MatZeroRows()
5131eb62cbbSBarry Smith    routine.
5141eb62cbbSBarry Smith */
5155615d1e5SSatish Balay #undef __FUNC__
5165615d1e5SSatish Balay #define __FUNC__ "MatZeroRows_MPIAIJ"
5178f6be9afSLois Curfman McInnes int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
5181eb62cbbSBarry Smith {
51944a69424SLois Curfman McInnes   Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
52017699dbbSLois Curfman McInnes   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
5216abc6512SBarry Smith   int            *procs,*nprocs,j,found,idx,nsends,*work;
52217699dbbSLois Curfman McInnes   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
5235392566eSBarry Smith   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
524d6dfbf8fSBarry Smith   int            *lens,imdex,*lrows,*values;
525d6dfbf8fSBarry Smith   MPI_Comm       comm = A->comm;
5261eb62cbbSBarry Smith   MPI_Request    *send_waits,*recv_waits;
5271eb62cbbSBarry Smith   MPI_Status     recv_status,*send_status;
5281eb62cbbSBarry Smith   IS             istmp;
5291eb62cbbSBarry Smith 
53077c4ece6SBarry Smith   ierr = ISGetSize(is,&N); CHKERRQ(ierr);
53178b31e54SBarry Smith   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
5321eb62cbbSBarry Smith 
5331eb62cbbSBarry Smith   /*  first count number of contributors to each processor */
5340452661fSBarry Smith   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
535cddf8d76SBarry Smith   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
5360452661fSBarry Smith   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
5371eb62cbbSBarry Smith   for ( i=0; i<N; i++ ) {
5381eb62cbbSBarry Smith     idx = rows[i];
5391eb62cbbSBarry Smith     found = 0;
54017699dbbSLois Curfman McInnes     for ( j=0; j<size; j++ ) {
5411eb62cbbSBarry Smith       if (idx >= owners[j] && idx < owners[j+1]) {
5421eb62cbbSBarry Smith         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
5431eb62cbbSBarry Smith       }
5441eb62cbbSBarry Smith     }
545e3372554SBarry Smith     if (!found) SETERRQ(1,0,"Index out of range");
5461eb62cbbSBarry Smith   }
54717699dbbSLois Curfman McInnes   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
5481eb62cbbSBarry Smith 
5491eb62cbbSBarry Smith   /* inform other processors of number of messages and max length*/
5500452661fSBarry Smith   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
55117699dbbSLois Curfman McInnes   MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
55217699dbbSLois Curfman McInnes   nrecvs = work[rank];
55317699dbbSLois Curfman McInnes   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
55417699dbbSLois Curfman McInnes   nmax = work[rank];
5550452661fSBarry Smith   PetscFree(work);
5561eb62cbbSBarry Smith 
5571eb62cbbSBarry Smith   /* post receives:   */
5580452661fSBarry Smith   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
55978b31e54SBarry Smith   CHKPTRQ(rvalues);
5600452661fSBarry Smith   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
56178b31e54SBarry Smith   CHKPTRQ(recv_waits);
5621eb62cbbSBarry Smith   for ( i=0; i<nrecvs; i++ ) {
563416022c9SBarry Smith     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
5641eb62cbbSBarry Smith   }
5651eb62cbbSBarry Smith 
5661eb62cbbSBarry Smith   /* do sends:
5671eb62cbbSBarry Smith       1) starts[i] gives the starting index in svalues for stuff going to
5681eb62cbbSBarry Smith          the ith processor
5691eb62cbbSBarry Smith   */
5700452661fSBarry Smith   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
5710452661fSBarry Smith   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
57278b31e54SBarry Smith   CHKPTRQ(send_waits);
5730452661fSBarry Smith   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
5741eb62cbbSBarry Smith   starts[0] = 0;
57517699dbbSLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
5761eb62cbbSBarry Smith   for ( i=0; i<N; i++ ) {
5771eb62cbbSBarry Smith     svalues[starts[owner[i]]++] = rows[i];
5781eb62cbbSBarry Smith   }
5791eb62cbbSBarry Smith   ISRestoreIndices(is,&rows);
5801eb62cbbSBarry Smith 
5811eb62cbbSBarry Smith   starts[0] = 0;
58217699dbbSLois Curfman McInnes   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
5831eb62cbbSBarry Smith   count = 0;
58417699dbbSLois Curfman McInnes   for ( i=0; i<size; i++ ) {
5851eb62cbbSBarry Smith     if (procs[i]) {
586416022c9SBarry Smith       MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
5871eb62cbbSBarry Smith     }
5881eb62cbbSBarry Smith   }
5890452661fSBarry Smith   PetscFree(starts);
5901eb62cbbSBarry Smith 
59117699dbbSLois Curfman McInnes   base = owners[rank];
5921eb62cbbSBarry Smith 
5931eb62cbbSBarry Smith   /*  wait on receives */
5940452661fSBarry Smith   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
5951eb62cbbSBarry Smith   source = lens + nrecvs;
5961eb62cbbSBarry Smith   count  = nrecvs; slen = 0;
5971eb62cbbSBarry Smith   while (count) {
598d6dfbf8fSBarry Smith     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
5991eb62cbbSBarry Smith     /* unpack receives into our local space */
6001eb62cbbSBarry Smith     MPI_Get_count(&recv_status,MPI_INT,&n);
601d6dfbf8fSBarry Smith     source[imdex]  = recv_status.MPI_SOURCE;
602d6dfbf8fSBarry Smith     lens[imdex]  = n;
6031eb62cbbSBarry Smith     slen += n;
6041eb62cbbSBarry Smith     count--;
6051eb62cbbSBarry Smith   }
6060452661fSBarry Smith   PetscFree(recv_waits);
6071eb62cbbSBarry Smith 
6081eb62cbbSBarry Smith   /* move the data into the send scatter */
6090452661fSBarry Smith   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
6101eb62cbbSBarry Smith   count = 0;
6111eb62cbbSBarry Smith   for ( i=0; i<nrecvs; i++ ) {
6121eb62cbbSBarry Smith     values = rvalues + i*nmax;
6131eb62cbbSBarry Smith     for ( j=0; j<lens[i]; j++ ) {
6141eb62cbbSBarry Smith       lrows[count++] = values[j] - base;
6151eb62cbbSBarry Smith     }
6161eb62cbbSBarry Smith   }
6170452661fSBarry Smith   PetscFree(rvalues); PetscFree(lens);
6180452661fSBarry Smith   PetscFree(owner); PetscFree(nprocs);
6191eb62cbbSBarry Smith 
6201eb62cbbSBarry Smith   /* actually zap the local rows */
621029af93fSBarry Smith   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
622464493b3SBarry Smith   PLogObjectParent(A,istmp);
6230452661fSBarry Smith   PetscFree(lrows);
62478b31e54SBarry Smith   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
62578b31e54SBarry Smith   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
62678b31e54SBarry Smith   ierr = ISDestroy(istmp); CHKERRQ(ierr);
6271eb62cbbSBarry Smith 
6281eb62cbbSBarry Smith   /* wait on sends */
6291eb62cbbSBarry Smith   if (nsends) {
6300452661fSBarry Smith     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
63178b31e54SBarry Smith     CHKPTRQ(send_status);
6321eb62cbbSBarry Smith     MPI_Waitall(nsends,send_waits,send_status);
6330452661fSBarry Smith     PetscFree(send_status);
6341eb62cbbSBarry Smith   }
6350452661fSBarry Smith   PetscFree(send_waits); PetscFree(svalues);
6361eb62cbbSBarry Smith 
6371eb62cbbSBarry Smith   return 0;
6381eb62cbbSBarry Smith }
6391eb62cbbSBarry Smith 
6405615d1e5SSatish Balay #undef __FUNC__
6415615d1e5SSatish Balay #define __FUNC__ "MatMult_MPIAIJ"
6428f6be9afSLois Curfman McInnes int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
6431eb62cbbSBarry Smith {
644416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
645fbd6ef76SBarry Smith   int        ierr,nt;
646416022c9SBarry Smith 
647a2ce50c7SBarry Smith   ierr = VecGetLocalSize(xx,&nt);  CHKERRQ(ierr);
648fbd6ef76SBarry Smith   if (nt != a->n) {
649f40265daSLois Curfman McInnes     SETERRQ(1,0,"Incompatible partition of A and xx");
650fbd6ef76SBarry Smith   }
65143a90d84SBarry Smith   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
65238597bd4SSatish Balay   ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr);
65343a90d84SBarry Smith   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
65438597bd4SSatish Balay   ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
6551eb62cbbSBarry Smith   return 0;
6561eb62cbbSBarry Smith }
6571eb62cbbSBarry Smith 
6585615d1e5SSatish Balay #undef __FUNC__
6595615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_MPIAIJ"
6608f6be9afSLois Curfman McInnes int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
661da3a660dSBarry Smith {
662416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
663da3a660dSBarry Smith   int        ierr;
66443a90d84SBarry Smith   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
66538597bd4SSatish Balay   ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
66643a90d84SBarry Smith   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
66738597bd4SSatish Balay   ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
668da3a660dSBarry Smith   return 0;
669da3a660dSBarry Smith }
670da3a660dSBarry Smith 
6715615d1e5SSatish Balay #undef __FUNC__
6725615d1e5SSatish Balay #define __FUNC__ "MatMultTrans_MPIAIJ"
6738f6be9afSLois Curfman McInnes int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
674da3a660dSBarry Smith {
675416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
676da3a660dSBarry Smith   int        ierr;
677da3a660dSBarry Smith 
678da3a660dSBarry Smith   /* do nondiagonal part */
67938597bd4SSatish Balay   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
680da3a660dSBarry Smith   /* send it on its way */
681537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
682da3a660dSBarry Smith   /* do local part */
68338597bd4SSatish Balay   ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr);
684da3a660dSBarry Smith   /* receive remote parts: note this assumes the values are not actually */
685da3a660dSBarry Smith   /* inserted in yy until the next line, which is true for my implementation*/
686da3a660dSBarry Smith   /* but is not perhaps always true. */
687537820f0SBarry Smith   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
688da3a660dSBarry Smith   return 0;
689da3a660dSBarry Smith }
690da3a660dSBarry Smith 
6915615d1e5SSatish Balay #undef __FUNC__
6925615d1e5SSatish Balay #define __FUNC__ "MatMultTransAdd_MPIAIJ"
6938f6be9afSLois Curfman McInnes int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
694da3a660dSBarry Smith {
695416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
696da3a660dSBarry Smith   int        ierr;
697da3a660dSBarry Smith 
698da3a660dSBarry Smith   /* do nondiagonal part */
69938597bd4SSatish Balay   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
700da3a660dSBarry Smith   /* send it on its way */
701537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
702da3a660dSBarry Smith   /* do local part */
70338597bd4SSatish Balay   ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
704da3a660dSBarry Smith   /* receive remote parts: note this assumes the values are not actually */
705da3a660dSBarry Smith   /* inserted in yy until the next line, which is true for my implementation*/
706da3a660dSBarry Smith   /* but is not perhaps always true. */
7070a198c4cSBarry Smith   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
708da3a660dSBarry Smith   return 0;
709da3a660dSBarry Smith }
710da3a660dSBarry Smith 
7111eb62cbbSBarry Smith /*
7121eb62cbbSBarry Smith   This only works correctly for square matrices where the subblock A->A is the
7131eb62cbbSBarry Smith    diagonal block
7141eb62cbbSBarry Smith */
7155615d1e5SSatish Balay #undef __FUNC__
7165615d1e5SSatish Balay #define __FUNC__ "MatGetDiagonal_MPIAIJ"
7178f6be9afSLois Curfman McInnes int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
7181eb62cbbSBarry Smith {
719416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
72044cd7ae7SLois Curfman McInnes   if (a->M != a->N)
721e3372554SBarry Smith     SETERRQ(1,0,"Supports only square matrix where A->A is diag block");
7225baf8537SBarry Smith   if (a->rstart != a->cstart || a->rend != a->cend) {
723e3372554SBarry Smith     SETERRQ(1,0,"row partition must equal col partition");  }
724416022c9SBarry Smith   return MatGetDiagonal(a->A,v);
7251eb62cbbSBarry Smith }
7261eb62cbbSBarry Smith 
7275615d1e5SSatish Balay #undef __FUNC__
7285615d1e5SSatish Balay #define __FUNC__ "MatScale_MPIAIJ"
7298f6be9afSLois Curfman McInnes int MatScale_MPIAIJ(Scalar *aa,Mat A)
730052efed2SBarry Smith {
731052efed2SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
732052efed2SBarry Smith   int        ierr;
733052efed2SBarry Smith   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
734052efed2SBarry Smith   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
735052efed2SBarry Smith   return 0;
736052efed2SBarry Smith }
737052efed2SBarry Smith 
7385615d1e5SSatish Balay #undef __FUNC__
7395eea60f9SBarry Smith #define __FUNC__ "MatDestroy_MPIAIJ" /* ADIC Ignore */
7408f6be9afSLois Curfman McInnes int MatDestroy_MPIAIJ(PetscObject obj)
7411eb62cbbSBarry Smith {
7421eb62cbbSBarry Smith   Mat        mat = (Mat) obj;
74344a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
7441eb62cbbSBarry Smith   int        ierr;
745a5a9c739SBarry Smith #if defined(PETSC_LOG)
7466e35fa57SLois Curfman McInnes   PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N);
747a5a9c739SBarry Smith #endif
7480452661fSBarry Smith   PetscFree(aij->rowners);
74978b31e54SBarry Smith   ierr = MatDestroy(aij->A); CHKERRQ(ierr);
75078b31e54SBarry Smith   ierr = MatDestroy(aij->B); CHKERRQ(ierr);
7510452661fSBarry Smith   if (aij->colmap) PetscFree(aij->colmap);
7520452661fSBarry Smith   if (aij->garray) PetscFree(aij->garray);
7531eb62cbbSBarry Smith   if (aij->lvec)   VecDestroy(aij->lvec);
754a56f8943SBarry Smith   if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
7557a0afa10SBarry Smith   if (aij->rowvalues) PetscFree(aij->rowvalues);
7560452661fSBarry Smith   PetscFree(aij);
75790f02eecSBarry Smith   if (mat->mapping) {
75890f02eecSBarry Smith     ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr);
75990f02eecSBarry Smith   }
760a5a9c739SBarry Smith   PLogObjectDestroy(mat);
7610452661fSBarry Smith   PetscHeaderDestroy(mat);
7621eb62cbbSBarry Smith   return 0;
7631eb62cbbSBarry Smith }
764ee50ffe9SBarry Smith 
7655615d1e5SSatish Balay #undef __FUNC__
7665eea60f9SBarry Smith #define __FUNC__ "MatView_MPIAIJ_Binary" /* ADIC Ignore */
7678f6be9afSLois Curfman McInnes extern int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
7681eb62cbbSBarry Smith {
769416022c9SBarry Smith   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
770416022c9SBarry Smith   int         ierr;
771416022c9SBarry Smith 
77217699dbbSLois Curfman McInnes   if (aij->size == 1) {
773416022c9SBarry Smith     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
774416022c9SBarry Smith   }
775e3372554SBarry Smith   else SETERRQ(1,0,"Only uniprocessor output supported");
776416022c9SBarry Smith   return 0;
777416022c9SBarry Smith }
778416022c9SBarry Smith 
7795615d1e5SSatish Balay #undef __FUNC__
7805eea60f9SBarry Smith #define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworMatlab" /* ADIC Ignore */
7818f6be9afSLois Curfman McInnes extern int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
782416022c9SBarry Smith {
78344a69424SLois Curfman McInnes   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
784dbb450caSBarry Smith   Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
785a56f8943SBarry Smith   int         ierr, format,shift = C->indexshift,rank;
786d636dbe3SBarry Smith   FILE        *fd;
78719bcc07fSBarry Smith   ViewerType  vtype;
788416022c9SBarry Smith 
78919bcc07fSBarry Smith   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
79019bcc07fSBarry Smith   if (vtype  == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
79190ace30eSBarry Smith     ierr = ViewerGetFormat(viewer,&format);
7920a198c4cSBarry Smith     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
7934e220ebcSLois Curfman McInnes       MatInfo info;
7944e220ebcSLois Curfman McInnes       int     flg;
795a56f8943SBarry Smith       MPI_Comm_rank(mat->comm,&rank);
79690ace30eSBarry Smith       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
7974e220ebcSLois Curfman McInnes       ierr = MatGetInfo(mat,MAT_LOCAL,&info);
79895e01e2fSLois Curfman McInnes       ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr);
79977c4ece6SBarry Smith       PetscSequentialPhaseBegin(mat->comm,1);
80095e01e2fSLois Curfman McInnes       if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
8014e220ebcSLois Curfman McInnes          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
80295e01e2fSLois Curfman McInnes       else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
8034e220ebcSLois Curfman McInnes          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
8044e220ebcSLois Curfman McInnes       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);
8054e220ebcSLois Curfman McInnes       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);
8064e220ebcSLois Curfman McInnes       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);
8074e220ebcSLois Curfman McInnes       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);
808a56f8943SBarry Smith       fflush(fd);
80977c4ece6SBarry Smith       PetscSequentialPhaseEnd(mat->comm,1);
810a40aa06bSLois Curfman McInnes       ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr);
811416022c9SBarry Smith       return 0;
812416022c9SBarry Smith     }
8130a198c4cSBarry Smith     else if (format == VIEWER_FORMAT_ASCII_INFO) {
81408480c60SBarry Smith       return 0;
81508480c60SBarry Smith     }
816416022c9SBarry Smith   }
817416022c9SBarry Smith 
81819bcc07fSBarry Smith   if (vtype == DRAW_VIEWER) {
81919bcc07fSBarry Smith     Draw       draw;
82019bcc07fSBarry Smith     PetscTruth isnull;
82119bcc07fSBarry Smith     ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
82219bcc07fSBarry Smith     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
82319bcc07fSBarry Smith   }
82419bcc07fSBarry Smith 
82519bcc07fSBarry Smith   if (vtype == ASCII_FILE_VIEWER) {
82690ace30eSBarry Smith     ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
82777c4ece6SBarry Smith     PetscSequentialPhaseBegin(mat->comm,1);
828d13ab20cSBarry Smith     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
82917699dbbSLois Curfman McInnes            aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
8301eb62cbbSBarry Smith            aij->cend);
83178b31e54SBarry Smith     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
83278b31e54SBarry Smith     ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
833d13ab20cSBarry Smith     fflush(fd);
83477c4ece6SBarry Smith     PetscSequentialPhaseEnd(mat->comm,1);
835d13ab20cSBarry Smith   }
836416022c9SBarry Smith   else {
837a56f8943SBarry Smith     int size = aij->size;
838a56f8943SBarry Smith     rank = aij->rank;
83917699dbbSLois Curfman McInnes     if (size == 1) {
84078b31e54SBarry Smith       ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
84195373324SBarry Smith     }
84295373324SBarry Smith     else {
84395373324SBarry Smith       /* assemble the entire matrix onto first processor. */
84495373324SBarry Smith       Mat         A;
845ec8511deSBarry Smith       Mat_SeqAIJ *Aloc;
8462eb8c8abSBarry Smith       int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
84795373324SBarry Smith       Scalar      *a;
8482ee70a88SLois Curfman McInnes 
84917699dbbSLois Curfman McInnes       if (!rank) {
850b4fd4287SBarry Smith         ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
851c451ab25SLois Curfman McInnes                CHKERRQ(ierr);
85295373324SBarry Smith       }
85395373324SBarry Smith       else {
854b4fd4287SBarry Smith         ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
855c451ab25SLois Curfman McInnes                CHKERRQ(ierr);
85695373324SBarry Smith       }
857464493b3SBarry Smith       PLogObjectParent(mat,A);
858416022c9SBarry Smith 
85995373324SBarry Smith       /* copy over the A part */
860ec8511deSBarry Smith       Aloc = (Mat_SeqAIJ*) aij->A->data;
8612ee70a88SLois Curfman McInnes       m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
86295373324SBarry Smith       row = aij->rstart;
863dbb450caSBarry Smith       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
86495373324SBarry Smith       for ( i=0; i<m; i++ ) {
865416022c9SBarry Smith         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
86695373324SBarry Smith         row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
86795373324SBarry Smith       }
8682ee70a88SLois Curfman McInnes       aj = Aloc->j;
869dbb450caSBarry Smith       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}
87095373324SBarry Smith 
87195373324SBarry Smith       /* copy over the B part */
872ec8511deSBarry Smith       Aloc = (Mat_SeqAIJ*) aij->B->data;
8732ee70a88SLois Curfman McInnes       m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
87495373324SBarry Smith       row = aij->rstart;
8750452661fSBarry Smith       ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
876dbb450caSBarry Smith       for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
87795373324SBarry Smith       for ( i=0; i<m; i++ ) {
878416022c9SBarry Smith         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
87995373324SBarry Smith         row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
88095373324SBarry Smith       }
8810452661fSBarry Smith       PetscFree(ct);
8826d4a8577SBarry Smith       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
8836d4a8577SBarry Smith       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
88417699dbbSLois Curfman McInnes       if (!rank) {
88578b31e54SBarry Smith         ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
88695373324SBarry Smith       }
88778b31e54SBarry Smith       ierr = MatDestroy(A); CHKERRQ(ierr);
88895373324SBarry Smith     }
88995373324SBarry Smith   }
8901eb62cbbSBarry Smith   return 0;
8911eb62cbbSBarry Smith }
8921eb62cbbSBarry Smith 
8935615d1e5SSatish Balay #undef __FUNC__
8945eea60f9SBarry Smith #define __FUNC__ "MatView_MPIAIJ" /* ADIC Ignore */
8958f6be9afSLois Curfman McInnes int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
896416022c9SBarry Smith {
897416022c9SBarry Smith   Mat         mat = (Mat) obj;
898416022c9SBarry Smith   int         ierr;
89919bcc07fSBarry Smith   ViewerType  vtype;
900416022c9SBarry Smith 
90119bcc07fSBarry Smith   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
90219bcc07fSBarry Smith   if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
90319bcc07fSBarry Smith       vtype == DRAW_VIEWER       || vtype == MATLAB_VIEWER) {
904d7e8b826SBarry Smith     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
905416022c9SBarry Smith   }
90619bcc07fSBarry Smith   else if (vtype == BINARY_FILE_VIEWER) {
907416022c9SBarry Smith     return MatView_MPIAIJ_Binary(mat,viewer);
908416022c9SBarry Smith   }
909416022c9SBarry Smith   return 0;
910416022c9SBarry Smith }
911416022c9SBarry Smith 
9121eb62cbbSBarry Smith /*
9131eb62cbbSBarry Smith     This has to provide several versions.
9141eb62cbbSBarry Smith 
9151eb62cbbSBarry Smith      2) a) use only local smoothing updating outer values only once.
9161eb62cbbSBarry Smith         b) local smoothing updating outer values each inner iteration
917d6dfbf8fSBarry Smith      3) color updating out values betwen colors.
9181eb62cbbSBarry Smith */
9195615d1e5SSatish Balay #undef __FUNC__
9205615d1e5SSatish Balay #define __FUNC__ "MatRelax_MPIAIJ"
9218f6be9afSLois Curfman McInnes int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
922dbb450caSBarry Smith                            double fshift,int its,Vec xx)
9238a729477SBarry Smith {
92444a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
925d6dfbf8fSBarry Smith   Mat        AA = mat->A, BB = mat->B;
926ec8511deSBarry Smith   Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
927c16cb8f2SBarry Smith   Scalar     *b,*x,*xs,*ls,d,*v,sum;
9286abc6512SBarry Smith   int        ierr,*idx, *diag;
929416022c9SBarry Smith   int        n = mat->n, m = mat->m, i,shift = A->indexshift;
9308a729477SBarry Smith 
931d6dfbf8fSBarry Smith   VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
932dbb450caSBarry Smith   xs = x + shift; /* shift by one for index start of 1 */
933dbb450caSBarry Smith   ls = ls + shift;
934ec8511deSBarry Smith   if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;}
935d6dfbf8fSBarry Smith   diag = A->diag;
936c16cb8f2SBarry Smith   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
937da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
93838597bd4SSatish Balay       return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
939da3a660dSBarry Smith     }
94043a90d84SBarry Smith     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
94178b31e54SBarry Smith     CHKERRQ(ierr);
94243a90d84SBarry Smith     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
94378b31e54SBarry Smith     CHKERRQ(ierr);
944d6dfbf8fSBarry Smith     while (its--) {
945d6dfbf8fSBarry Smith       /* go down through the rows */
946d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
947d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
948dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
949dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
950d6dfbf8fSBarry Smith         sum  = b[i];
951d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
952dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
953d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
954dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
955dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
956d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
95755a1b374SBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
958d6dfbf8fSBarry Smith       }
959d6dfbf8fSBarry Smith       /* come up through the rows */
960d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
961d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
962dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
963dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
964d6dfbf8fSBarry Smith         sum  = b[i];
965d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
966dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
967d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
968dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
969dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
970d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
97155a1b374SBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
972d6dfbf8fSBarry Smith       }
973d6dfbf8fSBarry Smith     }
974d6dfbf8fSBarry Smith   }
975d6dfbf8fSBarry Smith   else if (flag & SOR_LOCAL_FORWARD_SWEEP){
976da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
97738597bd4SSatish Balay       return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
978da3a660dSBarry Smith     }
97943a90d84SBarry Smith     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
98078b31e54SBarry Smith     CHKERRQ(ierr);
98143a90d84SBarry Smith     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
98278b31e54SBarry Smith     CHKERRQ(ierr);
983d6dfbf8fSBarry Smith     while (its--) {
984d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
985d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
986dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
987dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
988d6dfbf8fSBarry Smith         sum  = b[i];
989d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
990dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
991d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
992dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
993dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
994d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
99555a1b374SBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
996d6dfbf8fSBarry Smith       }
997d6dfbf8fSBarry Smith     }
998d6dfbf8fSBarry Smith   }
999d6dfbf8fSBarry Smith   else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1000da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
100138597bd4SSatish Balay       return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
1002da3a660dSBarry Smith     }
100343a90d84SBarry Smith     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,
100478b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
100543a90d84SBarry Smith     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,
100678b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
1007d6dfbf8fSBarry Smith     while (its--) {
1008d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
1009d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
1010dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
1011dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
1012d6dfbf8fSBarry Smith         sum  = b[i];
1013d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
1014dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
1015d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
1016dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
1017dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
1018d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
101955a1b374SBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
1020d6dfbf8fSBarry Smith       }
1021d6dfbf8fSBarry Smith     }
1022d6dfbf8fSBarry Smith   }
1023c16cb8f2SBarry Smith   else {
1024e3372554SBarry Smith     SETERRQ(1,0,"Option not supported");
1025c16cb8f2SBarry Smith   }
10268a729477SBarry Smith   return 0;
10278a729477SBarry Smith }
1028a66be287SLois Curfman McInnes 
10295615d1e5SSatish Balay #undef __FUNC__
10305eea60f9SBarry Smith #define __FUNC__ "MatGetInfo_MPIAIJ" /* ADIC Ignore */
10318f6be9afSLois Curfman McInnes int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1032a66be287SLois Curfman McInnes {
1033a66be287SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1034a66be287SLois Curfman McInnes   Mat        A = mat->A, B = mat->B;
10354e220ebcSLois Curfman McInnes   int        ierr;
10364e220ebcSLois Curfman McInnes   double     isend[5], irecv[5];
1037a66be287SLois Curfman McInnes 
10384e220ebcSLois Curfman McInnes   info->rows_global    = (double)mat->M;
10394e220ebcSLois Curfman McInnes   info->columns_global = (double)mat->N;
10404e220ebcSLois Curfman McInnes   info->rows_local     = (double)mat->m;
10414e220ebcSLois Curfman McInnes   info->columns_local  = (double)mat->N;
10424e220ebcSLois Curfman McInnes   info->block_size     = 1.0;
10434e220ebcSLois Curfman McInnes   ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
10444e220ebcSLois Curfman McInnes   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
10454e220ebcSLois Curfman McInnes   isend[3] = info->memory;  isend[4] = info->mallocs;
10464e220ebcSLois Curfman McInnes   ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
10474e220ebcSLois Curfman McInnes   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
10484e220ebcSLois Curfman McInnes   isend[3] += info->memory;  isend[4] += info->mallocs;
1049a66be287SLois Curfman McInnes   if (flag == MAT_LOCAL) {
10504e220ebcSLois Curfman McInnes     info->nz_used      = isend[0];
10514e220ebcSLois Curfman McInnes     info->nz_allocated = isend[1];
10524e220ebcSLois Curfman McInnes     info->nz_unneeded  = isend[2];
10534e220ebcSLois Curfman McInnes     info->memory       = isend[3];
10544e220ebcSLois Curfman McInnes     info->mallocs      = isend[4];
1055a66be287SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_MAX) {
10564e220ebcSLois Curfman McInnes     MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);
10574e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
10584e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
10594e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
10604e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
10614e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
1062a66be287SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_SUM) {
10634e220ebcSLois Curfman McInnes     MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);
10644e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
10654e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
10664e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
10674e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
10684e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
1069a66be287SLois Curfman McInnes   }
10704e220ebcSLois Curfman McInnes   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
10714e220ebcSLois Curfman McInnes   info->fill_ratio_needed = 0;
10724e220ebcSLois Curfman McInnes   info->factor_mallocs    = 0;
10734e220ebcSLois Curfman McInnes 
1074a66be287SLois Curfman McInnes   return 0;
1075a66be287SLois Curfman McInnes }
1076a66be287SLois Curfman McInnes 
1077299609e3SLois Curfman McInnes extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*);
1078299609e3SLois Curfman McInnes extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*);
1079299609e3SLois Curfman McInnes extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double);
1080299609e3SLois Curfman McInnes extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *);
1081299609e3SLois Curfman McInnes extern int MatSolve_MPIAIJ(Mat,Vec,Vec);
1082299609e3SLois Curfman McInnes extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec);
1083299609e3SLois Curfman McInnes extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec);
1084299609e3SLois Curfman McInnes extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec);
1085299609e3SLois Curfman McInnes 
10865615d1e5SSatish Balay #undef __FUNC__
10875eea60f9SBarry Smith #define __FUNC__ "MatSetOption_MPIAIJ" /* ADIC Ignore */
10888f6be9afSLois Curfman McInnes int MatSetOption_MPIAIJ(Mat A,MatOption op)
1089c74985f6SBarry Smith {
1090c0bbcb79SLois Curfman McInnes   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1091c74985f6SBarry Smith 
10926d4a8577SBarry Smith   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
10936d4a8577SBarry Smith       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
10946da5968aSLois Curfman McInnes       op == MAT_COLUMNS_UNSORTED ||
1095c2653b3dSLois Curfman McInnes       op == MAT_COLUMNS_SORTED ||
109696854ed6SLois Curfman McInnes       op == MAT_NEW_NONZERO_ALLOCATION_ERROR ||
1097c2653b3dSLois Curfman McInnes       op == MAT_NEW_NONZERO_LOCATION_ERROR) {
1098b1fbbac0SLois Curfman McInnes         MatSetOption(a->A,op);
1099b1fbbac0SLois Curfman McInnes         MatSetOption(a->B,op);
1100b1fbbac0SLois Curfman McInnes   } else if (op == MAT_ROW_ORIENTED) {
1101aeafbbfcSLois Curfman McInnes     a->roworiented = 1;
1102c0bbcb79SLois Curfman McInnes     MatSetOption(a->A,op);
1103c0bbcb79SLois Curfman McInnes     MatSetOption(a->B,op);
1104b1fbbac0SLois Curfman McInnes   } else if (op == MAT_ROWS_SORTED ||
11056da5968aSLois Curfman McInnes              op == MAT_ROWS_UNSORTED ||
11066d4a8577SBarry Smith              op == MAT_SYMMETRIC ||
11076d4a8577SBarry Smith              op == MAT_STRUCTURALLY_SYMMETRIC ||
11086d4a8577SBarry Smith              op == MAT_YES_NEW_DIAGONALS)
110994a424c1SBarry Smith     PLogInfo(A,"Info:MatSetOption_MPIAIJ:Option ignored\n");
11106d4a8577SBarry Smith   else if (op == MAT_COLUMN_ORIENTED) {
11114b0e389bSBarry Smith     a->roworiented = 0;
11124b0e389bSBarry Smith     MatSetOption(a->A,op);
11134b0e389bSBarry Smith     MatSetOption(a->B,op);
11142b362799SSatish Balay   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
111590f02eecSBarry Smith     a->donotstash = 1;
111690f02eecSBarry Smith   } else if (op == MAT_NO_NEW_DIAGONALS)
1117e3372554SBarry Smith     {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");}
1118c0bbcb79SLois Curfman McInnes   else
1119e3372554SBarry Smith     {SETERRQ(PETSC_ERR_SUP,0,"unknown option");}
1120c74985f6SBarry Smith   return 0;
1121c74985f6SBarry Smith }
1122c74985f6SBarry Smith 
11235615d1e5SSatish Balay #undef __FUNC__
11245eea60f9SBarry Smith #define __FUNC__ "MatGetSize_MPIAIJ" /* ADIC Ignore */
11258f6be9afSLois Curfman McInnes int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
1126c74985f6SBarry Smith {
112744a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1128c74985f6SBarry Smith   *m = mat->M; *n = mat->N;
1129c74985f6SBarry Smith   return 0;
1130c74985f6SBarry Smith }
1131c74985f6SBarry Smith 
11325615d1e5SSatish Balay #undef __FUNC__
11335eea60f9SBarry Smith #define __FUNC__ "MatGetLocalSize_MPIAIJ" /* ADIC Ignore */
11348f6be9afSLois Curfman McInnes int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
1135c74985f6SBarry Smith {
113644a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1137b7c46309SBarry Smith   *m = mat->m; *n = mat->N;
1138c74985f6SBarry Smith   return 0;
1139c74985f6SBarry Smith }
1140c74985f6SBarry Smith 
11415615d1e5SSatish Balay #undef __FUNC__
11425eea60f9SBarry Smith #define __FUNC__ "MatGetOwnershipRange_MPIAIJ" /* ADIC Ignore */
11438f6be9afSLois Curfman McInnes int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
1144c74985f6SBarry Smith {
114544a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1146c74985f6SBarry Smith   *m = mat->rstart; *n = mat->rend;
1147c74985f6SBarry Smith   return 0;
1148c74985f6SBarry Smith }
1149c74985f6SBarry Smith 
11506d84be18SBarry Smith extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
11516d84be18SBarry Smith extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
11526d84be18SBarry Smith 
11535615d1e5SSatish Balay #undef __FUNC__
11545615d1e5SSatish Balay #define __FUNC__ "MatGetRow_MPIAIJ"
11556d84be18SBarry Smith int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
115639e00950SLois Curfman McInnes {
1157154123eaSLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
115870f0671dSBarry Smith   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
1159154123eaSLois Curfman McInnes   int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
1160154123eaSLois Curfman McInnes   int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
116170f0671dSBarry Smith   int        *cmap, *idx_p;
116239e00950SLois Curfman McInnes 
1163e3372554SBarry Smith   if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,0,"Already active");
11647a0afa10SBarry Smith   mat->getrowactive = PETSC_TRUE;
11657a0afa10SBarry Smith 
116670f0671dSBarry Smith   if (!mat->rowvalues && (idx || v)) {
11677a0afa10SBarry Smith     /*
11687a0afa10SBarry Smith         allocate enough space to hold information from the longest row.
11697a0afa10SBarry Smith     */
11707a0afa10SBarry Smith     Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data;
1171c16cb8f2SBarry Smith     int     max = 1,m = mat->m,tmp;
1172c16cb8f2SBarry Smith     for ( i=0; i<m; i++ ) {
11737a0afa10SBarry Smith       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
11747a0afa10SBarry Smith       if (max < tmp) { max = tmp; }
11757a0afa10SBarry Smith     }
11767a0afa10SBarry Smith     mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar)));
11777a0afa10SBarry Smith     CHKPTRQ(mat->rowvalues);
11787a0afa10SBarry Smith     mat->rowindices = (int *) (mat->rowvalues + max);
11797a0afa10SBarry Smith   }
11807a0afa10SBarry Smith 
1181e3372554SBarry Smith   if (row < rstart || row >= rend) SETERRQ(1,0,"Only local rows")
1182abc0e9e4SLois Curfman McInnes   lrow = row - rstart;
118339e00950SLois Curfman McInnes 
1184154123eaSLois Curfman McInnes   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1185154123eaSLois Curfman McInnes   if (!v)   {pvA = 0; pvB = 0;}
1186154123eaSLois Curfman McInnes   if (!idx) {pcA = 0; if (!v) pcB = 0;}
118738597bd4SSatish Balay   ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
118838597bd4SSatish Balay   ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1189154123eaSLois Curfman McInnes   nztot = nzA + nzB;
1190154123eaSLois Curfman McInnes 
119170f0671dSBarry Smith   cmap  = mat->garray;
1192154123eaSLois Curfman McInnes   if (v  || idx) {
1193154123eaSLois Curfman McInnes     if (nztot) {
1194154123eaSLois Curfman McInnes       /* Sort by increasing column numbers, assuming A and B already sorted */
119570f0671dSBarry Smith       int imark = -1;
1196154123eaSLois Curfman McInnes       if (v) {
119770f0671dSBarry Smith         *v = v_p = mat->rowvalues;
119839e00950SLois Curfman McInnes         for ( i=0; i<nzB; i++ ) {
119970f0671dSBarry Smith           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1200154123eaSLois Curfman McInnes           else break;
1201154123eaSLois Curfman McInnes         }
1202154123eaSLois Curfman McInnes         imark = i;
120370f0671dSBarry Smith         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
120470f0671dSBarry Smith         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
1205154123eaSLois Curfman McInnes       }
1206154123eaSLois Curfman McInnes       if (idx) {
120770f0671dSBarry Smith         *idx = idx_p = mat->rowindices;
120870f0671dSBarry Smith         if (imark > -1) {
120970f0671dSBarry Smith           for ( i=0; i<imark; i++ ) {
121070f0671dSBarry Smith             idx_p[i] = cmap[cworkB[i]];
121170f0671dSBarry Smith           }
121270f0671dSBarry Smith         } else {
1213154123eaSLois Curfman McInnes           for ( i=0; i<nzB; i++ ) {
121470f0671dSBarry Smith             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1215154123eaSLois Curfman McInnes             else break;
1216154123eaSLois Curfman McInnes           }
1217154123eaSLois Curfman McInnes           imark = i;
121870f0671dSBarry Smith         }
121970f0671dSBarry Smith         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart + cworkA[i];
122070f0671dSBarry Smith         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]];
122139e00950SLois Curfman McInnes       }
122239e00950SLois Curfman McInnes     }
12231ca473b0SSatish Balay     else {
12241ca473b0SSatish Balay       if (idx) *idx = 0;
12251ca473b0SSatish Balay       if (v)   *v   = 0;
12261ca473b0SSatish Balay     }
1227154123eaSLois Curfman McInnes   }
122839e00950SLois Curfman McInnes   *nz = nztot;
122938597bd4SSatish Balay   ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
123038597bd4SSatish Balay   ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
123139e00950SLois Curfman McInnes   return 0;
123239e00950SLois Curfman McInnes }
123339e00950SLois Curfman McInnes 
12345615d1e5SSatish Balay #undef __FUNC__
12355eea60f9SBarry Smith #define __FUNC__ "MatRestoreRow_MPIAIJ" /* ADIC Ignore */
12366d84be18SBarry Smith int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
123739e00950SLois Curfman McInnes {
12387a0afa10SBarry Smith   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
12397a0afa10SBarry Smith   if (aij->getrowactive == PETSC_FALSE) {
1240e3372554SBarry Smith     SETERRQ(1,0,"MatGetRow not called");
12417a0afa10SBarry Smith   }
12427a0afa10SBarry Smith   aij->getrowactive = PETSC_FALSE;
124339e00950SLois Curfman McInnes   return 0;
124439e00950SLois Curfman McInnes }
124539e00950SLois Curfman McInnes 
12465615d1e5SSatish Balay #undef __FUNC__
12475615d1e5SSatish Balay #define __FUNC__ "MatNorm_MPIAIJ"
12488f6be9afSLois Curfman McInnes int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
1249855ac2c5SLois Curfman McInnes {
1250855ac2c5SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1251ec8511deSBarry Smith   Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
1252416022c9SBarry Smith   int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
1253416022c9SBarry Smith   double     sum = 0.0;
125404ca555eSLois Curfman McInnes   Scalar     *v;
125504ca555eSLois Curfman McInnes 
125617699dbbSLois Curfman McInnes   if (aij->size == 1) {
125714183eadSLois Curfman McInnes     ierr =  MatNorm(aij->A,type,norm); CHKERRQ(ierr);
125837fa93a5SLois Curfman McInnes   } else {
125904ca555eSLois Curfman McInnes     if (type == NORM_FROBENIUS) {
126004ca555eSLois Curfman McInnes       v = amat->a;
126104ca555eSLois Curfman McInnes       for (i=0; i<amat->nz; i++ ) {
126204ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX)
126304ca555eSLois Curfman McInnes         sum += real(conj(*v)*(*v)); v++;
126404ca555eSLois Curfman McInnes #else
126504ca555eSLois Curfman McInnes         sum += (*v)*(*v); v++;
126604ca555eSLois Curfman McInnes #endif
126704ca555eSLois Curfman McInnes       }
126804ca555eSLois Curfman McInnes       v = bmat->a;
126904ca555eSLois Curfman McInnes       for (i=0; i<bmat->nz; i++ ) {
127004ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX)
127104ca555eSLois Curfman McInnes         sum += real(conj(*v)*(*v)); v++;
127204ca555eSLois Curfman McInnes #else
127304ca555eSLois Curfman McInnes         sum += (*v)*(*v); v++;
127404ca555eSLois Curfman McInnes #endif
127504ca555eSLois Curfman McInnes       }
12766d84be18SBarry Smith       MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
127704ca555eSLois Curfman McInnes       *norm = sqrt(*norm);
127804ca555eSLois Curfman McInnes     }
127904ca555eSLois Curfman McInnes     else if (type == NORM_1) { /* max column norm */
128004ca555eSLois Curfman McInnes       double *tmp, *tmp2;
128104ca555eSLois Curfman McInnes       int    *jj, *garray = aij->garray;
1282758f045eSSatish Balay       tmp  = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp);
1283758f045eSSatish Balay       tmp2 = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp2);
1284cddf8d76SBarry Smith       PetscMemzero(tmp,aij->N*sizeof(double));
128504ca555eSLois Curfman McInnes       *norm = 0.0;
128604ca555eSLois Curfman McInnes       v = amat->a; jj = amat->j;
128704ca555eSLois Curfman McInnes       for ( j=0; j<amat->nz; j++ ) {
1288579c6b6fSBarry Smith         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
128904ca555eSLois Curfman McInnes       }
129004ca555eSLois Curfman McInnes       v = bmat->a; jj = bmat->j;
129104ca555eSLois Curfman McInnes       for ( j=0; j<bmat->nz; j++ ) {
1292579c6b6fSBarry Smith         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
129304ca555eSLois Curfman McInnes       }
12946d84be18SBarry Smith       MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);
129504ca555eSLois Curfman McInnes       for ( j=0; j<aij->N; j++ ) {
129604ca555eSLois Curfman McInnes         if (tmp2[j] > *norm) *norm = tmp2[j];
129704ca555eSLois Curfman McInnes       }
12980452661fSBarry Smith       PetscFree(tmp); PetscFree(tmp2);
129904ca555eSLois Curfman McInnes     }
130004ca555eSLois Curfman McInnes     else if (type == NORM_INFINITY) { /* max row norm */
1301515d9167SLois Curfman McInnes       double ntemp = 0.0;
130204ca555eSLois Curfman McInnes       for ( j=0; j<amat->m; j++ ) {
1303dbb450caSBarry Smith         v = amat->a + amat->i[j] + shift;
130404ca555eSLois Curfman McInnes         sum = 0.0;
130504ca555eSLois Curfman McInnes         for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
1306cddf8d76SBarry Smith           sum += PetscAbsScalar(*v); v++;
130704ca555eSLois Curfman McInnes         }
1308dbb450caSBarry Smith         v = bmat->a + bmat->i[j] + shift;
130904ca555eSLois Curfman McInnes         for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
1310cddf8d76SBarry Smith           sum += PetscAbsScalar(*v); v++;
131104ca555eSLois Curfman McInnes         }
1312515d9167SLois Curfman McInnes         if (sum > ntemp) ntemp = sum;
131304ca555eSLois Curfman McInnes       }
13146d84be18SBarry Smith       MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);
131504ca555eSLois Curfman McInnes     }
131604ca555eSLois Curfman McInnes     else {
1317e3372554SBarry Smith       SETERRQ(1,0,"No support for two norm");
131804ca555eSLois Curfman McInnes     }
131937fa93a5SLois Curfman McInnes   }
1320855ac2c5SLois Curfman McInnes   return 0;
1321855ac2c5SLois Curfman McInnes }
1322855ac2c5SLois Curfman McInnes 
13235615d1e5SSatish Balay #undef __FUNC__
13245615d1e5SSatish Balay #define __FUNC__ "MatTranspose_MPIAIJ"
13258f6be9afSLois Curfman McInnes int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1326b7c46309SBarry Smith {
1327b7c46309SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1328dbb450caSBarry Smith   Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
1329416022c9SBarry Smith   int        ierr,shift = Aloc->indexshift;
1330416022c9SBarry Smith   Mat        B;
1331b7c46309SBarry Smith   int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
1332b7c46309SBarry Smith   Scalar     *array;
1333b7c46309SBarry Smith 
13343638b69dSLois Curfman McInnes   if (matout == PETSC_NULL && M != N)
1335e3372554SBarry Smith     SETERRQ(1,0,"Square matrix only for in-place");
1336b4fd4287SBarry Smith   ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,
1337b4fd4287SBarry Smith          PETSC_NULL,&B); CHKERRQ(ierr);
1338b7c46309SBarry Smith 
1339b7c46309SBarry Smith   /* copy over the A part */
1340ec8511deSBarry Smith   Aloc = (Mat_SeqAIJ*) a->A->data;
1341b7c46309SBarry Smith   m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1342b7c46309SBarry Smith   row = a->rstart;
1343dbb450caSBarry Smith   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
1344b7c46309SBarry Smith   for ( i=0; i<m; i++ ) {
1345416022c9SBarry Smith     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1346b7c46309SBarry Smith     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1347b7c46309SBarry Smith   }
1348b7c46309SBarry Smith   aj = Aloc->j;
13494af08d9eSBarry Smith   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}
1350b7c46309SBarry Smith 
1351b7c46309SBarry Smith   /* copy over the B part */
1352ec8511deSBarry Smith   Aloc = (Mat_SeqAIJ*) a->B->data;
1353b7c46309SBarry Smith   m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1354b7c46309SBarry Smith   row = a->rstart;
13550452661fSBarry Smith   ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
1356dbb450caSBarry Smith   for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
1357b7c46309SBarry Smith   for ( i=0; i<m; i++ ) {
1358416022c9SBarry Smith     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1359b7c46309SBarry Smith     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1360b7c46309SBarry Smith   }
13610452661fSBarry Smith   PetscFree(ct);
13626d4a8577SBarry Smith   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
13636d4a8577SBarry Smith   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
13643638b69dSLois Curfman McInnes   if (matout != PETSC_NULL) {
13650de55854SLois Curfman McInnes     *matout = B;
13660de55854SLois Curfman McInnes   } else {
13670de55854SLois Curfman McInnes     /* This isn't really an in-place transpose .... but free data structures from a */
13680452661fSBarry Smith     PetscFree(a->rowners);
13690de55854SLois Curfman McInnes     ierr = MatDestroy(a->A); CHKERRQ(ierr);
13700de55854SLois Curfman McInnes     ierr = MatDestroy(a->B); CHKERRQ(ierr);
13710452661fSBarry Smith     if (a->colmap) PetscFree(a->colmap);
13720452661fSBarry Smith     if (a->garray) PetscFree(a->garray);
13730de55854SLois Curfman McInnes     if (a->lvec) VecDestroy(a->lvec);
1374a56f8943SBarry Smith     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
13750452661fSBarry Smith     PetscFree(a);
1376f09e8eb9SSatish Balay     PetscMemcpy(A,B,sizeof(struct _p_Mat));
13770452661fSBarry Smith     PetscHeaderDestroy(B);
13780de55854SLois Curfman McInnes   }
1379b7c46309SBarry Smith   return 0;
1380b7c46309SBarry Smith }
1381b7c46309SBarry Smith 
13825615d1e5SSatish Balay #undef __FUNC__
13835615d1e5SSatish Balay #define __FUNC__ "MatDiagonalScale_MPIAIJ"
13844b967eb1SSatish Balay int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1385a008b906SSatish Balay {
13864b967eb1SSatish Balay   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
13874b967eb1SSatish Balay   Mat a = aij->A, b = aij->B;
1388a008b906SSatish Balay   int ierr,s1,s2,s3;
1389a008b906SSatish Balay 
13904b967eb1SSatish Balay   ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr);
13914b967eb1SSatish Balay   if (rr) {
13924b967eb1SSatish Balay     s3 = aij->n;
13934b967eb1SSatish Balay     VecGetLocalSize_Fast(rr,s1);
1394e3372554SBarry Smith     if (s1!=s3) SETERRQ(1,0,"right vector non-conforming local size");
13954b967eb1SSatish Balay     /* Overlap communication with computation. */
139643a90d84SBarry Smith     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
1397a008b906SSatish Balay   }
13984b967eb1SSatish Balay   if (ll) {
13994b967eb1SSatish Balay     VecGetLocalSize_Fast(ll,s1);
1400e3372554SBarry Smith     if (s1!=s2) SETERRQ(1,0,"left vector non-conforming local size");
1401c351683dSSatish Balay     ierr = (*b->ops.diagonalscale)(b,ll,0); CHKERRQ(ierr);
14024b967eb1SSatish Balay   }
14034b967eb1SSatish Balay   /* scale  the diagonal block */
1404c351683dSSatish Balay   ierr = (*a->ops.diagonalscale)(a,ll,rr); CHKERRQ(ierr);
14054b967eb1SSatish Balay 
14064b967eb1SSatish Balay   if (rr) {
14074b967eb1SSatish Balay     /* Do a scatter end and then right scale the off-diagonal block */
140843a90d84SBarry Smith     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
1409c351683dSSatish Balay     ierr = (*b->ops.diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr);
14104b967eb1SSatish Balay   }
14114b967eb1SSatish Balay 
1412a008b906SSatish Balay   return 0;
1413a008b906SSatish Balay }
1414a008b906SSatish Balay 
1415a008b906SSatish Balay 
1416682d7d0cSBarry Smith extern int MatPrintHelp_SeqAIJ(Mat);
14175615d1e5SSatish Balay #undef __FUNC__
14185eea60f9SBarry Smith #define __FUNC__ "MatPrintHelp_MPIAIJ" /* ADIC Ignore */
14198f6be9afSLois Curfman McInnes int MatPrintHelp_MPIAIJ(Mat A)
1420682d7d0cSBarry Smith {
1421682d7d0cSBarry Smith   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1422682d7d0cSBarry Smith 
1423682d7d0cSBarry Smith   if (!a->rank) return MatPrintHelp_SeqAIJ(a->A);
1424682d7d0cSBarry Smith   else return 0;
1425682d7d0cSBarry Smith }
1426682d7d0cSBarry Smith 
14275615d1e5SSatish Balay #undef __FUNC__
14285eea60f9SBarry Smith #define __FUNC__ "MatGetBlockSize_MPIAIJ" /* ADIC Ignore */
14298f6be9afSLois Curfman McInnes int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
14305a838052SSatish Balay {
14315a838052SSatish Balay   *bs = 1;
14325a838052SSatish Balay   return 0;
14335a838052SSatish Balay }
14345615d1e5SSatish Balay #undef __FUNC__
14355eea60f9SBarry Smith #define __FUNC__ "MatSetUnfactored_MPIAIJ" /* ADIC Ignore */
14368f6be9afSLois Curfman McInnes int MatSetUnfactored_MPIAIJ(Mat A)
1437bb5a7306SBarry Smith {
1438bb5a7306SBarry Smith   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1439bb5a7306SBarry Smith   int        ierr;
1440bb5a7306SBarry Smith   ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
1441bb5a7306SBarry Smith   return 0;
1442bb5a7306SBarry Smith }
1443bb5a7306SBarry Smith 
14448f6be9afSLois Curfman McInnes extern int MatConvertSameType_MPIAIJ(Mat,Mat *,int);
14452f86bd48SSatish Balay extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int);
14460a198c4cSBarry Smith extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
14470a198c4cSBarry Smith extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **);
14488a729477SBarry Smith /* -------------------------------------------------------------------*/
14492ee70a88SLois Curfman McInnes static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
145039e00950SLois Curfman McInnes        MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
145144a69424SLois Curfman McInnes        MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
145244a69424SLois Curfman McInnes        MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
145336ce4990SBarry Smith        0,0,
145436ce4990SBarry Smith        0,0,
145536ce4990SBarry Smith        0,0,
145644a69424SLois Curfman McInnes        MatRelax_MPIAIJ,
1457b7c46309SBarry Smith        MatTranspose_MPIAIJ,
1458a66be287SLois Curfman McInnes        MatGetInfo_MPIAIJ,0,
1459a008b906SSatish Balay        MatGetDiagonal_MPIAIJ,MatDiagonalScale_MPIAIJ,MatNorm_MPIAIJ,
1460ee50ffe9SBarry Smith        MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
14611eb62cbbSBarry Smith        0,
1462299609e3SLois Curfman McInnes        MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
146336ce4990SBarry Smith        0,0,0,0,
1464d1710a03SLois Curfman McInnes        MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
146536ce4990SBarry Smith        0,0,
146694a9d846SBarry Smith        0,0,MatConvertSameType_MPIAIJ,0,0,
1467b49de8d1SLois Curfman McInnes        0,0,0,
1468598137ffSSatish Balay        MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0,
1469052efed2SBarry Smith        MatPrintHelp_MPIAIJ,
14703b2fbd54SBarry Smith        MatScale_MPIAIJ,0,0,0,
14710a198c4cSBarry Smith        MatGetBlockSize_MPIAIJ,0,0,0,0,
1472bb5a7306SBarry Smith        MatFDColoringCreate_MPIAIJ,0,MatSetUnfactored_MPIAIJ};
147336ce4990SBarry Smith 
14748a729477SBarry Smith 
14755615d1e5SSatish Balay #undef __FUNC__
14765615d1e5SSatish Balay #define __FUNC__ "MatCreateMPIAIJ"
14771987afe7SBarry Smith /*@C
1478ff756334SLois Curfman McInnes    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
14793a511b96SLois Curfman McInnes    (the default parallel PETSc format).  For good matrix assembly performance
14803a511b96SLois Curfman McInnes    the user should preallocate the matrix storage by setting the parameters
14813a511b96SLois Curfman McInnes    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
14823a511b96SLois Curfman McInnes    performance can be increased by more than a factor of 50.
14838a729477SBarry Smith 
14848a729477SBarry Smith    Input Parameters:
14851eb62cbbSBarry Smith .  comm - MPI communicator
14867d3e4905SLois Curfman McInnes .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
148792e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
148892e8d321SLois Curfman McInnes            y vector for the matrix-vector product y = Ax.
14891a3896d6SBarry Smith .  n - This value should be the same as the local size used in creating the
14901a3896d6SBarry Smith        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
14911a3896d6SBarry Smith        calculated if N is given) For square matrices n is almost always m.
14927d3e4905SLois Curfman McInnes .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
149392e8d321SLois Curfman McInnes .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
1494ab693e5aSLois Curfman McInnes .  d_nz - number of nonzeros per row in diagonal portion of local submatrix
1495ff756334SLois Curfman McInnes            (same for all local rows)
14962bd5e0b2SLois Curfman McInnes .  d_nzz - array containing the number of nonzeros in the various rows of the
149792e8d321SLois Curfman McInnes            diagonal portion of the local submatrix (possibly different for each row)
14982bd5e0b2SLois Curfman McInnes            or PETSC_NULL. You must leave room for the diagonal entry even if
14992bd5e0b2SLois Curfman McInnes            it is zero.
15002bd5e0b2SLois Curfman McInnes .  o_nz - number of nonzeros per row in the off-diagonal portion of local
1501ab693e5aSLois Curfman McInnes            submatrix (same for all local rows).
15022bd5e0b2SLois Curfman McInnes .  o_nzz - array containing the number of nonzeros in the various rows of the
15032bd5e0b2SLois Curfman McInnes            off-diagonal portion of the local submatrix (possibly different for
15042bd5e0b2SLois Curfman McInnes            each row) or PETSC_NULL.
15058a729477SBarry Smith 
1506ff756334SLois Curfman McInnes    Output Parameter:
150744cd7ae7SLois Curfman McInnes .  A - the matrix
15088a729477SBarry Smith 
1509ff756334SLois Curfman McInnes    Notes:
1510ff756334SLois Curfman McInnes    The AIJ format (also called the Yale sparse matrix format or
1511ff756334SLois Curfman McInnes    compressed row storage), is fully compatible with standard Fortran 77
15120002213bSLois Curfman McInnes    storage.  That is, the stored row and column indices can begin at
15130002213bSLois Curfman McInnes    either one (as in Fortran) or zero.  See the users manual for details.
1514ff756334SLois Curfman McInnes 
1515ff756334SLois Curfman McInnes    The user MUST specify either the local or global matrix dimensions
1516ff756334SLois Curfman McInnes    (possibly both).
1517ff756334SLois Curfman McInnes 
15185511cfe3SLois Curfman McInnes    By default, this format uses inodes (identical nodes) when possible.
15195511cfe3SLois Curfman McInnes    We search for consecutive rows with the same nonzero structure, thereby
15205511cfe3SLois Curfman McInnes    reusing matrix information to achieve increased efficiency.
15215511cfe3SLois Curfman McInnes 
15225511cfe3SLois Curfman McInnes    Options Database Keys:
15235511cfe3SLois Curfman McInnes $    -mat_aij_no_inode  - Do not use inodes
15246e62573dSLois Curfman McInnes $    -mat_aij_inode_limit <limit> - Set inode limit.
15256e62573dSLois Curfman McInnes $        (max limit=5)
15266e62573dSLois Curfman McInnes $    -mat_aij_oneindex - Internally use indexing starting at 1
15276e62573dSLois Curfman McInnes $        rather than 0.  Note: When calling MatSetValues(),
15286e62573dSLois Curfman McInnes $        the user still MUST index entries starting at 0!
15295511cfe3SLois Curfman McInnes 
1530e0245417SLois Curfman McInnes    Storage Information:
1531e0245417SLois Curfman McInnes    For a square global matrix we define each processor's diagonal portion
1532e0245417SLois Curfman McInnes    to be its local rows and the corresponding columns (a square submatrix);
1533e0245417SLois Curfman McInnes    each processor's off-diagonal portion encompasses the remainder of the
1534e0245417SLois Curfman McInnes    local matrix (a rectangular submatrix).
1535e0245417SLois Curfman McInnes 
1536e0245417SLois Curfman McInnes    The user can specify preallocated storage for the diagonal part of
15375ace5be8SLois Curfman McInnes    the local submatrix with either d_nz or d_nnz (not both).  Set
15385ace5be8SLois Curfman McInnes    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
15395ace5be8SLois Curfman McInnes    memory allocation.  Likewise, specify preallocated storage for the
15405ace5be8SLois Curfman McInnes    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1541ff756334SLois Curfman McInnes 
15425511cfe3SLois Curfman McInnes    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
15435511cfe3SLois Curfman McInnes    the figure below we depict these three local rows and all columns (0-11).
15442191d07cSBarry Smith 
1545b810aeb4SBarry Smith $          0 1 2 3 4 5 6 7 8 9 10 11
1546b810aeb4SBarry Smith $         -------------------
15475511cfe3SLois Curfman McInnes $  row 3  |  o o o d d d o o o o o o
15485511cfe3SLois Curfman McInnes $  row 4  |  o o o d d d o o o o o o
15495511cfe3SLois Curfman McInnes $  row 5  |  o o o d d d o o o o o o
15505511cfe3SLois Curfman McInnes $         -------------------
1551b810aeb4SBarry Smith $
1552b810aeb4SBarry Smith 
15535511cfe3SLois Curfman McInnes    Thus, any entries in the d locations are stored in the d (diagonal)
15545511cfe3SLois Curfman McInnes    submatrix, and any entries in the o locations are stored in the
15555511cfe3SLois Curfman McInnes    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
15565511cfe3SLois Curfman McInnes    stored simply in the MATSEQAIJ format for compressed row storage.
15575511cfe3SLois Curfman McInnes 
15585511cfe3SLois Curfman McInnes    Now d_nz should indicate the number of nonzeros per row in the d matrix,
15595511cfe3SLois Curfman McInnes    and o_nz should indicate the number of nonzeros per row in the o matrix.
15605511cfe3SLois Curfman McInnes    In general, for PDE problems in which most nonzeros are near the diagonal,
15613d323bbdSBarry Smith    one expects d_nz >> o_nz. For large problems you MUST preallocate memory
156292e8d321SLois Curfman McInnes    or you will get TERRIBLE performance; see the users' manual chapter on
15636da5968aSLois Curfman McInnes    matrices.
15643a511b96SLois Curfman McInnes 
1565dbd7a890SLois Curfman McInnes .keywords: matrix, aij, compressed row, sparse, parallel
1566ff756334SLois Curfman McInnes 
1567fafbff53SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
15688a729477SBarry Smith @*/
15691eb62cbbSBarry Smith int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
157044cd7ae7SLois Curfman McInnes                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
15718a729477SBarry Smith {
157244cd7ae7SLois Curfman McInnes   Mat          B;
157344cd7ae7SLois Curfman McInnes   Mat_MPIAIJ   *b;
157436ce4990SBarry Smith   int          ierr, i,sum[2],work[2],size;
1575416022c9SBarry Smith 
157644cd7ae7SLois Curfman McInnes   *A = 0;
1577f09e8eb9SSatish Balay   PetscHeaderCreate(B,_p_Mat,MAT_COOKIE,MATMPIAIJ,comm);
157844cd7ae7SLois Curfman McInnes   PLogObjectCreate(B);
157944cd7ae7SLois Curfman McInnes   B->data       = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b);
158044cd7ae7SLois Curfman McInnes   PetscMemzero(b,sizeof(Mat_MPIAIJ));
158144cd7ae7SLois Curfman McInnes   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
158236ce4990SBarry Smith   MPI_Comm_size(comm,&size);
158336ce4990SBarry Smith   if (size == 1) {
158436ce4990SBarry Smith     B->ops.getrowij          = MatGetRowIJ_MPIAIJ;
158536ce4990SBarry Smith     B->ops.restorerowij      = MatRestoreRowIJ_MPIAIJ;
158636ce4990SBarry Smith     B->ops.lufactorsymbolic  = MatLUFactorSymbolic_MPIAIJ;
158736ce4990SBarry Smith     B->ops.lufactornumeric   = MatLUFactorNumeric_MPIAIJ;
158836ce4990SBarry Smith     B->ops.lufactor          = MatLUFactor_MPIAIJ;
158936ce4990SBarry Smith     B->ops.solve             = MatSolve_MPIAIJ;
159036ce4990SBarry Smith     B->ops.solveadd          = MatSolveAdd_MPIAIJ;
159136ce4990SBarry Smith     B->ops.solvetrans        = MatSolveTrans_MPIAIJ;
159236ce4990SBarry Smith     B->ops.solvetransadd     = MatSolveTransAdd_MPIAIJ;
159336ce4990SBarry Smith     B->ops.ilufactorsymbolic = MatILUFactorSymbolic_MPIAIJ;
159436ce4990SBarry Smith   }
159544cd7ae7SLois Curfman McInnes   B->destroy    = MatDestroy_MPIAIJ;
159644cd7ae7SLois Curfman McInnes   B->view       = MatView_MPIAIJ;
159744cd7ae7SLois Curfman McInnes   B->factor     = 0;
159844cd7ae7SLois Curfman McInnes   B->assembled  = PETSC_FALSE;
159990f02eecSBarry Smith   B->mapping    = 0;
1600d6dfbf8fSBarry Smith 
160147794344SBarry Smith   B->insertmode = NOT_SET_VALUES;
160244cd7ae7SLois Curfman McInnes   MPI_Comm_rank(comm,&b->rank);
160344cd7ae7SLois Curfman McInnes   MPI_Comm_size(comm,&b->size);
16041eb62cbbSBarry Smith 
1605b4fd4287SBarry Smith   if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
1606e3372554SBarry Smith     SETERRQ(1,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
16071987afe7SBarry Smith 
1608dbd7a890SLois Curfman McInnes   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
16091eb62cbbSBarry Smith     work[0] = m; work[1] = n;
1610d65a2f8fSBarry Smith     MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
1611dbd7a890SLois Curfman McInnes     if (M == PETSC_DECIDE) M = sum[0];
1612dbd7a890SLois Curfman McInnes     if (N == PETSC_DECIDE) N = sum[1];
16131eb62cbbSBarry Smith   }
161444cd7ae7SLois Curfman McInnes   if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);}
161544cd7ae7SLois Curfman McInnes   if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);}
161644cd7ae7SLois Curfman McInnes   b->m = m; B->m = m;
161744cd7ae7SLois Curfman McInnes   b->n = n; B->n = n;
161844cd7ae7SLois Curfman McInnes   b->N = N; B->N = N;
161944cd7ae7SLois Curfman McInnes   b->M = M; B->M = M;
16201eb62cbbSBarry Smith 
16211eb62cbbSBarry Smith   /* build local table of row and column ownerships */
162244cd7ae7SLois Curfman McInnes   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
1623f09e8eb9SSatish Balay   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1624603f58a4SSatish Balay   b->cowners = b->rowners + b->size + 2;
162544cd7ae7SLois Curfman McInnes   MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);
162644cd7ae7SLois Curfman McInnes   b->rowners[0] = 0;
162744cd7ae7SLois Curfman McInnes   for ( i=2; i<=b->size; i++ ) {
162844cd7ae7SLois Curfman McInnes     b->rowners[i] += b->rowners[i-1];
16298a729477SBarry Smith   }
163044cd7ae7SLois Curfman McInnes   b->rstart = b->rowners[b->rank];
163144cd7ae7SLois Curfman McInnes   b->rend   = b->rowners[b->rank+1];
163244cd7ae7SLois Curfman McInnes   MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);
163344cd7ae7SLois Curfman McInnes   b->cowners[0] = 0;
163444cd7ae7SLois Curfman McInnes   for ( i=2; i<=b->size; i++ ) {
163544cd7ae7SLois Curfman McInnes     b->cowners[i] += b->cowners[i-1];
16368a729477SBarry Smith   }
163744cd7ae7SLois Curfman McInnes   b->cstart = b->cowners[b->rank];
163844cd7ae7SLois Curfman McInnes   b->cend   = b->cowners[b->rank+1];
16398a729477SBarry Smith 
16405ace5be8SLois Curfman McInnes   if (d_nz == PETSC_DEFAULT) d_nz = 5;
1641029af93fSBarry Smith   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
164244cd7ae7SLois Curfman McInnes   PLogObjectParent(B,b->A);
16437b8455f0SLois Curfman McInnes   if (o_nz == PETSC_DEFAULT) o_nz = 0;
1644029af93fSBarry Smith   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
164544cd7ae7SLois Curfman McInnes   PLogObjectParent(B,b->B);
16468a729477SBarry Smith 
16471eb62cbbSBarry Smith   /* build cache for off array entries formed */
164844cd7ae7SLois Curfman McInnes   ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
164990f02eecSBarry Smith   b->donotstash  = 0;
165044cd7ae7SLois Curfman McInnes   b->colmap      = 0;
165144cd7ae7SLois Curfman McInnes   b->garray      = 0;
165244cd7ae7SLois Curfman McInnes   b->roworiented = 1;
16538a729477SBarry Smith 
16541eb62cbbSBarry Smith   /* stuff used for matrix vector multiply */
165544cd7ae7SLois Curfman McInnes   b->lvec      = 0;
165644cd7ae7SLois Curfman McInnes   b->Mvctx     = 0;
16578a729477SBarry Smith 
16587a0afa10SBarry Smith   /* stuff for MatGetRow() */
165944cd7ae7SLois Curfman McInnes   b->rowindices   = 0;
166044cd7ae7SLois Curfman McInnes   b->rowvalues    = 0;
166144cd7ae7SLois Curfman McInnes   b->getrowactive = PETSC_FALSE;
16627a0afa10SBarry Smith 
166344cd7ae7SLois Curfman McInnes   *A = B;
1664d6dfbf8fSBarry Smith   return 0;
1665d6dfbf8fSBarry Smith }
1666c74985f6SBarry Smith 
16675615d1e5SSatish Balay #undef __FUNC__
16685615d1e5SSatish Balay #define __FUNC__ "MatConvertSameType_MPIAIJ"
16698f6be9afSLois Curfman McInnes int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues)
1670d6dfbf8fSBarry Smith {
1671d6dfbf8fSBarry Smith   Mat        mat;
1672416022c9SBarry Smith   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
1673a1b97e82SLois Curfman McInnes   int        ierr, len=0, flg;
1674d6dfbf8fSBarry Smith 
1675416022c9SBarry Smith   *newmat       = 0;
1676f09e8eb9SSatish Balay   PetscHeaderCreate(mat,_p_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm);
1677a5a9c739SBarry Smith   PLogObjectCreate(mat);
16780452661fSBarry Smith   mat->data       = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
1679416022c9SBarry Smith   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
168044a69424SLois Curfman McInnes   mat->destroy    = MatDestroy_MPIAIJ;
168144a69424SLois Curfman McInnes   mat->view       = MatView_MPIAIJ;
1682d6dfbf8fSBarry Smith   mat->factor     = matin->factor;
1683c456f294SBarry Smith   mat->assembled  = PETSC_TRUE;
1684d6dfbf8fSBarry Smith 
168544cd7ae7SLois Curfman McInnes   a->m = mat->m   = oldmat->m;
168644cd7ae7SLois Curfman McInnes   a->n = mat->n   = oldmat->n;
168744cd7ae7SLois Curfman McInnes   a->M = mat->M   = oldmat->M;
168844cd7ae7SLois Curfman McInnes   a->N = mat->N   = oldmat->N;
1689d6dfbf8fSBarry Smith 
1690416022c9SBarry Smith   a->rstart       = oldmat->rstart;
1691416022c9SBarry Smith   a->rend         = oldmat->rend;
1692416022c9SBarry Smith   a->cstart       = oldmat->cstart;
1693416022c9SBarry Smith   a->cend         = oldmat->cend;
169417699dbbSLois Curfman McInnes   a->size         = oldmat->size;
169517699dbbSLois Curfman McInnes   a->rank         = oldmat->rank;
169647794344SBarry Smith   mat->insertmode = NOT_SET_VALUES;
1697bcd2baecSBarry Smith   a->rowvalues    = 0;
1698bcd2baecSBarry Smith   a->getrowactive = PETSC_FALSE;
1699d6dfbf8fSBarry Smith 
1700603f58a4SSatish Balay   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
1701f09e8eb9SSatish Balay   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1702603f58a4SSatish Balay   a->cowners = a->rowners + a->size + 2;
1703603f58a4SSatish Balay   PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
1704416022c9SBarry Smith   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
17052ee70a88SLois Curfman McInnes   if (oldmat->colmap) {
17060452661fSBarry Smith     a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
1707416022c9SBarry Smith     PLogObjectMemory(mat,(a->N)*sizeof(int));
1708416022c9SBarry Smith     PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
1709416022c9SBarry Smith   } else a->colmap = 0;
17103f41c07dSBarry Smith   if (oldmat->garray) {
17113f41c07dSBarry Smith     len = ((Mat_SeqAIJ *) (oldmat->B->data))->n;
17123f41c07dSBarry Smith     a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray);
1713464493b3SBarry Smith     PLogObjectMemory(mat,len*sizeof(int));
17143f41c07dSBarry Smith     if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
1715416022c9SBarry Smith   } else a->garray = 0;
1716d6dfbf8fSBarry Smith 
1717416022c9SBarry Smith   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
1718416022c9SBarry Smith   PLogObjectParent(mat,a->lvec);
1719a56f8943SBarry Smith   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
1720416022c9SBarry Smith   PLogObjectParent(mat,a->Mvctx);
1721416022c9SBarry Smith   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
1722416022c9SBarry Smith   PLogObjectParent(mat,a->A);
1723416022c9SBarry Smith   ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
1724416022c9SBarry Smith   PLogObjectParent(mat,a->B);
17255dd7a6c7SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
172625cdf11fSBarry Smith   if (flg) {
1727682d7d0cSBarry Smith     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
1728682d7d0cSBarry Smith   }
17298a729477SBarry Smith   *newmat = mat;
17308a729477SBarry Smith   return 0;
17318a729477SBarry Smith }
1732416022c9SBarry Smith 
173377c4ece6SBarry Smith #include "sys.h"
1734416022c9SBarry Smith 
17355615d1e5SSatish Balay #undef __FUNC__
17365615d1e5SSatish Balay #define __FUNC__ "MatLoad_MPIAIJ"
173719bcc07fSBarry Smith int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
1738416022c9SBarry Smith {
1739d65a2f8fSBarry Smith   Mat          A;
1740416022c9SBarry Smith   int          i, nz, ierr, j,rstart, rend, fd;
1741d65a2f8fSBarry Smith   Scalar       *vals,*svals;
174219bcc07fSBarry Smith   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1743416022c9SBarry Smith   MPI_Status   status;
174417699dbbSLois Curfman McInnes   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1745d65a2f8fSBarry Smith   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
174619bcc07fSBarry Smith   int          tag = ((PetscObject)viewer)->tag;
1747416022c9SBarry Smith 
174817699dbbSLois Curfman McInnes   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
174917699dbbSLois Curfman McInnes   if (!rank) {
175090ace30eSBarry Smith     ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
175177c4ece6SBarry Smith     ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr);
1752e3372554SBarry Smith     if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not matrix object");
1753416022c9SBarry Smith   }
1754416022c9SBarry Smith 
1755416022c9SBarry Smith   MPI_Bcast(header+1,3,MPI_INT,0,comm);
1756416022c9SBarry Smith   M = header[1]; N = header[2];
1757416022c9SBarry Smith   /* determine ownership of all rows */
175817699dbbSLois Curfman McInnes   m = M/size + ((M % size) > rank);
17590452661fSBarry Smith   rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
1760416022c9SBarry Smith   MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1761416022c9SBarry Smith   rowners[0] = 0;
176217699dbbSLois Curfman McInnes   for ( i=2; i<=size; i++ ) {
1763416022c9SBarry Smith     rowners[i] += rowners[i-1];
1764416022c9SBarry Smith   }
176517699dbbSLois Curfman McInnes   rstart = rowners[rank];
176617699dbbSLois Curfman McInnes   rend   = rowners[rank+1];
1767416022c9SBarry Smith 
1768416022c9SBarry Smith   /* distribute row lengths to all processors */
17690452661fSBarry Smith   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
1770416022c9SBarry Smith   offlens = ourlens + (rend-rstart);
177117699dbbSLois Curfman McInnes   if (!rank) {
17720452661fSBarry Smith     rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
177377c4ece6SBarry Smith     ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
17740452661fSBarry Smith     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
177517699dbbSLois Curfman McInnes     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
1776d65a2f8fSBarry Smith     MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
17770452661fSBarry Smith     PetscFree(sndcounts);
1778416022c9SBarry Smith   }
1779416022c9SBarry Smith   else {
1780416022c9SBarry Smith     MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
1781416022c9SBarry Smith   }
1782416022c9SBarry Smith 
178317699dbbSLois Curfman McInnes   if (!rank) {
1784416022c9SBarry Smith     /* calculate the number of nonzeros on each processor */
17850452661fSBarry Smith     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
1786cddf8d76SBarry Smith     PetscMemzero(procsnz,size*sizeof(int));
178717699dbbSLois Curfman McInnes     for ( i=0; i<size; i++ ) {
1788416022c9SBarry Smith       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
1789416022c9SBarry Smith         procsnz[i] += rowlengths[j];
1790416022c9SBarry Smith       }
1791416022c9SBarry Smith     }
17920452661fSBarry Smith     PetscFree(rowlengths);
1793416022c9SBarry Smith 
1794416022c9SBarry Smith     /* determine max buffer needed and allocate it */
1795416022c9SBarry Smith     maxnz = 0;
179617699dbbSLois Curfman McInnes     for ( i=0; i<size; i++ ) {
17970452661fSBarry Smith       maxnz = PetscMax(maxnz,procsnz[i]);
1798416022c9SBarry Smith     }
17990452661fSBarry Smith     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
1800416022c9SBarry Smith 
1801416022c9SBarry Smith     /* read in my part of the matrix column indices  */
1802416022c9SBarry Smith     nz = procsnz[0];
18030452661fSBarry Smith     mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
180477c4ece6SBarry Smith     ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr);
1805d65a2f8fSBarry Smith 
1806d65a2f8fSBarry Smith     /* read in every one elses and ship off */
180717699dbbSLois Curfman McInnes     for ( i=1; i<size; i++ ) {
1808d65a2f8fSBarry Smith       nz = procsnz[i];
180977c4ece6SBarry Smith       ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
1810d65a2f8fSBarry Smith       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
1811d65a2f8fSBarry Smith     }
18120452661fSBarry Smith     PetscFree(cols);
1813416022c9SBarry Smith   }
1814416022c9SBarry Smith   else {
1815416022c9SBarry Smith     /* determine buffer space needed for message */
1816416022c9SBarry Smith     nz = 0;
1817416022c9SBarry Smith     for ( i=0; i<m; i++ ) {
1818416022c9SBarry Smith       nz += ourlens[i];
1819416022c9SBarry Smith     }
18200452661fSBarry Smith     mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1821416022c9SBarry Smith 
1822416022c9SBarry Smith     /* receive message of column indices*/
1823d65a2f8fSBarry Smith     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
1824416022c9SBarry Smith     MPI_Get_count(&status,MPI_INT,&maxnz);
1825e3372554SBarry Smith     if (maxnz != nz) SETERRQ(1,0,"something is wrong with file");
1826416022c9SBarry Smith   }
1827416022c9SBarry Smith 
1828416022c9SBarry Smith   /* loop over local rows, determining number of off diagonal entries */
1829cddf8d76SBarry Smith   PetscMemzero(offlens,m*sizeof(int));
1830416022c9SBarry Smith   jj = 0;
1831416022c9SBarry Smith   for ( i=0; i<m; i++ ) {
1832416022c9SBarry Smith     for ( j=0; j<ourlens[i]; j++ ) {
1833d65a2f8fSBarry Smith       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1834416022c9SBarry Smith       jj++;
1835416022c9SBarry Smith     }
1836416022c9SBarry Smith   }
1837d65a2f8fSBarry Smith 
1838d65a2f8fSBarry Smith   /* create our matrix */
1839416022c9SBarry Smith   for ( i=0; i<m; i++ ) {
1840416022c9SBarry Smith     ourlens[i] -= offlens[i];
1841416022c9SBarry Smith   }
1842d65a2f8fSBarry Smith   ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1843d65a2f8fSBarry Smith   A = *newmat;
18446d4a8577SBarry Smith   MatSetOption(A,MAT_COLUMNS_SORTED);
1845d65a2f8fSBarry Smith   for ( i=0; i<m; i++ ) {
1846d65a2f8fSBarry Smith     ourlens[i] += offlens[i];
1847d65a2f8fSBarry Smith   }
1848416022c9SBarry Smith 
184917699dbbSLois Curfman McInnes   if (!rank) {
18500452661fSBarry Smith     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);
1851416022c9SBarry Smith 
1852416022c9SBarry Smith     /* read in my part of the matrix numerical values  */
1853416022c9SBarry Smith     nz = procsnz[0];
185477c4ece6SBarry Smith     ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
1855d65a2f8fSBarry Smith 
1856d65a2f8fSBarry Smith     /* insert into matrix */
1857d65a2f8fSBarry Smith     jj      = rstart;
1858d65a2f8fSBarry Smith     smycols = mycols;
1859d65a2f8fSBarry Smith     svals   = vals;
1860d65a2f8fSBarry Smith     for ( i=0; i<m; i++ ) {
1861d65a2f8fSBarry Smith       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1862d65a2f8fSBarry Smith       smycols += ourlens[i];
1863d65a2f8fSBarry Smith       svals   += ourlens[i];
1864d65a2f8fSBarry Smith       jj++;
1865416022c9SBarry Smith     }
1866416022c9SBarry Smith 
1867d65a2f8fSBarry Smith     /* read in other processors and ship out */
186817699dbbSLois Curfman McInnes     for ( i=1; i<size; i++ ) {
1869416022c9SBarry Smith       nz = procsnz[i];
187077c4ece6SBarry Smith       ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
1871d65a2f8fSBarry Smith       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
1872416022c9SBarry Smith     }
18730452661fSBarry Smith     PetscFree(procsnz);
1874416022c9SBarry Smith   }
1875d65a2f8fSBarry Smith   else {
1876d65a2f8fSBarry Smith     /* receive numeric values */
18770452661fSBarry Smith     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);
1878416022c9SBarry Smith 
1879d65a2f8fSBarry Smith     /* receive message of values*/
1880d65a2f8fSBarry Smith     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
1881d65a2f8fSBarry Smith     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1882e3372554SBarry Smith     if (maxnz != nz) SETERRQ(1,0,"something is wrong with file");
1883d65a2f8fSBarry Smith 
1884d65a2f8fSBarry Smith     /* insert into matrix */
1885d65a2f8fSBarry Smith     jj      = rstart;
1886d65a2f8fSBarry Smith     smycols = mycols;
1887d65a2f8fSBarry Smith     svals   = vals;
1888d65a2f8fSBarry Smith     for ( i=0; i<m; i++ ) {
1889d65a2f8fSBarry Smith       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1890d65a2f8fSBarry Smith       smycols += ourlens[i];
1891d65a2f8fSBarry Smith       svals   += ourlens[i];
1892d65a2f8fSBarry Smith       jj++;
1893d65a2f8fSBarry Smith     }
1894d65a2f8fSBarry Smith   }
18950452661fSBarry Smith   PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);
1896d65a2f8fSBarry Smith 
18976d4a8577SBarry Smith   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
18986d4a8577SBarry Smith   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1899416022c9SBarry Smith   return 0;
1900416022c9SBarry Smith }
1901