xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 83e2fdc756c13f964f3ed5aaff42e3626c5b72bc)
1a5eb4965SSatish Balay #ifdef PETSC_RCS_HEADER
2*83e2fdc7SBarry Smith static char vcid[] = "$Id: mpiaij.c,v 1.210 1997/07/16 22:00:13 balay Exp bsmith $";
3cb512458SBarry Smith #endif
48a729477SBarry Smith 
53369ce9aSBarry Smith #include "pinclude/pviewer.h"
670f55243SBarry Smith #include "src/mat/impls/aij/mpi/mpiaij.h"
7f5eb4b81SSatish Balay #include "src/vec/vecimpl.h"
8d9942c19SSatish Balay #include "src/inline/spops.h"
98a729477SBarry Smith 
109e25ed09SBarry Smith /* local utility routine that creates a mapping from the global column
119e25ed09SBarry Smith number to the local number in the off-diagonal part of the local
129e25ed09SBarry Smith storage of the matrix.  This is done in a non scable way since the
139e25ed09SBarry Smith length of colmap equals the global matrix length.
149e25ed09SBarry Smith */
155615d1e5SSatish Balay #undef __FUNC__
165eea60f9SBarry Smith #define __FUNC__ "CreateColmap_MPIAIJ_Private" /* ADIC Ignore */
170a198c4cSBarry Smith int CreateColmap_MPIAIJ_Private(Mat mat)
189e25ed09SBarry Smith {
1944a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
20ec8511deSBarry Smith   Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
21905e6a2fSBarry Smith   int        n = B->n,i;
22dbb450caSBarry Smith 
23758f045eSSatish Balay   aij->colmap = (int *) PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap);
24464493b3SBarry Smith   PLogObjectMemory(mat,aij->N*sizeof(int));
25cddf8d76SBarry Smith   PetscMemzero(aij->colmap,aij->N*sizeof(int));
26905e6a2fSBarry Smith   for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
279e25ed09SBarry Smith   return 0;
289e25ed09SBarry Smith }
299e25ed09SBarry Smith 
302493cbb0SBarry Smith extern int DisAssemble_MPIAIJ(Mat);
312493cbb0SBarry Smith 
325615d1e5SSatish Balay #undef __FUNC__
335615d1e5SSatish Balay #define __FUNC__ "MatGetRowIJ_MPIAIJ"
348f6be9afSLois Curfman McInnes int MatGetRowIJ_MPIAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
353b2fbd54SBarry Smith                            PetscTruth *done)
36299609e3SLois Curfman McInnes {
37299609e3SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
38299609e3SLois Curfman McInnes   int        ierr;
3917699dbbSLois Curfman McInnes   if (aij->size == 1) {
403b2fbd54SBarry Smith     ierr = MatGetRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
41e3372554SBarry Smith   } else SETERRQ(1,0,"not supported in parallel");
423b2fbd54SBarry Smith   return 0;
433b2fbd54SBarry Smith }
443b2fbd54SBarry Smith 
455615d1e5SSatish Balay #undef __FUNC__
465eea60f9SBarry Smith #define __FUNC__ "MatRestoreRowIJ_MPIAIJ" /* ADIC Ignore */
478f6be9afSLois Curfman McInnes int MatRestoreRowIJ_MPIAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
483b2fbd54SBarry Smith                                PetscTruth *done)
493b2fbd54SBarry Smith {
503b2fbd54SBarry Smith   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
513b2fbd54SBarry Smith   int        ierr;
523b2fbd54SBarry Smith   if (aij->size == 1) {
533b2fbd54SBarry Smith     ierr = MatRestoreRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
54e3372554SBarry Smith   } else SETERRQ(1,0,"not supported in parallel");
55299609e3SLois Curfman McInnes   return 0;
56299609e3SLois Curfman McInnes }
57299609e3SLois Curfman McInnes 
580520107fSSatish Balay #define CHUNKSIZE   15
5930770e4dSSatish Balay #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
600520107fSSatish Balay { \
610520107fSSatish Balay  \
620520107fSSatish Balay     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
6330770e4dSSatish Balay     rmax = aimax[row]; nrow = ailen[row];  \
64f5e9677aSSatish Balay     col1 = col - shift; \
65f5e9677aSSatish Balay      \
66ba4e3ef2SSatish Balay     low = 0; high = nrow; \
67ba4e3ef2SSatish Balay     while (high-low > 5) { \
68ba4e3ef2SSatish Balay       t = (low+high)/2; \
69ba4e3ef2SSatish Balay       if (rp[t] > col) high = t; \
70ba4e3ef2SSatish Balay       else             low  = t; \
71ba4e3ef2SSatish Balay     } \
720520107fSSatish Balay       for ( _i=0; _i<nrow; _i++ ) { \
73f5e9677aSSatish Balay         if (rp[_i] > col1) break; \
74f5e9677aSSatish Balay         if (rp[_i] == col1) { \
750520107fSSatish Balay           if (addv == ADD_VALUES) ap[_i] += value;   \
760520107fSSatish Balay           else                  ap[_i] = value; \
7730770e4dSSatish Balay           goto a_noinsert; \
780520107fSSatish Balay         } \
790520107fSSatish Balay       }  \
8089280ab3SLois Curfman McInnes       if (nonew == 1) goto a_noinsert; \
8111ebbc71SLois Curfman McInnes       else if (nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \
820520107fSSatish Balay       if (nrow >= rmax) { \
830520107fSSatish Balay         /* there is no extra room in row, therefore enlarge */ \
840520107fSSatish Balay         int    new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \
850520107fSSatish Balay         Scalar *new_a; \
860520107fSSatish Balay  \
8711ebbc71SLois Curfman McInnes         if (nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \
8889280ab3SLois Curfman McInnes  \
890520107fSSatish Balay         /* malloc new storage space */ \
900520107fSSatish Balay         len     = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \
910520107fSSatish Balay         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
920520107fSSatish Balay         new_j   = (int *) (new_a + new_nz); \
930520107fSSatish Balay         new_i   = new_j + new_nz; \
940520107fSSatish Balay  \
950520107fSSatish Balay         /* copy over old data into new slots */ \
960520107fSSatish Balay         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \
970520107fSSatish Balay         for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
980520107fSSatish Balay         PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int)); \
990520107fSSatish Balay         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
1000520107fSSatish Balay         PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
1010520107fSSatish Balay                                                            len*sizeof(int)); \
1020520107fSSatish Balay         PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar)); \
1030520107fSSatish Balay         PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
1040520107fSSatish Balay                                                            len*sizeof(Scalar));  \
1050520107fSSatish Balay         /* free up old matrix storage */ \
106f5e9677aSSatish Balay  \
1070520107fSSatish Balay         PetscFree(a->a);  \
1080520107fSSatish Balay         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
1090520107fSSatish Balay         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
1100520107fSSatish Balay         a->singlemalloc = 1; \
1110520107fSSatish Balay  \
1120520107fSSatish Balay         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
11330770e4dSSatish Balay         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
1140520107fSSatish Balay         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
1150520107fSSatish Balay         a->maxnz += CHUNKSIZE; \
1160520107fSSatish Balay         a->reallocs++; \
1170520107fSSatish Balay       } \
1180520107fSSatish Balay       N = nrow++ - 1; a->nz++; \
1190520107fSSatish Balay       /* shift up all the later entries in this row */ \
1200520107fSSatish Balay       for ( ii=N; ii>=_i; ii-- ) { \
1210520107fSSatish Balay         rp[ii+1] = rp[ii]; \
1220520107fSSatish Balay         ap[ii+1] = ap[ii]; \
1230520107fSSatish Balay       } \
124f5e9677aSSatish Balay       rp[_i] = col1;  \
1250520107fSSatish Balay       ap[_i] = value;  \
12630770e4dSSatish Balay       a_noinsert: ; \
1270520107fSSatish Balay       ailen[row] = nrow; \
1280520107fSSatish Balay }
1290a198c4cSBarry Smith 
13030770e4dSSatish Balay #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
13130770e4dSSatish Balay { \
13230770e4dSSatish Balay  \
13330770e4dSSatish Balay     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
13430770e4dSSatish Balay     rmax = bimax[row]; nrow = bilen[row];  \
13530770e4dSSatish Balay     col1 = col - shift; \
13630770e4dSSatish Balay      \
137ba4e3ef2SSatish Balay     low = 0; high = nrow; \
138ba4e3ef2SSatish Balay     while (high-low > 5) { \
139ba4e3ef2SSatish Balay       t = (low+high)/2; \
140ba4e3ef2SSatish Balay       if (rp[t] > col) high = t; \
141ba4e3ef2SSatish Balay       else             low  = t; \
142ba4e3ef2SSatish Balay     } \
14330770e4dSSatish Balay        for ( _i=0; _i<nrow; _i++ ) { \
14430770e4dSSatish Balay         if (rp[_i] > col1) break; \
14530770e4dSSatish Balay         if (rp[_i] == col1) { \
14630770e4dSSatish Balay           if (addv == ADD_VALUES) ap[_i] += value;   \
14730770e4dSSatish Balay           else                  ap[_i] = value; \
14830770e4dSSatish Balay           goto b_noinsert; \
14930770e4dSSatish Balay         } \
15030770e4dSSatish Balay       }  \
15189280ab3SLois Curfman McInnes       if (nonew == 1) goto b_noinsert; \
15211ebbc71SLois Curfman McInnes       else if (nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \
15330770e4dSSatish Balay       if (nrow >= rmax) { \
15430770e4dSSatish Balay         /* there is no extra room in row, therefore enlarge */ \
15574c639caSSatish Balay         int    new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \
15630770e4dSSatish Balay         Scalar *new_a; \
15730770e4dSSatish Balay  \
15811ebbc71SLois Curfman McInnes         if (nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \
15989280ab3SLois Curfman McInnes  \
16030770e4dSSatish Balay         /* malloc new storage space */ \
16174c639caSSatish Balay         len     = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \
16230770e4dSSatish Balay         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
16330770e4dSSatish Balay         new_j   = (int *) (new_a + new_nz); \
16430770e4dSSatish Balay         new_i   = new_j + new_nz; \
16530770e4dSSatish Balay  \
16630770e4dSSatish Balay         /* copy over old data into new slots */ \
16730770e4dSSatish Balay         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \
16874c639caSSatish Balay         for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
16930770e4dSSatish Balay         PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int)); \
17030770e4dSSatish Balay         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
17130770e4dSSatish Balay         PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
17230770e4dSSatish Balay                                                            len*sizeof(int)); \
17330770e4dSSatish Balay         PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar)); \
17430770e4dSSatish Balay         PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
17530770e4dSSatish Balay                                                            len*sizeof(Scalar));  \
17630770e4dSSatish Balay         /* free up old matrix storage */ \
17730770e4dSSatish Balay  \
17874c639caSSatish Balay         PetscFree(b->a);  \
17974c639caSSatish Balay         if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \
18074c639caSSatish Balay         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
18174c639caSSatish Balay         b->singlemalloc = 1; \
18230770e4dSSatish Balay  \
18330770e4dSSatish Balay         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
18430770e4dSSatish Balay         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
18574c639caSSatish Balay         PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
18674c639caSSatish Balay         b->maxnz += CHUNKSIZE; \
18774c639caSSatish Balay         b->reallocs++; \
18830770e4dSSatish Balay       } \
18974c639caSSatish Balay       N = nrow++ - 1; b->nz++; \
19030770e4dSSatish Balay       /* shift up all the later entries in this row */ \
19130770e4dSSatish Balay       for ( ii=N; ii>=_i; ii-- ) { \
19230770e4dSSatish Balay         rp[ii+1] = rp[ii]; \
19330770e4dSSatish Balay         ap[ii+1] = ap[ii]; \
19430770e4dSSatish Balay       } \
19530770e4dSSatish Balay       rp[_i] = col1;  \
19630770e4dSSatish Balay       ap[_i] = value;  \
19730770e4dSSatish Balay       b_noinsert: ; \
19830770e4dSSatish Balay       bilen[row] = nrow; \
19930770e4dSSatish Balay }
20030770e4dSSatish Balay 
2010520107fSSatish Balay extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
2025615d1e5SSatish Balay #undef __FUNC__
2035615d1e5SSatish Balay #define __FUNC__ "MatSetValues_MPIAIJ"
2048f6be9afSLois Curfman McInnes int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
2058a729477SBarry Smith {
20644a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
2074b0e389bSBarry Smith   Scalar     value;
2081eb62cbbSBarry Smith   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
2091eb62cbbSBarry Smith   int        cstart = aij->cstart, cend = aij->cend,row,col;
210905e6a2fSBarry Smith   int        roworiented = aij->roworiented;
2118a729477SBarry Smith 
2120520107fSSatish Balay   /* Some Variables required in the macro */
2134ee7247eSSatish Balay   Mat        A = aij->A;
2144ee7247eSSatish Balay   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
21530770e4dSSatish Balay   int        *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j;
21630770e4dSSatish Balay   Scalar     *aa = a->a;
21730770e4dSSatish Balay 
21830770e4dSSatish Balay   Mat        B = aij->B;
21930770e4dSSatish Balay   Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data;
22030770e4dSSatish Balay   int        *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j;
22130770e4dSSatish Balay   Scalar     *ba = b->a;
22230770e4dSSatish Balay 
223ba4e3ef2SSatish Balay   int        *rp,ii,nrow,_i,rmax, N, col1,low,high,t;
22430770e4dSSatish Balay   int        nonew = a->nonew,shift = a->indexshift;
22530770e4dSSatish Balay   Scalar     *ap;
2264ee7247eSSatish Balay 
2278a729477SBarry Smith   for ( i=0; i<m; i++ ) {
2280a198c4cSBarry Smith #if defined(PETSC_BOPT_g)
229e3372554SBarry Smith     if (im[i] < 0) SETERRQ(1,0,"Negative row");
230e3372554SBarry Smith     if (im[i] >= aij->M) SETERRQ(1,0,"Row too large");
2310a198c4cSBarry Smith #endif
2324b0e389bSBarry Smith     if (im[i] >= rstart && im[i] < rend) {
2334b0e389bSBarry Smith       row = im[i] - rstart;
2341eb62cbbSBarry Smith       for ( j=0; j<n; j++ ) {
2354b0e389bSBarry Smith         if (in[j] >= cstart && in[j] < cend){
2364b0e389bSBarry Smith           col = in[j] - cstart;
2374b0e389bSBarry Smith           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
23830770e4dSSatish Balay           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
2390520107fSSatish Balay           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
2401eb62cbbSBarry Smith         }
2410a198c4cSBarry Smith #if defined(PETSC_BOPT_g)
242e3372554SBarry Smith         else if (in[j] < 0) {SETERRQ(1,0,"Negative column");}
243e3372554SBarry Smith         else if (in[j] >= aij->N) {SETERRQ(1,0,"Col too large");}
2440a198c4cSBarry Smith #endif
2451eb62cbbSBarry Smith         else {
246227d817aSBarry Smith           if (mat->was_assembled) {
247905e6a2fSBarry Smith             if (!aij->colmap) {
248905e6a2fSBarry Smith               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
249905e6a2fSBarry Smith             }
250905e6a2fSBarry Smith             col = aij->colmap[in[j]] - 1;
251ec8511deSBarry Smith             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
2522493cbb0SBarry Smith               ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
2534b0e389bSBarry Smith               col =  in[j];
2549bf004c3SSatish Balay               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
255f9508a3cSSatish Balay               B = aij->B;
256f9508a3cSSatish Balay               b = (Mat_SeqAIJ *) B->data;
257f9508a3cSSatish Balay               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
258f9508a3cSSatish Balay               ba = b->a;
259d6dfbf8fSBarry Smith             }
2609e25ed09SBarry Smith           }
2614b0e389bSBarry Smith           else col = in[j];
2624b0e389bSBarry Smith           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
26330770e4dSSatish Balay           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
26430770e4dSSatish Balay           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
2651eb62cbbSBarry Smith         }
2661eb62cbbSBarry Smith       }
2671eb62cbbSBarry Smith     }
2681eb62cbbSBarry Smith     else {
26990f02eecSBarry Smith       if (roworiented && !aij->donotstash) {
2704b0e389bSBarry Smith         ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
2714b0e389bSBarry Smith       }
2724b0e389bSBarry Smith       else {
27390f02eecSBarry Smith         if (!aij->donotstash) {
2744b0e389bSBarry Smith           row = im[i];
2754b0e389bSBarry Smith           for ( j=0; j<n; j++ ) {
2764b0e389bSBarry Smith             ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
2774b0e389bSBarry Smith           }
2784b0e389bSBarry Smith         }
2791eb62cbbSBarry Smith       }
2808a729477SBarry Smith     }
28190f02eecSBarry Smith   }
2828a729477SBarry Smith   return 0;
2838a729477SBarry Smith }
2848a729477SBarry Smith 
2855615d1e5SSatish Balay #undef __FUNC__
2865615d1e5SSatish Balay #define __FUNC__ "MatGetValues_MPIAIJ"
2878f6be9afSLois Curfman McInnes int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
288b49de8d1SLois Curfman McInnes {
289b49de8d1SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
290b49de8d1SLois Curfman McInnes   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
291b49de8d1SLois Curfman McInnes   int        cstart = aij->cstart, cend = aij->cend,row,col;
292b49de8d1SLois Curfman McInnes 
293b49de8d1SLois Curfman McInnes   for ( i=0; i<m; i++ ) {
294e3372554SBarry Smith     if (idxm[i] < 0) SETERRQ(1,0,"Negative row");
295e3372554SBarry Smith     if (idxm[i] >= aij->M) SETERRQ(1,0,"Row too large");
296b49de8d1SLois Curfman McInnes     if (idxm[i] >= rstart && idxm[i] < rend) {
297b49de8d1SLois Curfman McInnes       row = idxm[i] - rstart;
298b49de8d1SLois Curfman McInnes       for ( j=0; j<n; j++ ) {
299e3372554SBarry Smith         if (idxn[j] < 0) SETERRQ(1,0,"Negative column");
300e3372554SBarry Smith         if (idxn[j] >= aij->N) SETERRQ(1,0,"Col too large");
301b49de8d1SLois Curfman McInnes         if (idxn[j] >= cstart && idxn[j] < cend){
302b49de8d1SLois Curfman McInnes           col = idxn[j] - cstart;
303b49de8d1SLois Curfman McInnes           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
304b49de8d1SLois Curfman McInnes         }
305b49de8d1SLois Curfman McInnes         else {
306905e6a2fSBarry Smith           if (!aij->colmap) {
307905e6a2fSBarry Smith             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
308905e6a2fSBarry Smith           }
309905e6a2fSBarry Smith           col = aij->colmap[idxn[j]] - 1;
310e60e1c95SSatish Balay           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
311d9d09a02SSatish Balay           else {
312b49de8d1SLois Curfman McInnes             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
313b49de8d1SLois Curfman McInnes           }
314b49de8d1SLois Curfman McInnes         }
315b49de8d1SLois Curfman McInnes       }
316d9d09a02SSatish Balay     }
317b49de8d1SLois Curfman McInnes     else {
318e3372554SBarry Smith       SETERRQ(1,0,"Only local values currently supported");
319b49de8d1SLois Curfman McInnes     }
320b49de8d1SLois Curfman McInnes   }
321b49de8d1SLois Curfman McInnes   return 0;
322b49de8d1SLois Curfman McInnes }
323b49de8d1SLois Curfman McInnes 
3245615d1e5SSatish Balay #undef __FUNC__
3255615d1e5SSatish Balay #define __FUNC__ "MatAssemblyBegin_MPIAIJ"
3268f6be9afSLois Curfman McInnes int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
3278a729477SBarry Smith {
32844a69424SLois Curfman McInnes   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
329d6dfbf8fSBarry Smith   MPI_Comm    comm = mat->comm;
33017699dbbSLois Curfman McInnes   int         size = aij->size, *owners = aij->rowners;
33117699dbbSLois Curfman McInnes   int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
3321eb62cbbSBarry Smith   MPI_Request *send_waits,*recv_waits;
3336abc6512SBarry Smith   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
3341eb62cbbSBarry Smith   InsertMode  addv;
3351eb62cbbSBarry Smith   Scalar      *rvalues,*svalues;
3361eb62cbbSBarry Smith 
3371eb62cbbSBarry Smith   /* make sure all processors are either in INSERTMODE or ADDMODE */
33847794344SBarry Smith   MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
339dbb450caSBarry Smith   if (addv == (ADD_VALUES|INSERT_VALUES)) {
340e3372554SBarry Smith     SETERRQ(1,0,"Some processors inserted others added");
3411eb62cbbSBarry Smith   }
34247794344SBarry Smith   mat->insertmode = addv; /* in case this processor had no cache */
3431eb62cbbSBarry Smith 
3441eb62cbbSBarry Smith   /*  first count number of contributors to each processor */
3450452661fSBarry Smith   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
346cddf8d76SBarry Smith   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
3470452661fSBarry Smith   owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
3481eb62cbbSBarry Smith   for ( i=0; i<aij->stash.n; i++ ) {
3491eb62cbbSBarry Smith     idx = aij->stash.idx[i];
35017699dbbSLois Curfman McInnes     for ( j=0; j<size; j++ ) {
3511eb62cbbSBarry Smith       if (idx >= owners[j] && idx < owners[j+1]) {
3521eb62cbbSBarry Smith         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
3538a729477SBarry Smith       }
3548a729477SBarry Smith     }
3558a729477SBarry Smith   }
35617699dbbSLois Curfman McInnes   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
3571eb62cbbSBarry Smith 
3581eb62cbbSBarry Smith   /* inform other processors of number of messages and max length*/
3590452661fSBarry Smith   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
36017699dbbSLois Curfman McInnes   MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);
36117699dbbSLois Curfman McInnes   nreceives = work[rank];
36217699dbbSLois Curfman McInnes   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
36317699dbbSLois Curfman McInnes   nmax = work[rank];
3640452661fSBarry Smith   PetscFree(work);
3651eb62cbbSBarry Smith 
3661eb62cbbSBarry Smith   /* post receives:
3671eb62cbbSBarry Smith        1) each message will consist of ordered pairs
3681eb62cbbSBarry Smith      (global index,value) we store the global index as a double
369d6dfbf8fSBarry Smith      to simplify the message passing.
3701eb62cbbSBarry Smith        2) since we don't know how long each individual message is we
3711eb62cbbSBarry Smith      allocate the largest needed buffer for each receive. Potentially
3721eb62cbbSBarry Smith      this is a lot of wasted space.
3731eb62cbbSBarry Smith 
3741eb62cbbSBarry Smith 
3751eb62cbbSBarry Smith        This could be done better.
3761eb62cbbSBarry Smith   */
3770452661fSBarry Smith   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
37878b31e54SBarry Smith   CHKPTRQ(rvalues);
3790452661fSBarry Smith   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
38078b31e54SBarry Smith   CHKPTRQ(recv_waits);
3811eb62cbbSBarry Smith   for ( i=0; i<nreceives; i++ ) {
382416022c9SBarry Smith     MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
3831eb62cbbSBarry Smith               comm,recv_waits+i);
3841eb62cbbSBarry Smith   }
3851eb62cbbSBarry Smith 
3861eb62cbbSBarry Smith   /* do sends:
3871eb62cbbSBarry Smith       1) starts[i] gives the starting index in svalues for stuff going to
3881eb62cbbSBarry Smith          the ith processor
3891eb62cbbSBarry Smith   */
3900452661fSBarry Smith   svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
3910452661fSBarry Smith   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
39278b31e54SBarry Smith   CHKPTRQ(send_waits);
3930452661fSBarry Smith   starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
3941eb62cbbSBarry Smith   starts[0] = 0;
39517699dbbSLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
3961eb62cbbSBarry Smith   for ( i=0; i<aij->stash.n; i++ ) {
3971eb62cbbSBarry Smith     svalues[3*starts[owner[i]]]       = (Scalar)  aij->stash.idx[i];
3981eb62cbbSBarry Smith     svalues[3*starts[owner[i]]+1]     = (Scalar)  aij->stash.idy[i];
3991eb62cbbSBarry Smith     svalues[3*(starts[owner[i]]++)+2] =  aij->stash.array[i];
4001eb62cbbSBarry Smith   }
4010452661fSBarry Smith   PetscFree(owner);
4021eb62cbbSBarry Smith   starts[0] = 0;
40317699dbbSLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
4041eb62cbbSBarry Smith   count = 0;
40517699dbbSLois Curfman McInnes   for ( i=0; i<size; i++ ) {
4061eb62cbbSBarry Smith     if (procs[i]) {
407416022c9SBarry Smith       MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
4081eb62cbbSBarry Smith                 comm,send_waits+count++);
4091eb62cbbSBarry Smith     }
4101eb62cbbSBarry Smith   }
4110452661fSBarry Smith   PetscFree(starts); PetscFree(nprocs);
4121eb62cbbSBarry Smith 
4131eb62cbbSBarry Smith   /* Free cache space */
41455908cccSLois Curfman McInnes   PLogInfo(mat,"MatAssemblyBegin_MPIAIJ:Number of off-processor values %d\n",aij->stash.n);
41578b31e54SBarry Smith   ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
4161eb62cbbSBarry Smith 
4171eb62cbbSBarry Smith   aij->svalues    = svalues;    aij->rvalues    = rvalues;
4181eb62cbbSBarry Smith   aij->nsends     = nsends;     aij->nrecvs     = nreceives;
4191eb62cbbSBarry Smith   aij->send_waits = send_waits; aij->recv_waits = recv_waits;
4201eb62cbbSBarry Smith   aij->rmax       = nmax;
4211eb62cbbSBarry Smith 
4221eb62cbbSBarry Smith   return 0;
4231eb62cbbSBarry Smith }
42444a69424SLois Curfman McInnes extern int MatSetUpMultiply_MPIAIJ(Mat);
4251eb62cbbSBarry Smith 
4265615d1e5SSatish Balay #undef __FUNC__
4275615d1e5SSatish Balay #define __FUNC__ "MatAssemblyEnd_MPIAIJ"
4288f6be9afSLois Curfman McInnes int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
4291eb62cbbSBarry Smith {
43044a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
4311eb62cbbSBarry Smith   MPI_Status  *send_status,recv_status;
432416022c9SBarry Smith   int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
433905e6a2fSBarry Smith   int         row,col,other_disassembled;
4341eb62cbbSBarry Smith   Scalar      *values,val;
43547794344SBarry Smith   InsertMode  addv = mat->insertmode;
4361eb62cbbSBarry Smith 
4371eb62cbbSBarry Smith   /*  wait on receives */
4381eb62cbbSBarry Smith   while (count) {
439d6dfbf8fSBarry Smith     MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
4401eb62cbbSBarry Smith     /* unpack receives into our local space */
441d6dfbf8fSBarry Smith     values = aij->rvalues + 3*imdex*aij->rmax;
442c60448a5SBarry Smith     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
4431eb62cbbSBarry Smith     n = n/3;
4441eb62cbbSBarry Smith     for ( i=0; i<n; i++ ) {
445227d817aSBarry Smith       row = (int) PetscReal(values[3*i]) - aij->rstart;
446227d817aSBarry Smith       col = (int) PetscReal(values[3*i+1]);
4471eb62cbbSBarry Smith       val = values[3*i+2];
4481eb62cbbSBarry Smith       if (col >= aij->cstart && col < aij->cend) {
4491eb62cbbSBarry Smith         col -= aij->cstart;
4506fd7127cSSatish Balay         ierr = MatSetValues(aij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr);
4511eb62cbbSBarry Smith       }
4521eb62cbbSBarry Smith       else {
45355a1b374SBarry Smith         if (mat->was_assembled || mat->assembled) {
454905e6a2fSBarry Smith           if (!aij->colmap) {
455905e6a2fSBarry Smith             ierr = CreateColmap_MPIAIJ_Private(mat); CHKERRQ(ierr);
456905e6a2fSBarry Smith           }
457905e6a2fSBarry Smith           col = aij->colmap[col] - 1;
458ec8511deSBarry Smith           if (col < 0  && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
4592493cbb0SBarry Smith             ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
460227d817aSBarry Smith             col = (int) PetscReal(values[3*i+1]);
461d6dfbf8fSBarry Smith           }
4629e25ed09SBarry Smith         }
4636fd7127cSSatish Balay         ierr = MatSetValues(aij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr);
4641eb62cbbSBarry Smith       }
4651eb62cbbSBarry Smith     }
4661eb62cbbSBarry Smith     count--;
4671eb62cbbSBarry Smith   }
4680452661fSBarry Smith   PetscFree(aij->recv_waits); PetscFree(aij->rvalues);
4691eb62cbbSBarry Smith 
4701eb62cbbSBarry Smith   /* wait on sends */
4711eb62cbbSBarry Smith   if (aij->nsends) {
4720a198c4cSBarry Smith     send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
4731eb62cbbSBarry Smith     MPI_Waitall(aij->nsends,aij->send_waits,send_status);
4740452661fSBarry Smith     PetscFree(send_status);
4751eb62cbbSBarry Smith   }
4760452661fSBarry Smith   PetscFree(aij->send_waits); PetscFree(aij->svalues);
4771eb62cbbSBarry Smith 
47878b31e54SBarry Smith   ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
47978b31e54SBarry Smith   ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);
4801eb62cbbSBarry Smith 
4812493cbb0SBarry Smith   /* determine if any processor has disassembled, if so we must
4822493cbb0SBarry Smith      also disassemble ourselfs, in order that we may reassemble. */
483227d817aSBarry Smith   MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
484227d817aSBarry Smith   if (mat->was_assembled && !other_disassembled) {
4852493cbb0SBarry Smith     ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
4862493cbb0SBarry Smith   }
4872493cbb0SBarry Smith 
4886d4a8577SBarry Smith   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
48978b31e54SBarry Smith     ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
4905e42470aSBarry Smith   }
49178b31e54SBarry Smith   ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
49278b31e54SBarry Smith   ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);
4935e42470aSBarry Smith 
4947a0afa10SBarry Smith   if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;}
4958a729477SBarry Smith   return 0;
4968a729477SBarry Smith }
4978a729477SBarry Smith 
4985615d1e5SSatish Balay #undef __FUNC__
4995615d1e5SSatish Balay #define __FUNC__ "MatZeroEntries_MPIAIJ"
5008f6be9afSLois Curfman McInnes int MatZeroEntries_MPIAIJ(Mat A)
5011eb62cbbSBarry Smith {
50244a69424SLois Curfman McInnes   Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
503dbd7a890SLois Curfman McInnes   int        ierr;
50478b31e54SBarry Smith   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
50578b31e54SBarry Smith   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
5061eb62cbbSBarry Smith   return 0;
5071eb62cbbSBarry Smith }
5081eb62cbbSBarry Smith 
5091eb62cbbSBarry Smith /* the code does not do the diagonal entries correctly unless the
5101eb62cbbSBarry Smith    matrix is square and the column and row owerships are identical.
5111eb62cbbSBarry Smith    This is a BUG. The only way to fix it seems to be to access
5121eb62cbbSBarry Smith    aij->A and aij->B directly and not through the MatZeroRows()
5131eb62cbbSBarry Smith    routine.
5141eb62cbbSBarry Smith */
5155615d1e5SSatish Balay #undef __FUNC__
5165615d1e5SSatish Balay #define __FUNC__ "MatZeroRows_MPIAIJ"
5178f6be9afSLois Curfman McInnes int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
5181eb62cbbSBarry Smith {
51944a69424SLois Curfman McInnes   Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
52017699dbbSLois Curfman McInnes   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
5216abc6512SBarry Smith   int            *procs,*nprocs,j,found,idx,nsends,*work;
52217699dbbSLois Curfman McInnes   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
5235392566eSBarry Smith   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
524d6dfbf8fSBarry Smith   int            *lens,imdex,*lrows,*values;
525d6dfbf8fSBarry Smith   MPI_Comm       comm = A->comm;
5261eb62cbbSBarry Smith   MPI_Request    *send_waits,*recv_waits;
5271eb62cbbSBarry Smith   MPI_Status     recv_status,*send_status;
5281eb62cbbSBarry Smith   IS             istmp;
5291eb62cbbSBarry Smith 
53077c4ece6SBarry Smith   ierr = ISGetSize(is,&N); CHKERRQ(ierr);
53178b31e54SBarry Smith   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
5321eb62cbbSBarry Smith 
5331eb62cbbSBarry Smith   /*  first count number of contributors to each processor */
5340452661fSBarry Smith   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
535cddf8d76SBarry Smith   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
5360452661fSBarry Smith   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
5371eb62cbbSBarry Smith   for ( i=0; i<N; i++ ) {
5381eb62cbbSBarry Smith     idx = rows[i];
5391eb62cbbSBarry Smith     found = 0;
54017699dbbSLois Curfman McInnes     for ( j=0; j<size; j++ ) {
5411eb62cbbSBarry Smith       if (idx >= owners[j] && idx < owners[j+1]) {
5421eb62cbbSBarry Smith         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
5431eb62cbbSBarry Smith       }
5441eb62cbbSBarry Smith     }
545e3372554SBarry Smith     if (!found) SETERRQ(1,0,"Index out of range");
5461eb62cbbSBarry Smith   }
54717699dbbSLois Curfman McInnes   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
5481eb62cbbSBarry Smith 
5491eb62cbbSBarry Smith   /* inform other processors of number of messages and max length*/
5500452661fSBarry Smith   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
55117699dbbSLois Curfman McInnes   MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
55217699dbbSLois Curfman McInnes   nrecvs = work[rank];
55317699dbbSLois Curfman McInnes   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
55417699dbbSLois Curfman McInnes   nmax = work[rank];
5550452661fSBarry Smith   PetscFree(work);
5561eb62cbbSBarry Smith 
5571eb62cbbSBarry Smith   /* post receives:   */
5580452661fSBarry Smith   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
55978b31e54SBarry Smith   CHKPTRQ(rvalues);
5600452661fSBarry Smith   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
56178b31e54SBarry Smith   CHKPTRQ(recv_waits);
5621eb62cbbSBarry Smith   for ( i=0; i<nrecvs; i++ ) {
563416022c9SBarry Smith     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
5641eb62cbbSBarry Smith   }
5651eb62cbbSBarry Smith 
5661eb62cbbSBarry Smith   /* do sends:
5671eb62cbbSBarry Smith       1) starts[i] gives the starting index in svalues for stuff going to
5681eb62cbbSBarry Smith          the ith processor
5691eb62cbbSBarry Smith   */
5700452661fSBarry Smith   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
5710452661fSBarry Smith   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
57278b31e54SBarry Smith   CHKPTRQ(send_waits);
5730452661fSBarry Smith   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
5741eb62cbbSBarry Smith   starts[0] = 0;
57517699dbbSLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
5761eb62cbbSBarry Smith   for ( i=0; i<N; i++ ) {
5771eb62cbbSBarry Smith     svalues[starts[owner[i]]++] = rows[i];
5781eb62cbbSBarry Smith   }
5791eb62cbbSBarry Smith   ISRestoreIndices(is,&rows);
5801eb62cbbSBarry Smith 
5811eb62cbbSBarry Smith   starts[0] = 0;
58217699dbbSLois Curfman McInnes   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
5831eb62cbbSBarry Smith   count = 0;
58417699dbbSLois Curfman McInnes   for ( i=0; i<size; i++ ) {
5851eb62cbbSBarry Smith     if (procs[i]) {
586416022c9SBarry Smith       MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
5871eb62cbbSBarry Smith     }
5881eb62cbbSBarry Smith   }
5890452661fSBarry Smith   PetscFree(starts);
5901eb62cbbSBarry Smith 
59117699dbbSLois Curfman McInnes   base = owners[rank];
5921eb62cbbSBarry Smith 
5931eb62cbbSBarry Smith   /*  wait on receives */
5940452661fSBarry Smith   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
5951eb62cbbSBarry Smith   source = lens + nrecvs;
5961eb62cbbSBarry Smith   count  = nrecvs; slen = 0;
5971eb62cbbSBarry Smith   while (count) {
598d6dfbf8fSBarry Smith     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
5991eb62cbbSBarry Smith     /* unpack receives into our local space */
6001eb62cbbSBarry Smith     MPI_Get_count(&recv_status,MPI_INT,&n);
601d6dfbf8fSBarry Smith     source[imdex]  = recv_status.MPI_SOURCE;
602d6dfbf8fSBarry Smith     lens[imdex]  = n;
6031eb62cbbSBarry Smith     slen += n;
6041eb62cbbSBarry Smith     count--;
6051eb62cbbSBarry Smith   }
6060452661fSBarry Smith   PetscFree(recv_waits);
6071eb62cbbSBarry Smith 
6081eb62cbbSBarry Smith   /* move the data into the send scatter */
6090452661fSBarry Smith   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
6101eb62cbbSBarry Smith   count = 0;
6111eb62cbbSBarry Smith   for ( i=0; i<nrecvs; i++ ) {
6121eb62cbbSBarry Smith     values = rvalues + i*nmax;
6131eb62cbbSBarry Smith     for ( j=0; j<lens[i]; j++ ) {
6141eb62cbbSBarry Smith       lrows[count++] = values[j] - base;
6151eb62cbbSBarry Smith     }
6161eb62cbbSBarry Smith   }
6170452661fSBarry Smith   PetscFree(rvalues); PetscFree(lens);
6180452661fSBarry Smith   PetscFree(owner); PetscFree(nprocs);
6191eb62cbbSBarry Smith 
6201eb62cbbSBarry Smith   /* actually zap the local rows */
621029af93fSBarry Smith   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
622464493b3SBarry Smith   PLogObjectParent(A,istmp);
6230452661fSBarry Smith   PetscFree(lrows);
62478b31e54SBarry Smith   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
62578b31e54SBarry Smith   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
62678b31e54SBarry Smith   ierr = ISDestroy(istmp); CHKERRQ(ierr);
6271eb62cbbSBarry Smith 
6281eb62cbbSBarry Smith   /* wait on sends */
6291eb62cbbSBarry Smith   if (nsends) {
6300452661fSBarry Smith     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
63178b31e54SBarry Smith     CHKPTRQ(send_status);
6321eb62cbbSBarry Smith     MPI_Waitall(nsends,send_waits,send_status);
6330452661fSBarry Smith     PetscFree(send_status);
6341eb62cbbSBarry Smith   }
6350452661fSBarry Smith   PetscFree(send_waits); PetscFree(svalues);
6361eb62cbbSBarry Smith 
6371eb62cbbSBarry Smith   return 0;
6381eb62cbbSBarry Smith }
6391eb62cbbSBarry Smith 
6405615d1e5SSatish Balay #undef __FUNC__
6415615d1e5SSatish Balay #define __FUNC__ "MatMult_MPIAIJ"
6428f6be9afSLois Curfman McInnes int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
6431eb62cbbSBarry Smith {
644416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
645fbd6ef76SBarry Smith   int        ierr,nt;
646416022c9SBarry Smith 
647a2ce50c7SBarry Smith   ierr = VecGetLocalSize(xx,&nt);  CHKERRQ(ierr);
648fbd6ef76SBarry Smith   if (nt != a->n) {
649f40265daSLois Curfman McInnes     SETERRQ(1,0,"Incompatible partition of A and xx");
650fbd6ef76SBarry Smith   }
65143a90d84SBarry Smith   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
65238597bd4SSatish Balay   ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr);
65343a90d84SBarry Smith   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
65438597bd4SSatish Balay   ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
6551eb62cbbSBarry Smith   return 0;
6561eb62cbbSBarry Smith }
6571eb62cbbSBarry Smith 
6585615d1e5SSatish Balay #undef __FUNC__
6595615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_MPIAIJ"
6608f6be9afSLois Curfman McInnes int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
661da3a660dSBarry Smith {
662416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
663da3a660dSBarry Smith   int        ierr;
66443a90d84SBarry Smith   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
66538597bd4SSatish Balay   ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
66643a90d84SBarry Smith   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
66738597bd4SSatish Balay   ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
668da3a660dSBarry Smith   return 0;
669da3a660dSBarry Smith }
670da3a660dSBarry Smith 
6715615d1e5SSatish Balay #undef __FUNC__
6725615d1e5SSatish Balay #define __FUNC__ "MatMultTrans_MPIAIJ"
6738f6be9afSLois Curfman McInnes int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
674da3a660dSBarry Smith {
675416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
676da3a660dSBarry Smith   int        ierr;
677da3a660dSBarry Smith 
678da3a660dSBarry Smith   /* do nondiagonal part */
67938597bd4SSatish Balay   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
680da3a660dSBarry Smith   /* send it on its way */
681537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
682da3a660dSBarry Smith   /* do local part */
68338597bd4SSatish Balay   ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr);
684da3a660dSBarry Smith   /* receive remote parts: note this assumes the values are not actually */
685da3a660dSBarry Smith   /* inserted in yy until the next line, which is true for my implementation*/
686da3a660dSBarry Smith   /* but is not perhaps always true. */
687537820f0SBarry Smith   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
688da3a660dSBarry Smith   return 0;
689da3a660dSBarry Smith }
690da3a660dSBarry Smith 
6915615d1e5SSatish Balay #undef __FUNC__
6925615d1e5SSatish Balay #define __FUNC__ "MatMultTransAdd_MPIAIJ"
6938f6be9afSLois Curfman McInnes int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
694da3a660dSBarry Smith {
695416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
696da3a660dSBarry Smith   int        ierr;
697da3a660dSBarry Smith 
698da3a660dSBarry Smith   /* do nondiagonal part */
69938597bd4SSatish Balay   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
700da3a660dSBarry Smith   /* send it on its way */
701537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
702da3a660dSBarry Smith   /* do local part */
70338597bd4SSatish Balay   ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
704da3a660dSBarry Smith   /* receive remote parts: note this assumes the values are not actually */
705da3a660dSBarry Smith   /* inserted in yy until the next line, which is true for my implementation*/
706da3a660dSBarry Smith   /* but is not perhaps always true. */
7070a198c4cSBarry Smith   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
708da3a660dSBarry Smith   return 0;
709da3a660dSBarry Smith }
710da3a660dSBarry Smith 
7111eb62cbbSBarry Smith /*
7121eb62cbbSBarry Smith   This only works correctly for square matrices where the subblock A->A is the
7131eb62cbbSBarry Smith    diagonal block
7141eb62cbbSBarry Smith */
7155615d1e5SSatish Balay #undef __FUNC__
7165615d1e5SSatish Balay #define __FUNC__ "MatGetDiagonal_MPIAIJ"
7178f6be9afSLois Curfman McInnes int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
7181eb62cbbSBarry Smith {
719416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
72044cd7ae7SLois Curfman McInnes   if (a->M != a->N)
721e3372554SBarry Smith     SETERRQ(1,0,"Supports only square matrix where A->A is diag block");
7225baf8537SBarry Smith   if (a->rstart != a->cstart || a->rend != a->cend) {
723e3372554SBarry Smith     SETERRQ(1,0,"row partition must equal col partition");  }
724416022c9SBarry Smith   return MatGetDiagonal(a->A,v);
7251eb62cbbSBarry Smith }
7261eb62cbbSBarry Smith 
7275615d1e5SSatish Balay #undef __FUNC__
7285615d1e5SSatish Balay #define __FUNC__ "MatScale_MPIAIJ"
7298f6be9afSLois Curfman McInnes int MatScale_MPIAIJ(Scalar *aa,Mat A)
730052efed2SBarry Smith {
731052efed2SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
732052efed2SBarry Smith   int        ierr;
733052efed2SBarry Smith   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
734052efed2SBarry Smith   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
735052efed2SBarry Smith   return 0;
736052efed2SBarry Smith }
737052efed2SBarry Smith 
7385615d1e5SSatish Balay #undef __FUNC__
7395eea60f9SBarry Smith #define __FUNC__ "MatDestroy_MPIAIJ" /* ADIC Ignore */
7408f6be9afSLois Curfman McInnes int MatDestroy_MPIAIJ(PetscObject obj)
7411eb62cbbSBarry Smith {
7421eb62cbbSBarry Smith   Mat        mat = (Mat) obj;
74344a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
7441eb62cbbSBarry Smith   int        ierr;
745*83e2fdc7SBarry Smith 
746a5a9c739SBarry Smith #if defined(PETSC_LOG)
7476e35fa57SLois Curfman McInnes   PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N);
748a5a9c739SBarry Smith #endif
749*83e2fdc7SBarry Smith   ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
7500452661fSBarry Smith   PetscFree(aij->rowners);
75178b31e54SBarry Smith   ierr = MatDestroy(aij->A); CHKERRQ(ierr);
75278b31e54SBarry Smith   ierr = MatDestroy(aij->B); CHKERRQ(ierr);
7530452661fSBarry Smith   if (aij->colmap) PetscFree(aij->colmap);
7540452661fSBarry Smith   if (aij->garray) PetscFree(aij->garray);
7551eb62cbbSBarry Smith   if (aij->lvec)   VecDestroy(aij->lvec);
756a56f8943SBarry Smith   if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
7577a0afa10SBarry Smith   if (aij->rowvalues) PetscFree(aij->rowvalues);
7580452661fSBarry Smith   PetscFree(aij);
75990f02eecSBarry Smith   if (mat->mapping) {
76090f02eecSBarry Smith     ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr);
76190f02eecSBarry Smith   }
762a5a9c739SBarry Smith   PLogObjectDestroy(mat);
7630452661fSBarry Smith   PetscHeaderDestroy(mat);
7641eb62cbbSBarry Smith   return 0;
7651eb62cbbSBarry Smith }
766ee50ffe9SBarry Smith 
7675615d1e5SSatish Balay #undef __FUNC__
7685eea60f9SBarry Smith #define __FUNC__ "MatView_MPIAIJ_Binary" /* ADIC Ignore */
7698f6be9afSLois Curfman McInnes extern int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
7701eb62cbbSBarry Smith {
771416022c9SBarry Smith   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
772416022c9SBarry Smith   int         ierr;
773416022c9SBarry Smith 
77417699dbbSLois Curfman McInnes   if (aij->size == 1) {
775416022c9SBarry Smith     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
776416022c9SBarry Smith   }
777e3372554SBarry Smith   else SETERRQ(1,0,"Only uniprocessor output supported");
778416022c9SBarry Smith   return 0;
779416022c9SBarry Smith }
780416022c9SBarry Smith 
7815615d1e5SSatish Balay #undef __FUNC__
7825eea60f9SBarry Smith #define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworMatlab" /* ADIC Ignore */
7838f6be9afSLois Curfman McInnes extern int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
784416022c9SBarry Smith {
78544a69424SLois Curfman McInnes   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
786dbb450caSBarry Smith   Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
787a56f8943SBarry Smith   int         ierr, format,shift = C->indexshift,rank;
788d636dbe3SBarry Smith   FILE        *fd;
78919bcc07fSBarry Smith   ViewerType  vtype;
790416022c9SBarry Smith 
79119bcc07fSBarry Smith   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
79219bcc07fSBarry Smith   if (vtype  == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
79390ace30eSBarry Smith     ierr = ViewerGetFormat(viewer,&format);
7940a198c4cSBarry Smith     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
7954e220ebcSLois Curfman McInnes       MatInfo info;
7964e220ebcSLois Curfman McInnes       int     flg;
797a56f8943SBarry Smith       MPI_Comm_rank(mat->comm,&rank);
79890ace30eSBarry Smith       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
7994e220ebcSLois Curfman McInnes       ierr = MatGetInfo(mat,MAT_LOCAL,&info);
80095e01e2fSLois Curfman McInnes       ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr);
80177c4ece6SBarry Smith       PetscSequentialPhaseBegin(mat->comm,1);
80295e01e2fSLois Curfman McInnes       if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
8034e220ebcSLois Curfman McInnes          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
80495e01e2fSLois Curfman McInnes       else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
8054e220ebcSLois Curfman McInnes          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
8064e220ebcSLois Curfman McInnes       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);
8074e220ebcSLois Curfman McInnes       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);
8084e220ebcSLois Curfman McInnes       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);
8094e220ebcSLois Curfman McInnes       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);
810a56f8943SBarry Smith       fflush(fd);
81177c4ece6SBarry Smith       PetscSequentialPhaseEnd(mat->comm,1);
812a40aa06bSLois Curfman McInnes       ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr);
813416022c9SBarry Smith       return 0;
814416022c9SBarry Smith     }
8150a198c4cSBarry Smith     else if (format == VIEWER_FORMAT_ASCII_INFO) {
81608480c60SBarry Smith       return 0;
81708480c60SBarry Smith     }
818416022c9SBarry Smith   }
819416022c9SBarry Smith 
82019bcc07fSBarry Smith   if (vtype == DRAW_VIEWER) {
82119bcc07fSBarry Smith     Draw       draw;
82219bcc07fSBarry Smith     PetscTruth isnull;
82319bcc07fSBarry Smith     ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
82419bcc07fSBarry Smith     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
82519bcc07fSBarry Smith   }
82619bcc07fSBarry Smith 
82719bcc07fSBarry Smith   if (vtype == ASCII_FILE_VIEWER) {
82890ace30eSBarry Smith     ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
82977c4ece6SBarry Smith     PetscSequentialPhaseBegin(mat->comm,1);
830d13ab20cSBarry Smith     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
83117699dbbSLois Curfman McInnes            aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
8321eb62cbbSBarry Smith            aij->cend);
83378b31e54SBarry Smith     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
83478b31e54SBarry Smith     ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
835d13ab20cSBarry Smith     fflush(fd);
83677c4ece6SBarry Smith     PetscSequentialPhaseEnd(mat->comm,1);
837d13ab20cSBarry Smith   }
838416022c9SBarry Smith   else {
839a56f8943SBarry Smith     int size = aij->size;
840a56f8943SBarry Smith     rank = aij->rank;
84117699dbbSLois Curfman McInnes     if (size == 1) {
84278b31e54SBarry Smith       ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
84395373324SBarry Smith     }
84495373324SBarry Smith     else {
84595373324SBarry Smith       /* assemble the entire matrix onto first processor. */
84695373324SBarry Smith       Mat         A;
847ec8511deSBarry Smith       Mat_SeqAIJ *Aloc;
8482eb8c8abSBarry Smith       int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
84995373324SBarry Smith       Scalar      *a;
8502ee70a88SLois Curfman McInnes 
85117699dbbSLois Curfman McInnes       if (!rank) {
852b4fd4287SBarry Smith         ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
853c451ab25SLois Curfman McInnes                CHKERRQ(ierr);
85495373324SBarry Smith       }
85595373324SBarry Smith       else {
856b4fd4287SBarry Smith         ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
857c451ab25SLois Curfman McInnes                CHKERRQ(ierr);
85895373324SBarry Smith       }
859464493b3SBarry Smith       PLogObjectParent(mat,A);
860416022c9SBarry Smith 
86195373324SBarry Smith       /* copy over the A part */
862ec8511deSBarry Smith       Aloc = (Mat_SeqAIJ*) aij->A->data;
8632ee70a88SLois Curfman McInnes       m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
86495373324SBarry Smith       row = aij->rstart;
865dbb450caSBarry Smith       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
86695373324SBarry Smith       for ( i=0; i<m; i++ ) {
867416022c9SBarry Smith         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
86895373324SBarry Smith         row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
86995373324SBarry Smith       }
8702ee70a88SLois Curfman McInnes       aj = Aloc->j;
871dbb450caSBarry Smith       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}
87295373324SBarry Smith 
87395373324SBarry Smith       /* copy over the B part */
874ec8511deSBarry Smith       Aloc = (Mat_SeqAIJ*) aij->B->data;
8752ee70a88SLois Curfman McInnes       m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
87695373324SBarry Smith       row = aij->rstart;
8770452661fSBarry Smith       ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
878dbb450caSBarry Smith       for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
87995373324SBarry Smith       for ( i=0; i<m; i++ ) {
880416022c9SBarry Smith         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
88195373324SBarry Smith         row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
88295373324SBarry Smith       }
8830452661fSBarry Smith       PetscFree(ct);
8846d4a8577SBarry Smith       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
8856d4a8577SBarry Smith       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
88617699dbbSLois Curfman McInnes       if (!rank) {
88778b31e54SBarry Smith         ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
88895373324SBarry Smith       }
88978b31e54SBarry Smith       ierr = MatDestroy(A); CHKERRQ(ierr);
89095373324SBarry Smith     }
89195373324SBarry Smith   }
8921eb62cbbSBarry Smith   return 0;
8931eb62cbbSBarry Smith }
8941eb62cbbSBarry Smith 
8955615d1e5SSatish Balay #undef __FUNC__
8965eea60f9SBarry Smith #define __FUNC__ "MatView_MPIAIJ" /* ADIC Ignore */
8978f6be9afSLois Curfman McInnes int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
898416022c9SBarry Smith {
899416022c9SBarry Smith   Mat         mat = (Mat) obj;
900416022c9SBarry Smith   int         ierr;
90119bcc07fSBarry Smith   ViewerType  vtype;
902416022c9SBarry Smith 
90319bcc07fSBarry Smith   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
90419bcc07fSBarry Smith   if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
90519bcc07fSBarry Smith       vtype == DRAW_VIEWER       || vtype == MATLAB_VIEWER) {
906d7e8b826SBarry Smith     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
907416022c9SBarry Smith   }
90819bcc07fSBarry Smith   else if (vtype == BINARY_FILE_VIEWER) {
909416022c9SBarry Smith     return MatView_MPIAIJ_Binary(mat,viewer);
910416022c9SBarry Smith   }
911416022c9SBarry Smith   return 0;
912416022c9SBarry Smith }
913416022c9SBarry Smith 
9141eb62cbbSBarry Smith /*
9151eb62cbbSBarry Smith     This has to provide several versions.
9161eb62cbbSBarry Smith 
9171eb62cbbSBarry Smith      2) a) use only local smoothing updating outer values only once.
9181eb62cbbSBarry Smith         b) local smoothing updating outer values each inner iteration
919d6dfbf8fSBarry Smith      3) color updating out values betwen colors.
9201eb62cbbSBarry Smith */
9215615d1e5SSatish Balay #undef __FUNC__
9225615d1e5SSatish Balay #define __FUNC__ "MatRelax_MPIAIJ"
9238f6be9afSLois Curfman McInnes int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
924dbb450caSBarry Smith                            double fshift,int its,Vec xx)
9258a729477SBarry Smith {
92644a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
927d6dfbf8fSBarry Smith   Mat        AA = mat->A, BB = mat->B;
928ec8511deSBarry Smith   Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
929c16cb8f2SBarry Smith   Scalar     *b,*x,*xs,*ls,d,*v,sum;
9306abc6512SBarry Smith   int        ierr,*idx, *diag;
931416022c9SBarry Smith   int        n = mat->n, m = mat->m, i,shift = A->indexshift;
9328a729477SBarry Smith 
933d6dfbf8fSBarry Smith   VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
934dbb450caSBarry Smith   xs = x + shift; /* shift by one for index start of 1 */
935dbb450caSBarry Smith   ls = ls + shift;
936*83e2fdc7SBarry Smith   if (!A->diag) {ierr = MatMarkDiag_SeqAIJ(AA); CHKERRQ(ierr);}
937d6dfbf8fSBarry Smith   diag = A->diag;
938c16cb8f2SBarry Smith   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
939da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
94038597bd4SSatish Balay       return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
941da3a660dSBarry Smith     }
94243a90d84SBarry Smith     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
94378b31e54SBarry Smith     CHKERRQ(ierr);
94443a90d84SBarry Smith     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
94578b31e54SBarry Smith     CHKERRQ(ierr);
946d6dfbf8fSBarry Smith     while (its--) {
947d6dfbf8fSBarry Smith       /* go down through the rows */
948d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
949d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
950dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
951dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
952d6dfbf8fSBarry Smith         sum  = b[i];
953d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
954dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
955d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
956dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
957dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
958d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
95955a1b374SBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
960d6dfbf8fSBarry Smith       }
961d6dfbf8fSBarry Smith       /* come up through the rows */
962d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
963d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
964dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
965dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
966d6dfbf8fSBarry Smith         sum  = b[i];
967d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
968dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
969d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
970dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
971dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
972d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
97355a1b374SBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
974d6dfbf8fSBarry Smith       }
975d6dfbf8fSBarry Smith     }
976d6dfbf8fSBarry Smith   }
977d6dfbf8fSBarry Smith   else if (flag & SOR_LOCAL_FORWARD_SWEEP){
978da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
97938597bd4SSatish Balay       return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
980da3a660dSBarry Smith     }
98143a90d84SBarry Smith     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
98278b31e54SBarry Smith     CHKERRQ(ierr);
98343a90d84SBarry Smith     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
98478b31e54SBarry Smith     CHKERRQ(ierr);
985d6dfbf8fSBarry Smith     while (its--) {
986d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
987d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
988dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
989dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
990d6dfbf8fSBarry Smith         sum  = b[i];
991d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
992dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
993d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
994dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
995dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
996d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
99755a1b374SBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
998d6dfbf8fSBarry Smith       }
999d6dfbf8fSBarry Smith     }
1000d6dfbf8fSBarry Smith   }
1001d6dfbf8fSBarry Smith   else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1002da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
100338597bd4SSatish Balay       return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
1004da3a660dSBarry Smith     }
100543a90d84SBarry Smith     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,
100678b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
100743a90d84SBarry Smith     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,
100878b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
1009d6dfbf8fSBarry Smith     while (its--) {
1010d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
1011d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
1012dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
1013dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
1014d6dfbf8fSBarry Smith         sum  = b[i];
1015d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
1016dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
1017d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
1018dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
1019dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
1020d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
102155a1b374SBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
1022d6dfbf8fSBarry Smith       }
1023d6dfbf8fSBarry Smith     }
1024d6dfbf8fSBarry Smith   }
1025c16cb8f2SBarry Smith   else {
1026e3372554SBarry Smith     SETERRQ(1,0,"Option not supported");
1027c16cb8f2SBarry Smith   }
10288a729477SBarry Smith   return 0;
10298a729477SBarry Smith }
1030a66be287SLois Curfman McInnes 
10315615d1e5SSatish Balay #undef __FUNC__
10325eea60f9SBarry Smith #define __FUNC__ "MatGetInfo_MPIAIJ" /* ADIC Ignore */
10338f6be9afSLois Curfman McInnes int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1034a66be287SLois Curfman McInnes {
1035a66be287SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1036a66be287SLois Curfman McInnes   Mat        A = mat->A, B = mat->B;
10374e220ebcSLois Curfman McInnes   int        ierr;
10384e220ebcSLois Curfman McInnes   double     isend[5], irecv[5];
1039a66be287SLois Curfman McInnes 
10404e220ebcSLois Curfman McInnes   info->rows_global    = (double)mat->M;
10414e220ebcSLois Curfman McInnes   info->columns_global = (double)mat->N;
10424e220ebcSLois Curfman McInnes   info->rows_local     = (double)mat->m;
10434e220ebcSLois Curfman McInnes   info->columns_local  = (double)mat->N;
10444e220ebcSLois Curfman McInnes   info->block_size     = 1.0;
10454e220ebcSLois Curfman McInnes   ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
10464e220ebcSLois Curfman McInnes   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
10474e220ebcSLois Curfman McInnes   isend[3] = info->memory;  isend[4] = info->mallocs;
10484e220ebcSLois Curfman McInnes   ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
10494e220ebcSLois Curfman McInnes   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
10504e220ebcSLois Curfman McInnes   isend[3] += info->memory;  isend[4] += info->mallocs;
1051a66be287SLois Curfman McInnes   if (flag == MAT_LOCAL) {
10524e220ebcSLois Curfman McInnes     info->nz_used      = isend[0];
10534e220ebcSLois Curfman McInnes     info->nz_allocated = isend[1];
10544e220ebcSLois Curfman McInnes     info->nz_unneeded  = isend[2];
10554e220ebcSLois Curfman McInnes     info->memory       = isend[3];
10564e220ebcSLois Curfman McInnes     info->mallocs      = isend[4];
1057a66be287SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_MAX) {
10584e220ebcSLois Curfman McInnes     MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);
10594e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
10604e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
10614e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
10624e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
10634e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
1064a66be287SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_SUM) {
10654e220ebcSLois Curfman McInnes     MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);
10664e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
10674e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
10684e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
10694e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
10704e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
1071a66be287SLois Curfman McInnes   }
10724e220ebcSLois Curfman McInnes   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
10734e220ebcSLois Curfman McInnes   info->fill_ratio_needed = 0;
10744e220ebcSLois Curfman McInnes   info->factor_mallocs    = 0;
10754e220ebcSLois Curfman McInnes 
1076a66be287SLois Curfman McInnes   return 0;
1077a66be287SLois Curfman McInnes }
1078a66be287SLois Curfman McInnes 
1079299609e3SLois Curfman McInnes extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*);
1080299609e3SLois Curfman McInnes extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*);
1081299609e3SLois Curfman McInnes extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double);
1082299609e3SLois Curfman McInnes extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *);
1083299609e3SLois Curfman McInnes extern int MatSolve_MPIAIJ(Mat,Vec,Vec);
1084299609e3SLois Curfman McInnes extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec);
1085299609e3SLois Curfman McInnes extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec);
1086299609e3SLois Curfman McInnes extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec);
1087299609e3SLois Curfman McInnes 
10885615d1e5SSatish Balay #undef __FUNC__
10895eea60f9SBarry Smith #define __FUNC__ "MatSetOption_MPIAIJ" /* ADIC Ignore */
10908f6be9afSLois Curfman McInnes int MatSetOption_MPIAIJ(Mat A,MatOption op)
1091c74985f6SBarry Smith {
1092c0bbcb79SLois Curfman McInnes   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1093c74985f6SBarry Smith 
10946d4a8577SBarry Smith   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
10956d4a8577SBarry Smith       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
10966da5968aSLois Curfman McInnes       op == MAT_COLUMNS_UNSORTED ||
1097c2653b3dSLois Curfman McInnes       op == MAT_COLUMNS_SORTED ||
109896854ed6SLois Curfman McInnes       op == MAT_NEW_NONZERO_ALLOCATION_ERROR ||
1099c2653b3dSLois Curfman McInnes       op == MAT_NEW_NONZERO_LOCATION_ERROR) {
1100b1fbbac0SLois Curfman McInnes         MatSetOption(a->A,op);
1101b1fbbac0SLois Curfman McInnes         MatSetOption(a->B,op);
1102b1fbbac0SLois Curfman McInnes   } else if (op == MAT_ROW_ORIENTED) {
1103aeafbbfcSLois Curfman McInnes     a->roworiented = 1;
1104c0bbcb79SLois Curfman McInnes     MatSetOption(a->A,op);
1105c0bbcb79SLois Curfman McInnes     MatSetOption(a->B,op);
1106b1fbbac0SLois Curfman McInnes   } else if (op == MAT_ROWS_SORTED ||
11076da5968aSLois Curfman McInnes              op == MAT_ROWS_UNSORTED ||
11086d4a8577SBarry Smith              op == MAT_SYMMETRIC ||
11096d4a8577SBarry Smith              op == MAT_STRUCTURALLY_SYMMETRIC ||
11106d4a8577SBarry Smith              op == MAT_YES_NEW_DIAGONALS)
111194a424c1SBarry Smith     PLogInfo(A,"Info:MatSetOption_MPIAIJ:Option ignored\n");
11126d4a8577SBarry Smith   else if (op == MAT_COLUMN_ORIENTED) {
11134b0e389bSBarry Smith     a->roworiented = 0;
11144b0e389bSBarry Smith     MatSetOption(a->A,op);
11154b0e389bSBarry Smith     MatSetOption(a->B,op);
11162b362799SSatish Balay   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
111790f02eecSBarry Smith     a->donotstash = 1;
111890f02eecSBarry Smith   } else if (op == MAT_NO_NEW_DIAGONALS)
1119e3372554SBarry Smith     {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");}
1120c0bbcb79SLois Curfman McInnes   else
1121e3372554SBarry Smith     {SETERRQ(PETSC_ERR_SUP,0,"unknown option");}
1122c74985f6SBarry Smith   return 0;
1123c74985f6SBarry Smith }
1124c74985f6SBarry Smith 
11255615d1e5SSatish Balay #undef __FUNC__
11265eea60f9SBarry Smith #define __FUNC__ "MatGetSize_MPIAIJ" /* ADIC Ignore */
11278f6be9afSLois Curfman McInnes int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
1128c74985f6SBarry Smith {
112944a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1130c74985f6SBarry Smith   *m = mat->M; *n = mat->N;
1131c74985f6SBarry Smith   return 0;
1132c74985f6SBarry Smith }
1133c74985f6SBarry Smith 
11345615d1e5SSatish Balay #undef __FUNC__
11355eea60f9SBarry Smith #define __FUNC__ "MatGetLocalSize_MPIAIJ" /* ADIC Ignore */
11368f6be9afSLois Curfman McInnes int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
1137c74985f6SBarry Smith {
113844a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1139b7c46309SBarry Smith   *m = mat->m; *n = mat->N;
1140c74985f6SBarry Smith   return 0;
1141c74985f6SBarry Smith }
1142c74985f6SBarry Smith 
11435615d1e5SSatish Balay #undef __FUNC__
11445eea60f9SBarry Smith #define __FUNC__ "MatGetOwnershipRange_MPIAIJ" /* ADIC Ignore */
11458f6be9afSLois Curfman McInnes int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
1146c74985f6SBarry Smith {
114744a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1148c74985f6SBarry Smith   *m = mat->rstart; *n = mat->rend;
1149c74985f6SBarry Smith   return 0;
1150c74985f6SBarry Smith }
1151c74985f6SBarry Smith 
11526d84be18SBarry Smith extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
11536d84be18SBarry Smith extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
11546d84be18SBarry Smith 
11555615d1e5SSatish Balay #undef __FUNC__
11565615d1e5SSatish Balay #define __FUNC__ "MatGetRow_MPIAIJ"
11576d84be18SBarry Smith int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
115839e00950SLois Curfman McInnes {
1159154123eaSLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
116070f0671dSBarry Smith   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
1161154123eaSLois Curfman McInnes   int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
1162154123eaSLois Curfman McInnes   int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
116370f0671dSBarry Smith   int        *cmap, *idx_p;
116439e00950SLois Curfman McInnes 
1165e3372554SBarry Smith   if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,0,"Already active");
11667a0afa10SBarry Smith   mat->getrowactive = PETSC_TRUE;
11677a0afa10SBarry Smith 
116870f0671dSBarry Smith   if (!mat->rowvalues && (idx || v)) {
11697a0afa10SBarry Smith     /*
11707a0afa10SBarry Smith         allocate enough space to hold information from the longest row.
11717a0afa10SBarry Smith     */
11727a0afa10SBarry Smith     Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data;
1173c16cb8f2SBarry Smith     int     max = 1,m = mat->m,tmp;
1174c16cb8f2SBarry Smith     for ( i=0; i<m; i++ ) {
11757a0afa10SBarry Smith       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
11767a0afa10SBarry Smith       if (max < tmp) { max = tmp; }
11777a0afa10SBarry Smith     }
11787a0afa10SBarry Smith     mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar)));
11797a0afa10SBarry Smith     CHKPTRQ(mat->rowvalues);
11807a0afa10SBarry Smith     mat->rowindices = (int *) (mat->rowvalues + max);
11817a0afa10SBarry Smith   }
11827a0afa10SBarry Smith 
1183e3372554SBarry Smith   if (row < rstart || row >= rend) SETERRQ(1,0,"Only local rows")
1184abc0e9e4SLois Curfman McInnes   lrow = row - rstart;
118539e00950SLois Curfman McInnes 
1186154123eaSLois Curfman McInnes   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1187154123eaSLois Curfman McInnes   if (!v)   {pvA = 0; pvB = 0;}
1188154123eaSLois Curfman McInnes   if (!idx) {pcA = 0; if (!v) pcB = 0;}
118938597bd4SSatish Balay   ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
119038597bd4SSatish Balay   ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1191154123eaSLois Curfman McInnes   nztot = nzA + nzB;
1192154123eaSLois Curfman McInnes 
119370f0671dSBarry Smith   cmap  = mat->garray;
1194154123eaSLois Curfman McInnes   if (v  || idx) {
1195154123eaSLois Curfman McInnes     if (nztot) {
1196154123eaSLois Curfman McInnes       /* Sort by increasing column numbers, assuming A and B already sorted */
119770f0671dSBarry Smith       int imark = -1;
1198154123eaSLois Curfman McInnes       if (v) {
119970f0671dSBarry Smith         *v = v_p = mat->rowvalues;
120039e00950SLois Curfman McInnes         for ( i=0; i<nzB; i++ ) {
120170f0671dSBarry Smith           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1202154123eaSLois Curfman McInnes           else break;
1203154123eaSLois Curfman McInnes         }
1204154123eaSLois Curfman McInnes         imark = i;
120570f0671dSBarry Smith         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
120670f0671dSBarry Smith         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
1207154123eaSLois Curfman McInnes       }
1208154123eaSLois Curfman McInnes       if (idx) {
120970f0671dSBarry Smith         *idx = idx_p = mat->rowindices;
121070f0671dSBarry Smith         if (imark > -1) {
121170f0671dSBarry Smith           for ( i=0; i<imark; i++ ) {
121270f0671dSBarry Smith             idx_p[i] = cmap[cworkB[i]];
121370f0671dSBarry Smith           }
121470f0671dSBarry Smith         } else {
1215154123eaSLois Curfman McInnes           for ( i=0; i<nzB; i++ ) {
121670f0671dSBarry Smith             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1217154123eaSLois Curfman McInnes             else break;
1218154123eaSLois Curfman McInnes           }
1219154123eaSLois Curfman McInnes           imark = i;
122070f0671dSBarry Smith         }
122170f0671dSBarry Smith         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart + cworkA[i];
122270f0671dSBarry Smith         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]];
122339e00950SLois Curfman McInnes       }
122439e00950SLois Curfman McInnes     }
12251ca473b0SSatish Balay     else {
12261ca473b0SSatish Balay       if (idx) *idx = 0;
12271ca473b0SSatish Balay       if (v)   *v   = 0;
12281ca473b0SSatish Balay     }
1229154123eaSLois Curfman McInnes   }
123039e00950SLois Curfman McInnes   *nz = nztot;
123138597bd4SSatish Balay   ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
123238597bd4SSatish Balay   ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
123339e00950SLois Curfman McInnes   return 0;
123439e00950SLois Curfman McInnes }
123539e00950SLois Curfman McInnes 
12365615d1e5SSatish Balay #undef __FUNC__
12375eea60f9SBarry Smith #define __FUNC__ "MatRestoreRow_MPIAIJ" /* ADIC Ignore */
12386d84be18SBarry Smith int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
123939e00950SLois Curfman McInnes {
12407a0afa10SBarry Smith   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
12417a0afa10SBarry Smith   if (aij->getrowactive == PETSC_FALSE) {
1242e3372554SBarry Smith     SETERRQ(1,0,"MatGetRow not called");
12437a0afa10SBarry Smith   }
12447a0afa10SBarry Smith   aij->getrowactive = PETSC_FALSE;
124539e00950SLois Curfman McInnes   return 0;
124639e00950SLois Curfman McInnes }
124739e00950SLois Curfman McInnes 
12485615d1e5SSatish Balay #undef __FUNC__
12495615d1e5SSatish Balay #define __FUNC__ "MatNorm_MPIAIJ"
12508f6be9afSLois Curfman McInnes int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
1251855ac2c5SLois Curfman McInnes {
1252855ac2c5SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1253ec8511deSBarry Smith   Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
1254416022c9SBarry Smith   int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
1255416022c9SBarry Smith   double     sum = 0.0;
125604ca555eSLois Curfman McInnes   Scalar     *v;
125704ca555eSLois Curfman McInnes 
125817699dbbSLois Curfman McInnes   if (aij->size == 1) {
125914183eadSLois Curfman McInnes     ierr =  MatNorm(aij->A,type,norm); CHKERRQ(ierr);
126037fa93a5SLois Curfman McInnes   } else {
126104ca555eSLois Curfman McInnes     if (type == NORM_FROBENIUS) {
126204ca555eSLois Curfman McInnes       v = amat->a;
126304ca555eSLois Curfman McInnes       for (i=0; i<amat->nz; i++ ) {
126404ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX)
126504ca555eSLois Curfman McInnes         sum += real(conj(*v)*(*v)); v++;
126604ca555eSLois Curfman McInnes #else
126704ca555eSLois Curfman McInnes         sum += (*v)*(*v); v++;
126804ca555eSLois Curfman McInnes #endif
126904ca555eSLois Curfman McInnes       }
127004ca555eSLois Curfman McInnes       v = bmat->a;
127104ca555eSLois Curfman McInnes       for (i=0; i<bmat->nz; i++ ) {
127204ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX)
127304ca555eSLois Curfman McInnes         sum += real(conj(*v)*(*v)); v++;
127404ca555eSLois Curfman McInnes #else
127504ca555eSLois Curfman McInnes         sum += (*v)*(*v); v++;
127604ca555eSLois Curfman McInnes #endif
127704ca555eSLois Curfman McInnes       }
12786d84be18SBarry Smith       MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
127904ca555eSLois Curfman McInnes       *norm = sqrt(*norm);
128004ca555eSLois Curfman McInnes     }
128104ca555eSLois Curfman McInnes     else if (type == NORM_1) { /* max column norm */
128204ca555eSLois Curfman McInnes       double *tmp, *tmp2;
128304ca555eSLois Curfman McInnes       int    *jj, *garray = aij->garray;
1284758f045eSSatish Balay       tmp  = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp);
1285758f045eSSatish Balay       tmp2 = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp2);
1286cddf8d76SBarry Smith       PetscMemzero(tmp,aij->N*sizeof(double));
128704ca555eSLois Curfman McInnes       *norm = 0.0;
128804ca555eSLois Curfman McInnes       v = amat->a; jj = amat->j;
128904ca555eSLois Curfman McInnes       for ( j=0; j<amat->nz; j++ ) {
1290579c6b6fSBarry Smith         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
129104ca555eSLois Curfman McInnes       }
129204ca555eSLois Curfman McInnes       v = bmat->a; jj = bmat->j;
129304ca555eSLois Curfman McInnes       for ( j=0; j<bmat->nz; j++ ) {
1294579c6b6fSBarry Smith         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
129504ca555eSLois Curfman McInnes       }
12966d84be18SBarry Smith       MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);
129704ca555eSLois Curfman McInnes       for ( j=0; j<aij->N; j++ ) {
129804ca555eSLois Curfman McInnes         if (tmp2[j] > *norm) *norm = tmp2[j];
129904ca555eSLois Curfman McInnes       }
13000452661fSBarry Smith       PetscFree(tmp); PetscFree(tmp2);
130104ca555eSLois Curfman McInnes     }
130204ca555eSLois Curfman McInnes     else if (type == NORM_INFINITY) { /* max row norm */
1303515d9167SLois Curfman McInnes       double ntemp = 0.0;
130404ca555eSLois Curfman McInnes       for ( j=0; j<amat->m; j++ ) {
1305dbb450caSBarry Smith         v = amat->a + amat->i[j] + shift;
130604ca555eSLois Curfman McInnes         sum = 0.0;
130704ca555eSLois Curfman McInnes         for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
1308cddf8d76SBarry Smith           sum += PetscAbsScalar(*v); v++;
130904ca555eSLois Curfman McInnes         }
1310dbb450caSBarry Smith         v = bmat->a + bmat->i[j] + shift;
131104ca555eSLois Curfman McInnes         for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
1312cddf8d76SBarry Smith           sum += PetscAbsScalar(*v); v++;
131304ca555eSLois Curfman McInnes         }
1314515d9167SLois Curfman McInnes         if (sum > ntemp) ntemp = sum;
131504ca555eSLois Curfman McInnes       }
13166d84be18SBarry Smith       MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);
131704ca555eSLois Curfman McInnes     }
131804ca555eSLois Curfman McInnes     else {
1319e3372554SBarry Smith       SETERRQ(1,0,"No support for two norm");
132004ca555eSLois Curfman McInnes     }
132137fa93a5SLois Curfman McInnes   }
1322855ac2c5SLois Curfman McInnes   return 0;
1323855ac2c5SLois Curfman McInnes }
1324855ac2c5SLois Curfman McInnes 
13255615d1e5SSatish Balay #undef __FUNC__
13265615d1e5SSatish Balay #define __FUNC__ "MatTranspose_MPIAIJ"
13278f6be9afSLois Curfman McInnes int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1328b7c46309SBarry Smith {
1329b7c46309SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1330dbb450caSBarry Smith   Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
1331416022c9SBarry Smith   int        ierr,shift = Aloc->indexshift;
1332416022c9SBarry Smith   Mat        B;
1333b7c46309SBarry Smith   int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
1334b7c46309SBarry Smith   Scalar     *array;
1335b7c46309SBarry Smith 
13363638b69dSLois Curfman McInnes   if (matout == PETSC_NULL && M != N)
1337e3372554SBarry Smith     SETERRQ(1,0,"Square matrix only for in-place");
1338b4fd4287SBarry Smith   ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,
1339b4fd4287SBarry Smith          PETSC_NULL,&B); CHKERRQ(ierr);
1340b7c46309SBarry Smith 
1341b7c46309SBarry Smith   /* copy over the A part */
1342ec8511deSBarry Smith   Aloc = (Mat_SeqAIJ*) a->A->data;
1343b7c46309SBarry Smith   m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1344b7c46309SBarry Smith   row = a->rstart;
1345dbb450caSBarry Smith   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
1346b7c46309SBarry Smith   for ( i=0; i<m; i++ ) {
1347416022c9SBarry Smith     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1348b7c46309SBarry Smith     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1349b7c46309SBarry Smith   }
1350b7c46309SBarry Smith   aj = Aloc->j;
13514af08d9eSBarry Smith   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}
1352b7c46309SBarry Smith 
1353b7c46309SBarry Smith   /* copy over the B part */
1354ec8511deSBarry Smith   Aloc = (Mat_SeqAIJ*) a->B->data;
1355b7c46309SBarry Smith   m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1356b7c46309SBarry Smith   row = a->rstart;
13570452661fSBarry Smith   ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
1358dbb450caSBarry Smith   for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
1359b7c46309SBarry Smith   for ( i=0; i<m; i++ ) {
1360416022c9SBarry Smith     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1361b7c46309SBarry Smith     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1362b7c46309SBarry Smith   }
13630452661fSBarry Smith   PetscFree(ct);
13646d4a8577SBarry Smith   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
13656d4a8577SBarry Smith   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
13663638b69dSLois Curfman McInnes   if (matout != PETSC_NULL) {
13670de55854SLois Curfman McInnes     *matout = B;
13680de55854SLois Curfman McInnes   } else {
13690de55854SLois Curfman McInnes     /* This isn't really an in-place transpose .... but free data structures from a */
13700452661fSBarry Smith     PetscFree(a->rowners);
13710de55854SLois Curfman McInnes     ierr = MatDestroy(a->A); CHKERRQ(ierr);
13720de55854SLois Curfman McInnes     ierr = MatDestroy(a->B); CHKERRQ(ierr);
13730452661fSBarry Smith     if (a->colmap) PetscFree(a->colmap);
13740452661fSBarry Smith     if (a->garray) PetscFree(a->garray);
13750de55854SLois Curfman McInnes     if (a->lvec) VecDestroy(a->lvec);
1376a56f8943SBarry Smith     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
13770452661fSBarry Smith     PetscFree(a);
1378f09e8eb9SSatish Balay     PetscMemcpy(A,B,sizeof(struct _p_Mat));
13790452661fSBarry Smith     PetscHeaderDestroy(B);
13800de55854SLois Curfman McInnes   }
1381b7c46309SBarry Smith   return 0;
1382b7c46309SBarry Smith }
1383b7c46309SBarry Smith 
13845615d1e5SSatish Balay #undef __FUNC__
13855615d1e5SSatish Balay #define __FUNC__ "MatDiagonalScale_MPIAIJ"
13864b967eb1SSatish Balay int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1387a008b906SSatish Balay {
13884b967eb1SSatish Balay   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
13894b967eb1SSatish Balay   Mat a = aij->A, b = aij->B;
1390a008b906SSatish Balay   int ierr,s1,s2,s3;
1391a008b906SSatish Balay 
13924b967eb1SSatish Balay   ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr);
13934b967eb1SSatish Balay   if (rr) {
13944b967eb1SSatish Balay     s3 = aij->n;
13954b967eb1SSatish Balay     VecGetLocalSize_Fast(rr,s1);
1396e3372554SBarry Smith     if (s1!=s3) SETERRQ(1,0,"right vector non-conforming local size");
13974b967eb1SSatish Balay     /* Overlap communication with computation. */
139843a90d84SBarry Smith     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
1399a008b906SSatish Balay   }
14004b967eb1SSatish Balay   if (ll) {
14014b967eb1SSatish Balay     VecGetLocalSize_Fast(ll,s1);
1402e3372554SBarry Smith     if (s1!=s2) SETERRQ(1,0,"left vector non-conforming local size");
1403c351683dSSatish Balay     ierr = (*b->ops.diagonalscale)(b,ll,0); CHKERRQ(ierr);
14044b967eb1SSatish Balay   }
14054b967eb1SSatish Balay   /* scale  the diagonal block */
1406c351683dSSatish Balay   ierr = (*a->ops.diagonalscale)(a,ll,rr); CHKERRQ(ierr);
14074b967eb1SSatish Balay 
14084b967eb1SSatish Balay   if (rr) {
14094b967eb1SSatish Balay     /* Do a scatter end and then right scale the off-diagonal block */
141043a90d84SBarry Smith     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
1411c351683dSSatish Balay     ierr = (*b->ops.diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr);
14124b967eb1SSatish Balay   }
14134b967eb1SSatish Balay 
1414a008b906SSatish Balay   return 0;
1415a008b906SSatish Balay }
1416a008b906SSatish Balay 
1417a008b906SSatish Balay 
1418682d7d0cSBarry Smith extern int MatPrintHelp_SeqAIJ(Mat);
14195615d1e5SSatish Balay #undef __FUNC__
14205eea60f9SBarry Smith #define __FUNC__ "MatPrintHelp_MPIAIJ" /* ADIC Ignore */
14218f6be9afSLois Curfman McInnes int MatPrintHelp_MPIAIJ(Mat A)
1422682d7d0cSBarry Smith {
1423682d7d0cSBarry Smith   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1424682d7d0cSBarry Smith 
1425682d7d0cSBarry Smith   if (!a->rank) return MatPrintHelp_SeqAIJ(a->A);
1426682d7d0cSBarry Smith   else return 0;
1427682d7d0cSBarry Smith }
1428682d7d0cSBarry Smith 
14295615d1e5SSatish Balay #undef __FUNC__
14305eea60f9SBarry Smith #define __FUNC__ "MatGetBlockSize_MPIAIJ" /* ADIC Ignore */
14318f6be9afSLois Curfman McInnes int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
14325a838052SSatish Balay {
14335a838052SSatish Balay   *bs = 1;
14345a838052SSatish Balay   return 0;
14355a838052SSatish Balay }
14365615d1e5SSatish Balay #undef __FUNC__
14375eea60f9SBarry Smith #define __FUNC__ "MatSetUnfactored_MPIAIJ" /* ADIC Ignore */
14388f6be9afSLois Curfman McInnes int MatSetUnfactored_MPIAIJ(Mat A)
1439bb5a7306SBarry Smith {
1440bb5a7306SBarry Smith   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1441bb5a7306SBarry Smith   int        ierr;
1442bb5a7306SBarry Smith   ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
1443bb5a7306SBarry Smith   return 0;
1444bb5a7306SBarry Smith }
1445bb5a7306SBarry Smith 
14468f6be9afSLois Curfman McInnes extern int MatConvertSameType_MPIAIJ(Mat,Mat *,int);
14472f86bd48SSatish Balay extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int);
14480a198c4cSBarry Smith extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
14490a198c4cSBarry Smith extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **);
14508a729477SBarry Smith /* -------------------------------------------------------------------*/
14512ee70a88SLois Curfman McInnes static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
145239e00950SLois Curfman McInnes        MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
145344a69424SLois Curfman McInnes        MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
145444a69424SLois Curfman McInnes        MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
145536ce4990SBarry Smith        0,0,
145636ce4990SBarry Smith        0,0,
145736ce4990SBarry Smith        0,0,
145844a69424SLois Curfman McInnes        MatRelax_MPIAIJ,
1459b7c46309SBarry Smith        MatTranspose_MPIAIJ,
1460a66be287SLois Curfman McInnes        MatGetInfo_MPIAIJ,0,
1461a008b906SSatish Balay        MatGetDiagonal_MPIAIJ,MatDiagonalScale_MPIAIJ,MatNorm_MPIAIJ,
1462ee50ffe9SBarry Smith        MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
14631eb62cbbSBarry Smith        0,
1464299609e3SLois Curfman McInnes        MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
146536ce4990SBarry Smith        0,0,0,0,
1466d1710a03SLois Curfman McInnes        MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
146736ce4990SBarry Smith        0,0,
146894a9d846SBarry Smith        0,0,MatConvertSameType_MPIAIJ,0,0,
1469b49de8d1SLois Curfman McInnes        0,0,0,
1470598137ffSSatish Balay        MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0,
1471052efed2SBarry Smith        MatPrintHelp_MPIAIJ,
14723b2fbd54SBarry Smith        MatScale_MPIAIJ,0,0,0,
14730a198c4cSBarry Smith        MatGetBlockSize_MPIAIJ,0,0,0,0,
1474bb5a7306SBarry Smith        MatFDColoringCreate_MPIAIJ,0,MatSetUnfactored_MPIAIJ};
147536ce4990SBarry Smith 
14768a729477SBarry Smith 
14775615d1e5SSatish Balay #undef __FUNC__
14785615d1e5SSatish Balay #define __FUNC__ "MatCreateMPIAIJ"
14791987afe7SBarry Smith /*@C
1480ff756334SLois Curfman McInnes    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
14813a511b96SLois Curfman McInnes    (the default parallel PETSc format).  For good matrix assembly performance
14823a511b96SLois Curfman McInnes    the user should preallocate the matrix storage by setting the parameters
14833a511b96SLois Curfman McInnes    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
14843a511b96SLois Curfman McInnes    performance can be increased by more than a factor of 50.
14858a729477SBarry Smith 
14868a729477SBarry Smith    Input Parameters:
14871eb62cbbSBarry Smith .  comm - MPI communicator
14887d3e4905SLois Curfman McInnes .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
148992e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
149092e8d321SLois Curfman McInnes            y vector for the matrix-vector product y = Ax.
14911a3896d6SBarry Smith .  n - This value should be the same as the local size used in creating the
14921a3896d6SBarry Smith        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
14931a3896d6SBarry Smith        calculated if N is given) For square matrices n is almost always m.
14947d3e4905SLois Curfman McInnes .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
149592e8d321SLois Curfman McInnes .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
1496ab693e5aSLois Curfman McInnes .  d_nz - number of nonzeros per row in diagonal portion of local submatrix
1497ff756334SLois Curfman McInnes            (same for all local rows)
14982bd5e0b2SLois Curfman McInnes .  d_nzz - array containing the number of nonzeros in the various rows of the
149992e8d321SLois Curfman McInnes            diagonal portion of the local submatrix (possibly different for each row)
15002bd5e0b2SLois Curfman McInnes            or PETSC_NULL. You must leave room for the diagonal entry even if
15012bd5e0b2SLois Curfman McInnes            it is zero.
15022bd5e0b2SLois Curfman McInnes .  o_nz - number of nonzeros per row in the off-diagonal portion of local
1503ab693e5aSLois Curfman McInnes            submatrix (same for all local rows).
15042bd5e0b2SLois Curfman McInnes .  o_nzz - array containing the number of nonzeros in the various rows of the
15052bd5e0b2SLois Curfman McInnes            off-diagonal portion of the local submatrix (possibly different for
15062bd5e0b2SLois Curfman McInnes            each row) or PETSC_NULL.
15078a729477SBarry Smith 
1508ff756334SLois Curfman McInnes    Output Parameter:
150944cd7ae7SLois Curfman McInnes .  A - the matrix
15108a729477SBarry Smith 
1511ff756334SLois Curfman McInnes    Notes:
1512ff756334SLois Curfman McInnes    The AIJ format (also called the Yale sparse matrix format or
1513ff756334SLois Curfman McInnes    compressed row storage), is fully compatible with standard Fortran 77
15140002213bSLois Curfman McInnes    storage.  That is, the stored row and column indices can begin at
15150002213bSLois Curfman McInnes    either one (as in Fortran) or zero.  See the users manual for details.
1516ff756334SLois Curfman McInnes 
1517ff756334SLois Curfman McInnes    The user MUST specify either the local or global matrix dimensions
1518ff756334SLois Curfman McInnes    (possibly both).
1519ff756334SLois Curfman McInnes 
15205511cfe3SLois Curfman McInnes    By default, this format uses inodes (identical nodes) when possible.
15215511cfe3SLois Curfman McInnes    We search for consecutive rows with the same nonzero structure, thereby
15225511cfe3SLois Curfman McInnes    reusing matrix information to achieve increased efficiency.
15235511cfe3SLois Curfman McInnes 
15245511cfe3SLois Curfman McInnes    Options Database Keys:
15255511cfe3SLois Curfman McInnes $    -mat_aij_no_inode  - Do not use inodes
15266e62573dSLois Curfman McInnes $    -mat_aij_inode_limit <limit> - Set inode limit.
15276e62573dSLois Curfman McInnes $        (max limit=5)
15286e62573dSLois Curfman McInnes $    -mat_aij_oneindex - Internally use indexing starting at 1
15296e62573dSLois Curfman McInnes $        rather than 0.  Note: When calling MatSetValues(),
15306e62573dSLois Curfman McInnes $        the user still MUST index entries starting at 0!
15315511cfe3SLois Curfman McInnes 
1532e0245417SLois Curfman McInnes    Storage Information:
1533e0245417SLois Curfman McInnes    For a square global matrix we define each processor's diagonal portion
1534e0245417SLois Curfman McInnes    to be its local rows and the corresponding columns (a square submatrix);
1535e0245417SLois Curfman McInnes    each processor's off-diagonal portion encompasses the remainder of the
1536e0245417SLois Curfman McInnes    local matrix (a rectangular submatrix).
1537e0245417SLois Curfman McInnes 
1538e0245417SLois Curfman McInnes    The user can specify preallocated storage for the diagonal part of
15395ace5be8SLois Curfman McInnes    the local submatrix with either d_nz or d_nnz (not both).  Set
15405ace5be8SLois Curfman McInnes    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
15415ace5be8SLois Curfman McInnes    memory allocation.  Likewise, specify preallocated storage for the
15425ace5be8SLois Curfman McInnes    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1543ff756334SLois Curfman McInnes 
15445511cfe3SLois Curfman McInnes    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
15455511cfe3SLois Curfman McInnes    the figure below we depict these three local rows and all columns (0-11).
15462191d07cSBarry Smith 
1547b810aeb4SBarry Smith $          0 1 2 3 4 5 6 7 8 9 10 11
1548b810aeb4SBarry Smith $         -------------------
15495511cfe3SLois Curfman McInnes $  row 3  |  o o o d d d o o o o o o
15505511cfe3SLois Curfman McInnes $  row 4  |  o o o d d d o o o o o o
15515511cfe3SLois Curfman McInnes $  row 5  |  o o o d d d o o o o o o
15525511cfe3SLois Curfman McInnes $         -------------------
1553b810aeb4SBarry Smith $
1554b810aeb4SBarry Smith 
15555511cfe3SLois Curfman McInnes    Thus, any entries in the d locations are stored in the d (diagonal)
15565511cfe3SLois Curfman McInnes    submatrix, and any entries in the o locations are stored in the
15575511cfe3SLois Curfman McInnes    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
15585511cfe3SLois Curfman McInnes    stored simply in the MATSEQAIJ format for compressed row storage.
15595511cfe3SLois Curfman McInnes 
15605511cfe3SLois Curfman McInnes    Now d_nz should indicate the number of nonzeros per row in the d matrix,
15615511cfe3SLois Curfman McInnes    and o_nz should indicate the number of nonzeros per row in the o matrix.
15625511cfe3SLois Curfman McInnes    In general, for PDE problems in which most nonzeros are near the diagonal,
15633d323bbdSBarry Smith    one expects d_nz >> o_nz. For large problems you MUST preallocate memory
156492e8d321SLois Curfman McInnes    or you will get TERRIBLE performance; see the users' manual chapter on
15656da5968aSLois Curfman McInnes    matrices.
15663a511b96SLois Curfman McInnes 
1567dbd7a890SLois Curfman McInnes .keywords: matrix, aij, compressed row, sparse, parallel
1568ff756334SLois Curfman McInnes 
1569fafbff53SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
15708a729477SBarry Smith @*/
15711eb62cbbSBarry Smith int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
157244cd7ae7SLois Curfman McInnes                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
15738a729477SBarry Smith {
157444cd7ae7SLois Curfman McInnes   Mat          B;
157544cd7ae7SLois Curfman McInnes   Mat_MPIAIJ   *b;
157636ce4990SBarry Smith   int          ierr, i,sum[2],work[2],size;
1577416022c9SBarry Smith 
157844cd7ae7SLois Curfman McInnes   *A = 0;
1579f09e8eb9SSatish Balay   PetscHeaderCreate(B,_p_Mat,MAT_COOKIE,MATMPIAIJ,comm);
158044cd7ae7SLois Curfman McInnes   PLogObjectCreate(B);
158144cd7ae7SLois Curfman McInnes   B->data       = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b);
158244cd7ae7SLois Curfman McInnes   PetscMemzero(b,sizeof(Mat_MPIAIJ));
158344cd7ae7SLois Curfman McInnes   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
158436ce4990SBarry Smith   MPI_Comm_size(comm,&size);
158536ce4990SBarry Smith   if (size == 1) {
158636ce4990SBarry Smith     B->ops.getrowij          = MatGetRowIJ_MPIAIJ;
158736ce4990SBarry Smith     B->ops.restorerowij      = MatRestoreRowIJ_MPIAIJ;
158836ce4990SBarry Smith     B->ops.lufactorsymbolic  = MatLUFactorSymbolic_MPIAIJ;
158936ce4990SBarry Smith     B->ops.lufactornumeric   = MatLUFactorNumeric_MPIAIJ;
159036ce4990SBarry Smith     B->ops.lufactor          = MatLUFactor_MPIAIJ;
159136ce4990SBarry Smith     B->ops.solve             = MatSolve_MPIAIJ;
159236ce4990SBarry Smith     B->ops.solveadd          = MatSolveAdd_MPIAIJ;
159336ce4990SBarry Smith     B->ops.solvetrans        = MatSolveTrans_MPIAIJ;
159436ce4990SBarry Smith     B->ops.solvetransadd     = MatSolveTransAdd_MPIAIJ;
159536ce4990SBarry Smith     B->ops.ilufactorsymbolic = MatILUFactorSymbolic_MPIAIJ;
159636ce4990SBarry Smith   }
159744cd7ae7SLois Curfman McInnes   B->destroy    = MatDestroy_MPIAIJ;
159844cd7ae7SLois Curfman McInnes   B->view       = MatView_MPIAIJ;
159944cd7ae7SLois Curfman McInnes   B->factor     = 0;
160044cd7ae7SLois Curfman McInnes   B->assembled  = PETSC_FALSE;
160190f02eecSBarry Smith   B->mapping    = 0;
1602d6dfbf8fSBarry Smith 
160347794344SBarry Smith   B->insertmode = NOT_SET_VALUES;
160444cd7ae7SLois Curfman McInnes   MPI_Comm_rank(comm,&b->rank);
160544cd7ae7SLois Curfman McInnes   MPI_Comm_size(comm,&b->size);
16061eb62cbbSBarry Smith 
1607b4fd4287SBarry Smith   if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
1608e3372554SBarry Smith     SETERRQ(1,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
16091987afe7SBarry Smith 
1610dbd7a890SLois Curfman McInnes   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
16111eb62cbbSBarry Smith     work[0] = m; work[1] = n;
1612d65a2f8fSBarry Smith     MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
1613dbd7a890SLois Curfman McInnes     if (M == PETSC_DECIDE) M = sum[0];
1614dbd7a890SLois Curfman McInnes     if (N == PETSC_DECIDE) N = sum[1];
16151eb62cbbSBarry Smith   }
161644cd7ae7SLois Curfman McInnes   if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);}
161744cd7ae7SLois Curfman McInnes   if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);}
161844cd7ae7SLois Curfman McInnes   b->m = m; B->m = m;
161944cd7ae7SLois Curfman McInnes   b->n = n; B->n = n;
162044cd7ae7SLois Curfman McInnes   b->N = N; B->N = N;
162144cd7ae7SLois Curfman McInnes   b->M = M; B->M = M;
16221eb62cbbSBarry Smith 
16231eb62cbbSBarry Smith   /* build local table of row and column ownerships */
162444cd7ae7SLois Curfman McInnes   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
1625f09e8eb9SSatish Balay   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1626603f58a4SSatish Balay   b->cowners = b->rowners + b->size + 2;
162744cd7ae7SLois Curfman McInnes   MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);
162844cd7ae7SLois Curfman McInnes   b->rowners[0] = 0;
162944cd7ae7SLois Curfman McInnes   for ( i=2; i<=b->size; i++ ) {
163044cd7ae7SLois Curfman McInnes     b->rowners[i] += b->rowners[i-1];
16318a729477SBarry Smith   }
163244cd7ae7SLois Curfman McInnes   b->rstart = b->rowners[b->rank];
163344cd7ae7SLois Curfman McInnes   b->rend   = b->rowners[b->rank+1];
163444cd7ae7SLois Curfman McInnes   MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);
163544cd7ae7SLois Curfman McInnes   b->cowners[0] = 0;
163644cd7ae7SLois Curfman McInnes   for ( i=2; i<=b->size; i++ ) {
163744cd7ae7SLois Curfman McInnes     b->cowners[i] += b->cowners[i-1];
16388a729477SBarry Smith   }
163944cd7ae7SLois Curfman McInnes   b->cstart = b->cowners[b->rank];
164044cd7ae7SLois Curfman McInnes   b->cend   = b->cowners[b->rank+1];
16418a729477SBarry Smith 
16425ace5be8SLois Curfman McInnes   if (d_nz == PETSC_DEFAULT) d_nz = 5;
1643029af93fSBarry Smith   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
164444cd7ae7SLois Curfman McInnes   PLogObjectParent(B,b->A);
16457b8455f0SLois Curfman McInnes   if (o_nz == PETSC_DEFAULT) o_nz = 0;
1646029af93fSBarry Smith   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
164744cd7ae7SLois Curfman McInnes   PLogObjectParent(B,b->B);
16488a729477SBarry Smith 
16491eb62cbbSBarry Smith   /* build cache for off array entries formed */
165044cd7ae7SLois Curfman McInnes   ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
165190f02eecSBarry Smith   b->donotstash  = 0;
165244cd7ae7SLois Curfman McInnes   b->colmap      = 0;
165344cd7ae7SLois Curfman McInnes   b->garray      = 0;
165444cd7ae7SLois Curfman McInnes   b->roworiented = 1;
16558a729477SBarry Smith 
16561eb62cbbSBarry Smith   /* stuff used for matrix vector multiply */
165744cd7ae7SLois Curfman McInnes   b->lvec      = 0;
165844cd7ae7SLois Curfman McInnes   b->Mvctx     = 0;
16598a729477SBarry Smith 
16607a0afa10SBarry Smith   /* stuff for MatGetRow() */
166144cd7ae7SLois Curfman McInnes   b->rowindices   = 0;
166244cd7ae7SLois Curfman McInnes   b->rowvalues    = 0;
166344cd7ae7SLois Curfman McInnes   b->getrowactive = PETSC_FALSE;
16647a0afa10SBarry Smith 
166544cd7ae7SLois Curfman McInnes   *A = B;
1666d6dfbf8fSBarry Smith   return 0;
1667d6dfbf8fSBarry Smith }
1668c74985f6SBarry Smith 
16695615d1e5SSatish Balay #undef __FUNC__
16705615d1e5SSatish Balay #define __FUNC__ "MatConvertSameType_MPIAIJ"
16718f6be9afSLois Curfman McInnes int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues)
1672d6dfbf8fSBarry Smith {
1673d6dfbf8fSBarry Smith   Mat        mat;
1674416022c9SBarry Smith   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
1675a1b97e82SLois Curfman McInnes   int        ierr, len=0, flg;
1676d6dfbf8fSBarry Smith 
1677416022c9SBarry Smith   *newmat       = 0;
1678f09e8eb9SSatish Balay   PetscHeaderCreate(mat,_p_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm);
1679a5a9c739SBarry Smith   PLogObjectCreate(mat);
16800452661fSBarry Smith   mat->data       = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
1681416022c9SBarry Smith   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
168244a69424SLois Curfman McInnes   mat->destroy    = MatDestroy_MPIAIJ;
168344a69424SLois Curfman McInnes   mat->view       = MatView_MPIAIJ;
1684d6dfbf8fSBarry Smith   mat->factor     = matin->factor;
1685c456f294SBarry Smith   mat->assembled  = PETSC_TRUE;
1686d6dfbf8fSBarry Smith 
168744cd7ae7SLois Curfman McInnes   a->m = mat->m   = oldmat->m;
168844cd7ae7SLois Curfman McInnes   a->n = mat->n   = oldmat->n;
168944cd7ae7SLois Curfman McInnes   a->M = mat->M   = oldmat->M;
169044cd7ae7SLois Curfman McInnes   a->N = mat->N   = oldmat->N;
1691d6dfbf8fSBarry Smith 
1692416022c9SBarry Smith   a->rstart       = oldmat->rstart;
1693416022c9SBarry Smith   a->rend         = oldmat->rend;
1694416022c9SBarry Smith   a->cstart       = oldmat->cstart;
1695416022c9SBarry Smith   a->cend         = oldmat->cend;
169617699dbbSLois Curfman McInnes   a->size         = oldmat->size;
169717699dbbSLois Curfman McInnes   a->rank         = oldmat->rank;
169847794344SBarry Smith   mat->insertmode = NOT_SET_VALUES;
1699bcd2baecSBarry Smith   a->rowvalues    = 0;
1700bcd2baecSBarry Smith   a->getrowactive = PETSC_FALSE;
1701d6dfbf8fSBarry Smith 
1702603f58a4SSatish Balay   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
1703f09e8eb9SSatish Balay   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1704603f58a4SSatish Balay   a->cowners = a->rowners + a->size + 2;
1705603f58a4SSatish Balay   PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
1706416022c9SBarry Smith   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
17072ee70a88SLois Curfman McInnes   if (oldmat->colmap) {
17080452661fSBarry Smith     a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
1709416022c9SBarry Smith     PLogObjectMemory(mat,(a->N)*sizeof(int));
1710416022c9SBarry Smith     PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
1711416022c9SBarry Smith   } else a->colmap = 0;
17123f41c07dSBarry Smith   if (oldmat->garray) {
17133f41c07dSBarry Smith     len = ((Mat_SeqAIJ *) (oldmat->B->data))->n;
17143f41c07dSBarry Smith     a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray);
1715464493b3SBarry Smith     PLogObjectMemory(mat,len*sizeof(int));
17163f41c07dSBarry Smith     if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
1717416022c9SBarry Smith   } else a->garray = 0;
1718d6dfbf8fSBarry Smith 
1719416022c9SBarry Smith   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
1720416022c9SBarry Smith   PLogObjectParent(mat,a->lvec);
1721a56f8943SBarry Smith   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
1722416022c9SBarry Smith   PLogObjectParent(mat,a->Mvctx);
1723416022c9SBarry Smith   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
1724416022c9SBarry Smith   PLogObjectParent(mat,a->A);
1725416022c9SBarry Smith   ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
1726416022c9SBarry Smith   PLogObjectParent(mat,a->B);
17275dd7a6c7SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
172825cdf11fSBarry Smith   if (flg) {
1729682d7d0cSBarry Smith     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
1730682d7d0cSBarry Smith   }
17318a729477SBarry Smith   *newmat = mat;
17328a729477SBarry Smith   return 0;
17338a729477SBarry Smith }
1734416022c9SBarry Smith 
173577c4ece6SBarry Smith #include "sys.h"
1736416022c9SBarry Smith 
17375615d1e5SSatish Balay #undef __FUNC__
17385615d1e5SSatish Balay #define __FUNC__ "MatLoad_MPIAIJ"
173919bcc07fSBarry Smith int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
1740416022c9SBarry Smith {
1741d65a2f8fSBarry Smith   Mat          A;
1742416022c9SBarry Smith   int          i, nz, ierr, j,rstart, rend, fd;
1743d65a2f8fSBarry Smith   Scalar       *vals,*svals;
174419bcc07fSBarry Smith   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1745416022c9SBarry Smith   MPI_Status   status;
174617699dbbSLois Curfman McInnes   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1747d65a2f8fSBarry Smith   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
174819bcc07fSBarry Smith   int          tag = ((PetscObject)viewer)->tag;
1749416022c9SBarry Smith 
175017699dbbSLois Curfman McInnes   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
175117699dbbSLois Curfman McInnes   if (!rank) {
175290ace30eSBarry Smith     ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
175377c4ece6SBarry Smith     ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr);
1754e3372554SBarry Smith     if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not matrix object");
1755416022c9SBarry Smith   }
1756416022c9SBarry Smith 
1757416022c9SBarry Smith   MPI_Bcast(header+1,3,MPI_INT,0,comm);
1758416022c9SBarry Smith   M = header[1]; N = header[2];
1759416022c9SBarry Smith   /* determine ownership of all rows */
176017699dbbSLois Curfman McInnes   m = M/size + ((M % size) > rank);
17610452661fSBarry Smith   rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
1762416022c9SBarry Smith   MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1763416022c9SBarry Smith   rowners[0] = 0;
176417699dbbSLois Curfman McInnes   for ( i=2; i<=size; i++ ) {
1765416022c9SBarry Smith     rowners[i] += rowners[i-1];
1766416022c9SBarry Smith   }
176717699dbbSLois Curfman McInnes   rstart = rowners[rank];
176817699dbbSLois Curfman McInnes   rend   = rowners[rank+1];
1769416022c9SBarry Smith 
1770416022c9SBarry Smith   /* distribute row lengths to all processors */
17710452661fSBarry Smith   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
1772416022c9SBarry Smith   offlens = ourlens + (rend-rstart);
177317699dbbSLois Curfman McInnes   if (!rank) {
17740452661fSBarry Smith     rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
177577c4ece6SBarry Smith     ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
17760452661fSBarry Smith     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
177717699dbbSLois Curfman McInnes     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
1778d65a2f8fSBarry Smith     MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
17790452661fSBarry Smith     PetscFree(sndcounts);
1780416022c9SBarry Smith   }
1781416022c9SBarry Smith   else {
1782416022c9SBarry Smith     MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
1783416022c9SBarry Smith   }
1784416022c9SBarry Smith 
178517699dbbSLois Curfman McInnes   if (!rank) {
1786416022c9SBarry Smith     /* calculate the number of nonzeros on each processor */
17870452661fSBarry Smith     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
1788cddf8d76SBarry Smith     PetscMemzero(procsnz,size*sizeof(int));
178917699dbbSLois Curfman McInnes     for ( i=0; i<size; i++ ) {
1790416022c9SBarry Smith       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
1791416022c9SBarry Smith         procsnz[i] += rowlengths[j];
1792416022c9SBarry Smith       }
1793416022c9SBarry Smith     }
17940452661fSBarry Smith     PetscFree(rowlengths);
1795416022c9SBarry Smith 
1796416022c9SBarry Smith     /* determine max buffer needed and allocate it */
1797416022c9SBarry Smith     maxnz = 0;
179817699dbbSLois Curfman McInnes     for ( i=0; i<size; i++ ) {
17990452661fSBarry Smith       maxnz = PetscMax(maxnz,procsnz[i]);
1800416022c9SBarry Smith     }
18010452661fSBarry Smith     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
1802416022c9SBarry Smith 
1803416022c9SBarry Smith     /* read in my part of the matrix column indices  */
1804416022c9SBarry Smith     nz = procsnz[0];
18050452661fSBarry Smith     mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
180677c4ece6SBarry Smith     ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr);
1807d65a2f8fSBarry Smith 
1808d65a2f8fSBarry Smith     /* read in every one elses and ship off */
180917699dbbSLois Curfman McInnes     for ( i=1; i<size; i++ ) {
1810d65a2f8fSBarry Smith       nz = procsnz[i];
181177c4ece6SBarry Smith       ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
1812d65a2f8fSBarry Smith       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
1813d65a2f8fSBarry Smith     }
18140452661fSBarry Smith     PetscFree(cols);
1815416022c9SBarry Smith   }
1816416022c9SBarry Smith   else {
1817416022c9SBarry Smith     /* determine buffer space needed for message */
1818416022c9SBarry Smith     nz = 0;
1819416022c9SBarry Smith     for ( i=0; i<m; i++ ) {
1820416022c9SBarry Smith       nz += ourlens[i];
1821416022c9SBarry Smith     }
18220452661fSBarry Smith     mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1823416022c9SBarry Smith 
1824416022c9SBarry Smith     /* receive message of column indices*/
1825d65a2f8fSBarry Smith     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
1826416022c9SBarry Smith     MPI_Get_count(&status,MPI_INT,&maxnz);
1827e3372554SBarry Smith     if (maxnz != nz) SETERRQ(1,0,"something is wrong with file");
1828416022c9SBarry Smith   }
1829416022c9SBarry Smith 
1830416022c9SBarry Smith   /* loop over local rows, determining number of off diagonal entries */
1831cddf8d76SBarry Smith   PetscMemzero(offlens,m*sizeof(int));
1832416022c9SBarry Smith   jj = 0;
1833416022c9SBarry Smith   for ( i=0; i<m; i++ ) {
1834416022c9SBarry Smith     for ( j=0; j<ourlens[i]; j++ ) {
1835d65a2f8fSBarry Smith       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1836416022c9SBarry Smith       jj++;
1837416022c9SBarry Smith     }
1838416022c9SBarry Smith   }
1839d65a2f8fSBarry Smith 
1840d65a2f8fSBarry Smith   /* create our matrix */
1841416022c9SBarry Smith   for ( i=0; i<m; i++ ) {
1842416022c9SBarry Smith     ourlens[i] -= offlens[i];
1843416022c9SBarry Smith   }
1844d65a2f8fSBarry Smith   ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1845d65a2f8fSBarry Smith   A = *newmat;
18466d4a8577SBarry Smith   MatSetOption(A,MAT_COLUMNS_SORTED);
1847d65a2f8fSBarry Smith   for ( i=0; i<m; i++ ) {
1848d65a2f8fSBarry Smith     ourlens[i] += offlens[i];
1849d65a2f8fSBarry Smith   }
1850416022c9SBarry Smith 
185117699dbbSLois Curfman McInnes   if (!rank) {
18520452661fSBarry Smith     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);
1853416022c9SBarry Smith 
1854416022c9SBarry Smith     /* read in my part of the matrix numerical values  */
1855416022c9SBarry Smith     nz = procsnz[0];
185677c4ece6SBarry Smith     ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
1857d65a2f8fSBarry Smith 
1858d65a2f8fSBarry Smith     /* insert into matrix */
1859d65a2f8fSBarry Smith     jj      = rstart;
1860d65a2f8fSBarry Smith     smycols = mycols;
1861d65a2f8fSBarry Smith     svals   = vals;
1862d65a2f8fSBarry Smith     for ( i=0; i<m; i++ ) {
1863d65a2f8fSBarry Smith       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1864d65a2f8fSBarry Smith       smycols += ourlens[i];
1865d65a2f8fSBarry Smith       svals   += ourlens[i];
1866d65a2f8fSBarry Smith       jj++;
1867416022c9SBarry Smith     }
1868416022c9SBarry Smith 
1869d65a2f8fSBarry Smith     /* read in other processors and ship out */
187017699dbbSLois Curfman McInnes     for ( i=1; i<size; i++ ) {
1871416022c9SBarry Smith       nz = procsnz[i];
187277c4ece6SBarry Smith       ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
1873d65a2f8fSBarry Smith       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
1874416022c9SBarry Smith     }
18750452661fSBarry Smith     PetscFree(procsnz);
1876416022c9SBarry Smith   }
1877d65a2f8fSBarry Smith   else {
1878d65a2f8fSBarry Smith     /* receive numeric values */
18790452661fSBarry Smith     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);
1880416022c9SBarry Smith 
1881d65a2f8fSBarry Smith     /* receive message of values*/
1882d65a2f8fSBarry Smith     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
1883d65a2f8fSBarry Smith     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1884e3372554SBarry Smith     if (maxnz != nz) SETERRQ(1,0,"something is wrong with file");
1885d65a2f8fSBarry Smith 
1886d65a2f8fSBarry Smith     /* insert into matrix */
1887d65a2f8fSBarry Smith     jj      = rstart;
1888d65a2f8fSBarry Smith     smycols = mycols;
1889d65a2f8fSBarry Smith     svals   = vals;
1890d65a2f8fSBarry Smith     for ( i=0; i<m; i++ ) {
1891d65a2f8fSBarry Smith       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1892d65a2f8fSBarry Smith       smycols += ourlens[i];
1893d65a2f8fSBarry Smith       svals   += ourlens[i];
1894d65a2f8fSBarry Smith       jj++;
1895d65a2f8fSBarry Smith     }
1896d65a2f8fSBarry Smith   }
18970452661fSBarry Smith   PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);
1898d65a2f8fSBarry Smith 
18996d4a8577SBarry Smith   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
19006d4a8577SBarry Smith   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1901416022c9SBarry Smith   return 0;
1902416022c9SBarry Smith }
1903