xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 3d1612f7f6a78b84146aff229cce396c8cf5cec5)
1cb512458SBarry Smith #ifndef lint
2*3d1612f7SBarry Smith static char vcid[] = "$Id: mpiaij.c,v 1.102 1995/12/21 18:31:45 bsmith Exp bsmith $";
3cb512458SBarry Smith #endif
48a729477SBarry Smith 
51eb62cbbSBarry Smith #include "mpiaij.h"
68a729477SBarry Smith #include "vec/vecimpl.h"
7d6dfbf8fSBarry Smith #include "inline/spops.h"
88a729477SBarry Smith 
99e25ed09SBarry Smith /* local utility routine that creates a mapping from the global column
109e25ed09SBarry Smith number to the local number in the off-diagonal part of the local
119e25ed09SBarry Smith storage of the matrix.  This is done in a non scable way since the
129e25ed09SBarry Smith length of colmap equals the global matrix length.
139e25ed09SBarry Smith */
14b7c46309SBarry Smith static int CreateColmap_Private(Mat mat)
159e25ed09SBarry Smith {
1644a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
17ec8511deSBarry Smith   Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
18416022c9SBarry Smith   int        n = B->n,i,shift = B->indexshift;
19dbb450caSBarry Smith 
200452661fSBarry Smith   aij->colmap = (int *) PetscMalloc(aij->N*sizeof(int));CHKPTRQ(aij->colmap);
21464493b3SBarry Smith   PLogObjectMemory(mat,aij->N*sizeof(int));
22cddf8d76SBarry Smith   PetscMemzero(aij->colmap,aij->N*sizeof(int));
23dbb450caSBarry Smith   for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i-shift;
249e25ed09SBarry Smith   return 0;
259e25ed09SBarry Smith }
269e25ed09SBarry Smith 
272493cbb0SBarry Smith extern int DisAssemble_MPIAIJ(Mat);
282493cbb0SBarry Smith 
2951c98ccdSLois Curfman McInnes static int MatGetReordering_MPIAIJ(Mat mat,MatOrdering type,IS *rperm,IS *cperm)
30299609e3SLois Curfman McInnes {
31299609e3SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
32299609e3SLois Curfman McInnes   int ierr;
3317699dbbSLois Curfman McInnes   if (aij->size == 1) {
3451c98ccdSLois Curfman McInnes     ierr = MatGetReordering(aij->A,type,rperm,cperm); CHKERRQ(ierr);
3548d91487SBarry Smith   } else SETERRQ(1,"MatGetReordering_MPIAIJ:not supported in parallel");
36299609e3SLois Curfman McInnes   return 0;
37299609e3SLois Curfman McInnes }
38299609e3SLois Curfman McInnes 
392ee70a88SLois Curfman McInnes static int MatSetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,
401eb62cbbSBarry Smith                                int *idxn,Scalar *v,InsertMode addv)
418a729477SBarry Smith {
4244a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
43dbb450caSBarry Smith   Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data;
441eb62cbbSBarry Smith   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
451eb62cbbSBarry Smith   int        cstart = aij->cstart, cend = aij->cend,row,col;
46dbb450caSBarry Smith   int        shift = C->indexshift;
478a729477SBarry Smith 
4864119d58SLois Curfman McInnes   if (aij->insertmode != NOT_SET_VALUES && aij->insertmode != addv) {
4948d91487SBarry Smith     SETERRQ(1,"MatSetValues_MPIAIJ:Cannot mix inserts and adds");
508a729477SBarry Smith   }
511eb62cbbSBarry Smith   aij->insertmode = addv;
528a729477SBarry Smith   for ( i=0; i<m; i++ ) {
53bbb6d6a8SBarry Smith     if (idxm[i] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative row");
54bbb6d6a8SBarry Smith     if (idxm[i] >= aij->M) SETERRQ(1,"MatSetValues_MPIAIJ:Row too large");
551eb62cbbSBarry Smith     if (idxm[i] >= rstart && idxm[i] < rend) {
561eb62cbbSBarry Smith       row = idxm[i] - rstart;
571eb62cbbSBarry Smith       for ( j=0; j<n; j++ ) {
58bbb6d6a8SBarry Smith         if (idxn[j] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative column");
59bbb6d6a8SBarry Smith         if (idxn[j] >= aij->N) SETERRQ(1,"MatSetValues_MPIAIJ:Col too large");
601eb62cbbSBarry Smith         if (idxn[j] >= cstart && idxn[j] < cend){
611eb62cbbSBarry Smith           col = idxn[j] - cstart;
6278b31e54SBarry Smith           ierr = MatSetValues(aij->A,1,&row,1,&col,v+i*n+j,addv);CHKERRQ(ierr);
631eb62cbbSBarry Smith         }
641eb62cbbSBarry Smith         else {
65d6dfbf8fSBarry Smith           if (aij->assembled) {
66b7c46309SBarry Smith             if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
67dbb450caSBarry Smith             col = aij->colmap[idxn[j]] + shift;
68ec8511deSBarry Smith             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
692493cbb0SBarry Smith               ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
702493cbb0SBarry Smith               col =  idxn[j];
71d6dfbf8fSBarry Smith             }
729e25ed09SBarry Smith           }
739e25ed09SBarry Smith           else col = idxn[j];
7478b31e54SBarry Smith           ierr = MatSetValues(aij->B,1,&row,1,&col,v+i*n+j,addv);CHKERRQ(ierr);
751eb62cbbSBarry Smith         }
761eb62cbbSBarry Smith       }
771eb62cbbSBarry Smith     }
781eb62cbbSBarry Smith     else {
79416022c9SBarry Smith       ierr = StashValues_Private(&aij->stash,idxm[i],n,idxn,v+i*n,addv);CHKERRQ(ierr);
801eb62cbbSBarry Smith     }
818a729477SBarry Smith   }
828a729477SBarry Smith   return 0;
838a729477SBarry Smith }
848a729477SBarry Smith 
85b49de8d1SLois Curfman McInnes static int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
86b49de8d1SLois Curfman McInnes {
87b49de8d1SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
88b49de8d1SLois Curfman McInnes   Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data;
89b49de8d1SLois Curfman McInnes   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
90b49de8d1SLois Curfman McInnes   int        cstart = aij->cstart, cend = aij->cend,row,col;
91b49de8d1SLois Curfman McInnes   int        shift = C->indexshift;
92b49de8d1SLois Curfman McInnes 
93b49de8d1SLois Curfman McInnes   if (!aij->assembled) SETERRQ(1,"MatGetValues_MPIAIJ:Not for unassembled matrix");
94b49de8d1SLois Curfman McInnes   for ( i=0; i<m; i++ ) {
95b49de8d1SLois Curfman McInnes     if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative row");
96b49de8d1SLois Curfman McInnes     if (idxm[i] >= aij->M) SETERRQ(1,"MatGetValues_MPIAIJ:Row too large");
97b49de8d1SLois Curfman McInnes     if (idxm[i] >= rstart && idxm[i] < rend) {
98b49de8d1SLois Curfman McInnes       row = idxm[i] - rstart;
99b49de8d1SLois Curfman McInnes       for ( j=0; j<n; j++ ) {
100b49de8d1SLois Curfman McInnes         if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative column");
101b49de8d1SLois Curfman McInnes         if (idxn[j] >= aij->N) SETERRQ(1,"MatGetValues_MPIAIJ:Col too large");
102b49de8d1SLois Curfman McInnes         if (idxn[j] >= cstart && idxn[j] < cend){
103b49de8d1SLois Curfman McInnes           col = idxn[j] - cstart;
104b49de8d1SLois Curfman McInnes           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
105b49de8d1SLois Curfman McInnes         }
106b49de8d1SLois Curfman McInnes         else {
107b49de8d1SLois Curfman McInnes           col = aij->colmap[idxn[j]] + shift;
108b49de8d1SLois Curfman McInnes           ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
109b49de8d1SLois Curfman McInnes         }
110b49de8d1SLois Curfman McInnes       }
111b49de8d1SLois Curfman McInnes     }
112b49de8d1SLois Curfman McInnes     else {
113b49de8d1SLois Curfman McInnes       SETERRQ(1,"MatGetValues_MPIAIJ:Only local values currently supported");
114b49de8d1SLois Curfman McInnes     }
115b49de8d1SLois Curfman McInnes   }
116b49de8d1SLois Curfman McInnes   return 0;
117b49de8d1SLois Curfman McInnes }
118b49de8d1SLois Curfman McInnes 
119fc76ce87SLois Curfman McInnes static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
1208a729477SBarry Smith {
12144a69424SLois Curfman McInnes   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
122d6dfbf8fSBarry Smith   MPI_Comm    comm = mat->comm;
12317699dbbSLois Curfman McInnes   int         size = aij->size, *owners = aij->rowners;
12417699dbbSLois Curfman McInnes   int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
1251eb62cbbSBarry Smith   MPI_Request *send_waits,*recv_waits;
1266abc6512SBarry Smith   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
1271eb62cbbSBarry Smith   InsertMode  addv;
1281eb62cbbSBarry Smith   Scalar      *rvalues,*svalues;
1291eb62cbbSBarry Smith 
1301eb62cbbSBarry Smith   /* make sure all processors are either in INSERTMODE or ADDMODE */
131d65a2f8fSBarry Smith   MPI_Allreduce(&aij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
132dbb450caSBarry Smith   if (addv == (ADD_VALUES|INSERT_VALUES)) {
133bbb6d6a8SBarry Smith     SETERRQ(1,"MatAssemblyBegin_MPIAIJ:Some processors inserted others added");
1341eb62cbbSBarry Smith   }
1351eb62cbbSBarry Smith   aij->insertmode = addv; /* in case this processor had no cache */
1361eb62cbbSBarry Smith 
1371eb62cbbSBarry Smith   /*  first count number of contributors to each processor */
1380452661fSBarry Smith   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
139cddf8d76SBarry Smith   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
1400452661fSBarry Smith   owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
1411eb62cbbSBarry Smith   for ( i=0; i<aij->stash.n; i++ ) {
1421eb62cbbSBarry Smith     idx = aij->stash.idx[i];
14317699dbbSLois Curfman McInnes     for ( j=0; j<size; j++ ) {
1441eb62cbbSBarry Smith       if (idx >= owners[j] && idx < owners[j+1]) {
1451eb62cbbSBarry Smith         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
1468a729477SBarry Smith       }
1478a729477SBarry Smith     }
1488a729477SBarry Smith   }
14917699dbbSLois Curfman McInnes   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
1501eb62cbbSBarry Smith 
1511eb62cbbSBarry Smith   /* inform other processors of number of messages and max length*/
1520452661fSBarry Smith   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
15317699dbbSLois Curfman McInnes   MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);
15417699dbbSLois Curfman McInnes   nreceives = work[rank];
15517699dbbSLois Curfman McInnes   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
15617699dbbSLois Curfman McInnes   nmax = work[rank];
1570452661fSBarry Smith   PetscFree(work);
1581eb62cbbSBarry Smith 
1591eb62cbbSBarry Smith   /* post receives:
1601eb62cbbSBarry Smith        1) each message will consist of ordered pairs
1611eb62cbbSBarry Smith      (global index,value) we store the global index as a double
162d6dfbf8fSBarry Smith      to simplify the message passing.
1631eb62cbbSBarry Smith        2) since we don't know how long each individual message is we
1641eb62cbbSBarry Smith      allocate the largest needed buffer for each receive. Potentially
1651eb62cbbSBarry Smith      this is a lot of wasted space.
1661eb62cbbSBarry Smith 
1671eb62cbbSBarry Smith 
1681eb62cbbSBarry Smith        This could be done better.
1691eb62cbbSBarry Smith   */
1700452661fSBarry Smith   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
17178b31e54SBarry Smith   CHKPTRQ(rvalues);
1720452661fSBarry Smith   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
17378b31e54SBarry Smith   CHKPTRQ(recv_waits);
1741eb62cbbSBarry Smith   for ( i=0; i<nreceives; i++ ) {
175416022c9SBarry Smith     MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
1761eb62cbbSBarry Smith               comm,recv_waits+i);
1771eb62cbbSBarry Smith   }
1781eb62cbbSBarry Smith 
1791eb62cbbSBarry Smith   /* do sends:
1801eb62cbbSBarry Smith       1) starts[i] gives the starting index in svalues for stuff going to
1811eb62cbbSBarry Smith          the ith processor
1821eb62cbbSBarry Smith   */
1830452661fSBarry Smith   svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
1840452661fSBarry Smith   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
18578b31e54SBarry Smith   CHKPTRQ(send_waits);
1860452661fSBarry Smith   starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
1871eb62cbbSBarry Smith   starts[0] = 0;
18817699dbbSLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
1891eb62cbbSBarry Smith   for ( i=0; i<aij->stash.n; i++ ) {
1901eb62cbbSBarry Smith     svalues[3*starts[owner[i]]]       = (Scalar)  aij->stash.idx[i];
1911eb62cbbSBarry Smith     svalues[3*starts[owner[i]]+1]     = (Scalar)  aij->stash.idy[i];
1921eb62cbbSBarry Smith     svalues[3*(starts[owner[i]]++)+2] =  aij->stash.array[i];
1931eb62cbbSBarry Smith   }
1940452661fSBarry Smith   PetscFree(owner);
1951eb62cbbSBarry Smith   starts[0] = 0;
19617699dbbSLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
1971eb62cbbSBarry Smith   count = 0;
19817699dbbSLois Curfman McInnes   for ( i=0; i<size; i++ ) {
1991eb62cbbSBarry Smith     if (procs[i]) {
200416022c9SBarry Smith       MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
2011eb62cbbSBarry Smith                 comm,send_waits+count++);
2021eb62cbbSBarry Smith     }
2031eb62cbbSBarry Smith   }
2040452661fSBarry Smith   PetscFree(starts); PetscFree(nprocs);
2051eb62cbbSBarry Smith 
2061eb62cbbSBarry Smith   /* Free cache space */
20778b31e54SBarry Smith   ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
2081eb62cbbSBarry Smith 
2091eb62cbbSBarry Smith   aij->svalues    = svalues;    aij->rvalues    = rvalues;
2101eb62cbbSBarry Smith   aij->nsends     = nsends;     aij->nrecvs     = nreceives;
2111eb62cbbSBarry Smith   aij->send_waits = send_waits; aij->recv_waits = recv_waits;
2121eb62cbbSBarry Smith   aij->rmax       = nmax;
2131eb62cbbSBarry Smith 
2141eb62cbbSBarry Smith   return 0;
2151eb62cbbSBarry Smith }
21644a69424SLois Curfman McInnes extern int MatSetUpMultiply_MPIAIJ(Mat);
2171eb62cbbSBarry Smith 
218fc76ce87SLois Curfman McInnes static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
2191eb62cbbSBarry Smith {
22044a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
221dbb450caSBarry Smith   Mat_SeqAIJ *C = (Mat_SeqAIJ *) aij->A->data;
2221eb62cbbSBarry Smith   MPI_Status  *send_status,recv_status;
223416022c9SBarry Smith   int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
224416022c9SBarry Smith   int         row,col,other_disassembled,shift = C->indexshift;
2251eb62cbbSBarry Smith   Scalar      *values,val;
2261eb62cbbSBarry Smith   InsertMode  addv = aij->insertmode;
2271eb62cbbSBarry Smith 
2281eb62cbbSBarry Smith   /*  wait on receives */
2291eb62cbbSBarry Smith   while (count) {
230d6dfbf8fSBarry Smith     MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
2311eb62cbbSBarry Smith     /* unpack receives into our local space */
232d6dfbf8fSBarry Smith     values = aij->rvalues + 3*imdex*aij->rmax;
233c60448a5SBarry Smith     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
2341eb62cbbSBarry Smith     n = n/3;
2351eb62cbbSBarry Smith     for ( i=0; i<n; i++ ) {
2361eb62cbbSBarry Smith       row = (int) PETSCREAL(values[3*i]) - aij->rstart;
2371eb62cbbSBarry Smith       col = (int) PETSCREAL(values[3*i+1]);
2381eb62cbbSBarry Smith       val = values[3*i+2];
2391eb62cbbSBarry Smith       if (col >= aij->cstart && col < aij->cend) {
2401eb62cbbSBarry Smith         col -= aij->cstart;
2411eb62cbbSBarry Smith         MatSetValues(aij->A,1,&row,1,&col,&val,addv);
2421eb62cbbSBarry Smith       }
2431eb62cbbSBarry Smith       else {
244d6dfbf8fSBarry Smith         if (aij->assembled) {
245b7c46309SBarry Smith           if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
246dbb450caSBarry Smith           col = aij->colmap[col] + shift;
247ec8511deSBarry Smith           if (col < 0  && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
2482493cbb0SBarry Smith             ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
2492493cbb0SBarry Smith             col = (int) PETSCREAL(values[3*i+1]);
250d6dfbf8fSBarry Smith           }
2519e25ed09SBarry Smith         }
2521eb62cbbSBarry Smith         MatSetValues(aij->B,1,&row,1,&col,&val,addv);
2531eb62cbbSBarry Smith       }
2541eb62cbbSBarry Smith     }
2551eb62cbbSBarry Smith     count--;
2561eb62cbbSBarry Smith   }
2570452661fSBarry Smith   PetscFree(aij->recv_waits); PetscFree(aij->rvalues);
2581eb62cbbSBarry Smith 
2591eb62cbbSBarry Smith   /* wait on sends */
2601eb62cbbSBarry Smith   if (aij->nsends) {
2610452661fSBarry Smith     send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));
26278b31e54SBarry Smith     CHKPTRQ(send_status);
2631eb62cbbSBarry Smith     MPI_Waitall(aij->nsends,aij->send_waits,send_status);
2640452661fSBarry Smith     PetscFree(send_status);
2651eb62cbbSBarry Smith   }
2660452661fSBarry Smith   PetscFree(aij->send_waits); PetscFree(aij->svalues);
2671eb62cbbSBarry Smith 
26864119d58SLois Curfman McInnes   aij->insertmode = NOT_SET_VALUES;
26978b31e54SBarry Smith   ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
27078b31e54SBarry Smith   ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);
2711eb62cbbSBarry Smith 
2722493cbb0SBarry Smith   /* determine if any processor has disassembled, if so we must
2732493cbb0SBarry Smith      also disassemble ourselfs, in order that we may reassemble. */
274416022c9SBarry Smith   MPI_Allreduce(&aij->assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
2752493cbb0SBarry Smith   if (aij->assembled && !other_disassembled) {
2762493cbb0SBarry Smith     ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
2772493cbb0SBarry Smith   }
2782493cbb0SBarry Smith 
2795e42470aSBarry Smith   if (!aij->assembled && mode == FINAL_ASSEMBLY) {
28078b31e54SBarry Smith     ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
281d6dfbf8fSBarry Smith     aij->assembled = 1;
2825e42470aSBarry Smith   }
28378b31e54SBarry Smith   ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
28478b31e54SBarry Smith   ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);
2855e42470aSBarry Smith 
2868a729477SBarry Smith   return 0;
2878a729477SBarry Smith }
2888a729477SBarry Smith 
2892ee70a88SLois Curfman McInnes static int MatZeroEntries_MPIAIJ(Mat A)
2901eb62cbbSBarry Smith {
29144a69424SLois Curfman McInnes   Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
292dbd7a890SLois Curfman McInnes   int        ierr;
29378b31e54SBarry Smith   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
29478b31e54SBarry Smith   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
2951eb62cbbSBarry Smith   return 0;
2961eb62cbbSBarry Smith }
2971eb62cbbSBarry Smith 
2981eb62cbbSBarry Smith /* the code does not do the diagonal entries correctly unless the
2991eb62cbbSBarry Smith    matrix is square and the column and row owerships are identical.
3001eb62cbbSBarry Smith    This is a BUG. The only way to fix it seems to be to access
3011eb62cbbSBarry Smith    aij->A and aij->B directly and not through the MatZeroRows()
3021eb62cbbSBarry Smith    routine.
3031eb62cbbSBarry Smith */
30444a69424SLois Curfman McInnes static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
3051eb62cbbSBarry Smith {
30644a69424SLois Curfman McInnes   Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
30717699dbbSLois Curfman McInnes   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
3086abc6512SBarry Smith   int            *procs,*nprocs,j,found,idx,nsends,*work;
30917699dbbSLois Curfman McInnes   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
3105392566eSBarry Smith   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
311d6dfbf8fSBarry Smith   int            *lens,imdex,*lrows,*values;
312d6dfbf8fSBarry Smith   MPI_Comm       comm = A->comm;
3131eb62cbbSBarry Smith   MPI_Request    *send_waits,*recv_waits;
3141eb62cbbSBarry Smith   MPI_Status     recv_status,*send_status;
3151eb62cbbSBarry Smith   IS             istmp;
3161eb62cbbSBarry Smith 
31748d91487SBarry Smith   if (!l->assembled) SETERRQ(1,"MatZeroRows_MPIAIJ:Must assemble matrix");
31878b31e54SBarry Smith   ierr = ISGetLocalSize(is,&N); CHKERRQ(ierr);
31978b31e54SBarry Smith   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
3201eb62cbbSBarry Smith 
3211eb62cbbSBarry Smith   /*  first count number of contributors to each processor */
3220452661fSBarry Smith   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
323cddf8d76SBarry Smith   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
3240452661fSBarry Smith   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
3251eb62cbbSBarry Smith   for ( i=0; i<N; i++ ) {
3261eb62cbbSBarry Smith     idx = rows[i];
3271eb62cbbSBarry Smith     found = 0;
32817699dbbSLois Curfman McInnes     for ( j=0; j<size; j++ ) {
3291eb62cbbSBarry Smith       if (idx >= owners[j] && idx < owners[j+1]) {
3301eb62cbbSBarry Smith         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
3311eb62cbbSBarry Smith       }
3321eb62cbbSBarry Smith     }
333bbb6d6a8SBarry Smith     if (!found) SETERRQ(1,"MatZeroRows_MPIAIJ:Index out of range");
3341eb62cbbSBarry Smith   }
33517699dbbSLois Curfman McInnes   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
3361eb62cbbSBarry Smith 
3371eb62cbbSBarry Smith   /* inform other processors of number of messages and max length*/
3380452661fSBarry Smith   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
33917699dbbSLois Curfman McInnes   MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
34017699dbbSLois Curfman McInnes   nrecvs = work[rank];
34117699dbbSLois Curfman McInnes   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
34217699dbbSLois Curfman McInnes   nmax = work[rank];
3430452661fSBarry Smith   PetscFree(work);
3441eb62cbbSBarry Smith 
3451eb62cbbSBarry Smith   /* post receives:   */
3460452661fSBarry Smith   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
34778b31e54SBarry Smith   CHKPTRQ(rvalues);
3480452661fSBarry Smith   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
34978b31e54SBarry Smith   CHKPTRQ(recv_waits);
3501eb62cbbSBarry Smith   for ( i=0; i<nrecvs; i++ ) {
351416022c9SBarry Smith     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
3521eb62cbbSBarry Smith   }
3531eb62cbbSBarry Smith 
3541eb62cbbSBarry Smith   /* do sends:
3551eb62cbbSBarry Smith       1) starts[i] gives the starting index in svalues for stuff going to
3561eb62cbbSBarry Smith          the ith processor
3571eb62cbbSBarry Smith   */
3580452661fSBarry Smith   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
3590452661fSBarry Smith   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
36078b31e54SBarry Smith   CHKPTRQ(send_waits);
3610452661fSBarry Smith   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
3621eb62cbbSBarry Smith   starts[0] = 0;
36317699dbbSLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
3641eb62cbbSBarry Smith   for ( i=0; i<N; i++ ) {
3651eb62cbbSBarry Smith     svalues[starts[owner[i]]++] = rows[i];
3661eb62cbbSBarry Smith   }
3671eb62cbbSBarry Smith   ISRestoreIndices(is,&rows);
3681eb62cbbSBarry Smith 
3691eb62cbbSBarry Smith   starts[0] = 0;
37017699dbbSLois Curfman McInnes   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
3711eb62cbbSBarry Smith   count = 0;
37217699dbbSLois Curfman McInnes   for ( i=0; i<size; i++ ) {
3731eb62cbbSBarry Smith     if (procs[i]) {
374416022c9SBarry Smith       MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
3751eb62cbbSBarry Smith     }
3761eb62cbbSBarry Smith   }
3770452661fSBarry Smith   PetscFree(starts);
3781eb62cbbSBarry Smith 
37917699dbbSLois Curfman McInnes   base = owners[rank];
3801eb62cbbSBarry Smith 
3811eb62cbbSBarry Smith   /*  wait on receives */
3820452661fSBarry Smith   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
3831eb62cbbSBarry Smith   source = lens + nrecvs;
3841eb62cbbSBarry Smith   count  = nrecvs; slen = 0;
3851eb62cbbSBarry Smith   while (count) {
386d6dfbf8fSBarry Smith     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
3871eb62cbbSBarry Smith     /* unpack receives into our local space */
3881eb62cbbSBarry Smith     MPI_Get_count(&recv_status,MPI_INT,&n);
389d6dfbf8fSBarry Smith     source[imdex]  = recv_status.MPI_SOURCE;
390d6dfbf8fSBarry Smith     lens[imdex]  = n;
3911eb62cbbSBarry Smith     slen += n;
3921eb62cbbSBarry Smith     count--;
3931eb62cbbSBarry Smith   }
3940452661fSBarry Smith   PetscFree(recv_waits);
3951eb62cbbSBarry Smith 
3961eb62cbbSBarry Smith   /* move the data into the send scatter */
3970452661fSBarry Smith   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
3981eb62cbbSBarry Smith   count = 0;
3991eb62cbbSBarry Smith   for ( i=0; i<nrecvs; i++ ) {
4001eb62cbbSBarry Smith     values = rvalues + i*nmax;
4011eb62cbbSBarry Smith     for ( j=0; j<lens[i]; j++ ) {
4021eb62cbbSBarry Smith       lrows[count++] = values[j] - base;
4031eb62cbbSBarry Smith     }
4041eb62cbbSBarry Smith   }
4050452661fSBarry Smith   PetscFree(rvalues); PetscFree(lens);
4060452661fSBarry Smith   PetscFree(owner); PetscFree(nprocs);
4071eb62cbbSBarry Smith 
4081eb62cbbSBarry Smith   /* actually zap the local rows */
409416022c9SBarry Smith   ierr = ISCreateSeq(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
410464493b3SBarry Smith   PLogObjectParent(A,istmp);
4110452661fSBarry Smith   PetscFree(lrows);
41278b31e54SBarry Smith   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
41378b31e54SBarry Smith   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
41478b31e54SBarry Smith   ierr = ISDestroy(istmp); CHKERRQ(ierr);
4151eb62cbbSBarry Smith 
4161eb62cbbSBarry Smith   /* wait on sends */
4171eb62cbbSBarry Smith   if (nsends) {
4180452661fSBarry Smith     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
41978b31e54SBarry Smith     CHKPTRQ(send_status);
4201eb62cbbSBarry Smith     MPI_Waitall(nsends,send_waits,send_status);
4210452661fSBarry Smith     PetscFree(send_status);
4221eb62cbbSBarry Smith   }
4230452661fSBarry Smith   PetscFree(send_waits); PetscFree(svalues);
4241eb62cbbSBarry Smith 
4251eb62cbbSBarry Smith   return 0;
4261eb62cbbSBarry Smith }
4271eb62cbbSBarry Smith 
428416022c9SBarry Smith static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
4291eb62cbbSBarry Smith {
430416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
4311eb62cbbSBarry Smith   int        ierr;
432416022c9SBarry Smith 
43348d91487SBarry Smith   if (!a->assembled) SETERRQ(1,"MatMult_MPIAIJ:must assemble matrix");
43464119d58SLois Curfman McInnes   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
435416022c9SBarry Smith   ierr = MatMult(a->A,xx,yy); CHKERRQ(ierr);
43664119d58SLois Curfman McInnes   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
437416022c9SBarry Smith   ierr = MatMultAdd(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
4381eb62cbbSBarry Smith   return 0;
4391eb62cbbSBarry Smith }
4401eb62cbbSBarry Smith 
441416022c9SBarry Smith static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
442da3a660dSBarry Smith {
443416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
444da3a660dSBarry Smith   int        ierr;
44548d91487SBarry Smith   if (!a->assembled) SETERRQ(1,"MatMult_MPIAIJ:must assemble matrix");
44664119d58SLois Curfman McInnes   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
447416022c9SBarry Smith   ierr = MatMultAdd(a->A,xx,yy,zz); CHKERRQ(ierr);
44864119d58SLois Curfman McInnes   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
449416022c9SBarry Smith   ierr = MatMultAdd(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
450da3a660dSBarry Smith   return 0;
451da3a660dSBarry Smith }
452da3a660dSBarry Smith 
453416022c9SBarry Smith static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
454da3a660dSBarry Smith {
455416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
456da3a660dSBarry Smith   int        ierr;
457da3a660dSBarry Smith 
45848d91487SBarry Smith   if (!a->assembled) SETERRQ(1,"MatMulTrans_MPIAIJ:must assemble matrix");
459da3a660dSBarry Smith   /* do nondiagonal part */
460416022c9SBarry Smith   ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr);
461da3a660dSBarry Smith   /* send it on its way */
462416022c9SBarry Smith   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,
46364119d58SLois Curfman McInnes                 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
464da3a660dSBarry Smith   /* do local part */
465416022c9SBarry Smith   ierr = MatMultTrans(a->A,xx,yy); CHKERRQ(ierr);
466da3a660dSBarry Smith   /* receive remote parts: note this assumes the values are not actually */
467da3a660dSBarry Smith   /* inserted in yy until the next line, which is true for my implementation*/
468da3a660dSBarry Smith   /* but is not perhaps always true. */
469416022c9SBarry Smith   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,
47064119d58SLois Curfman McInnes                   (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
471da3a660dSBarry Smith   return 0;
472da3a660dSBarry Smith }
473da3a660dSBarry Smith 
474416022c9SBarry Smith static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
475da3a660dSBarry Smith {
476416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
477da3a660dSBarry Smith   int        ierr;
478da3a660dSBarry Smith 
47948d91487SBarry Smith   if (!a->assembled) SETERRQ(1,"MatMulTransAdd_MPIAIJ:must assemble matrix");
480da3a660dSBarry Smith   /* do nondiagonal part */
481416022c9SBarry Smith   ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr);
482da3a660dSBarry Smith   /* send it on its way */
483416022c9SBarry Smith   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,
48464119d58SLois Curfman McInnes                  (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
485da3a660dSBarry Smith   /* do local part */
486416022c9SBarry Smith   ierr = MatMultTransAdd(a->A,xx,yy,zz); CHKERRQ(ierr);
487da3a660dSBarry Smith   /* receive remote parts: note this assumes the values are not actually */
488da3a660dSBarry Smith   /* inserted in yy until the next line, which is true for my implementation*/
489da3a660dSBarry Smith   /* but is not perhaps always true. */
490416022c9SBarry Smith   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,
49164119d58SLois Curfman McInnes                   (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
492da3a660dSBarry Smith   return 0;
493da3a660dSBarry Smith }
494da3a660dSBarry Smith 
4951eb62cbbSBarry Smith /*
4961eb62cbbSBarry Smith   This only works correctly for square matrices where the subblock A->A is the
4971eb62cbbSBarry Smith    diagonal block
4981eb62cbbSBarry Smith */
499416022c9SBarry Smith static int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
5001eb62cbbSBarry Smith {
501416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
50248d91487SBarry Smith   if (!a->assembled) SETERRQ(1,"MatGetDiag_MPIAIJ:must assemble matrix");
503416022c9SBarry Smith   return MatGetDiagonal(a->A,v);
5041eb62cbbSBarry Smith }
5051eb62cbbSBarry Smith 
50644a69424SLois Curfman McInnes static int MatDestroy_MPIAIJ(PetscObject obj)
5071eb62cbbSBarry Smith {
5081eb62cbbSBarry Smith   Mat        mat = (Mat) obj;
50944a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
5101eb62cbbSBarry Smith   int        ierr;
511a5a9c739SBarry Smith #if defined(PETSC_LOG)
5126e35fa57SLois Curfman McInnes   PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N);
513a5a9c739SBarry Smith #endif
5140452661fSBarry Smith   PetscFree(aij->rowners);
51578b31e54SBarry Smith   ierr = MatDestroy(aij->A); CHKERRQ(ierr);
51678b31e54SBarry Smith   ierr = MatDestroy(aij->B); CHKERRQ(ierr);
5170452661fSBarry Smith   if (aij->colmap) PetscFree(aij->colmap);
5180452661fSBarry Smith   if (aij->garray) PetscFree(aij->garray);
5191eb62cbbSBarry Smith   if (aij->lvec)   VecDestroy(aij->lvec);
520a56f8943SBarry Smith   if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
5210452661fSBarry Smith   PetscFree(aij);
522a5a9c739SBarry Smith   PLogObjectDestroy(mat);
5230452661fSBarry Smith   PetscHeaderDestroy(mat);
5241eb62cbbSBarry Smith   return 0;
5251eb62cbbSBarry Smith }
5267857610eSBarry Smith #include "draw.h"
527b16a3bb1SBarry Smith #include "pinclude/pviewer.h"
528ee50ffe9SBarry Smith 
529416022c9SBarry Smith static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
5301eb62cbbSBarry Smith {
531416022c9SBarry Smith   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
532416022c9SBarry Smith   int         ierr;
533416022c9SBarry Smith 
53417699dbbSLois Curfman McInnes   if (aij->size == 1) {
535416022c9SBarry Smith     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
536416022c9SBarry Smith   }
537a523beb4SLois Curfman McInnes   else SETERRQ(1,"MatView_MPIAIJ_Binary:Only uniprocessor output supported");
538416022c9SBarry Smith   return 0;
539416022c9SBarry Smith }
540416022c9SBarry Smith 
541d7e8b826SBarry Smith static int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
542416022c9SBarry Smith {
54344a69424SLois Curfman McInnes   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
544dbb450caSBarry Smith   Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
545a56f8943SBarry Smith   int         ierr, format,shift = C->indexshift,rank;
546d13ab20cSBarry Smith   PetscObject vobj = (PetscObject) viewer;
547d636dbe3SBarry Smith   FILE        *fd;
548416022c9SBarry Smith 
549416022c9SBarry Smith   if (vobj->type == ASCII_FILE_VIEWER || vobj->type == ASCII_FILES_VIEWER) {
550416022c9SBarry Smith     ierr = ViewerFileGetFormat_Private(viewer,&format);
55108480c60SBarry Smith     if (format == FILE_FORMAT_INFO_DETAILED) {
552a56f8943SBarry Smith       int nz,nzalloc,mem;
553a56f8943SBarry Smith       MPI_Comm_rank(mat->comm,&rank);
554a56f8943SBarry Smith       ierr = ViewerFileGetPointer_Private(viewer,&fd); CHKERRQ(ierr);
555a56f8943SBarry Smith       ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem);
556a56f8943SBarry Smith       MPIU_Seq_begin(mat->comm,1);
557a56f8943SBarry Smith       fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d \n",rank,aij->m,nz,
558a56f8943SBarry Smith                 nzalloc,mem);
55908480c60SBarry Smith       ierr = MatGetInfo(aij->A,MAT_LOCAL,&nz,&nzalloc,&mem);
56008480c60SBarry Smith       fprintf(fd,"[%d] on diagonal nz %d \n",rank,nz);
56108480c60SBarry Smith       ierr = MatGetInfo(aij->B,MAT_LOCAL,&nz,&nzalloc,&mem);
56208480c60SBarry Smith       fprintf(fd,"[%d] off diagonal nz %d \n",rank,nz);
563a56f8943SBarry Smith       fflush(fd);
564a56f8943SBarry Smith       MPIU_Seq_end(mat->comm,1);
565a56f8943SBarry Smith       ierr = VecScatterView(aij->Mvctx,viewer);
566416022c9SBarry Smith       return 0;
567416022c9SBarry Smith     }
56808480c60SBarry Smith     else if (format == FILE_FORMAT_INFO) {
56908480c60SBarry Smith       return 0;
57008480c60SBarry Smith     }
571416022c9SBarry Smith   }
572416022c9SBarry Smith 
573416022c9SBarry Smith   if (vobj->type == ASCII_FILE_VIEWER) {
574d636dbe3SBarry Smith     ierr = ViewerFileGetPointer_Private(viewer,&fd); CHKERRQ(ierr);
5757f813858SBarry Smith     MPIU_Seq_begin(mat->comm,1);
576d13ab20cSBarry Smith     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
57717699dbbSLois Curfman McInnes            aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
5781eb62cbbSBarry Smith            aij->cend);
57978b31e54SBarry Smith     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
58078b31e54SBarry Smith     ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
581d13ab20cSBarry Smith     fflush(fd);
5827f813858SBarry Smith     MPIU_Seq_end(mat->comm,1);
583d13ab20cSBarry Smith   }
584416022c9SBarry Smith   else {
585a56f8943SBarry Smith     int size = aij->size;
586a56f8943SBarry Smith     rank = aij->rank;
58717699dbbSLois Curfman McInnes     if (size == 1) {
58878b31e54SBarry Smith       ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
58995373324SBarry Smith     }
59095373324SBarry Smith     else {
59195373324SBarry Smith       /* assemble the entire matrix onto first processor. */
59295373324SBarry Smith       Mat         A;
593ec8511deSBarry Smith       Mat_SeqAIJ *Aloc;
5942eb8c8abSBarry Smith       int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
59595373324SBarry Smith       Scalar      *a;
5962ee70a88SLois Curfman McInnes 
59717699dbbSLois Curfman McInnes       if (!rank) {
598b4fd4287SBarry Smith         ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
599c451ab25SLois Curfman McInnes                CHKERRQ(ierr);
60095373324SBarry Smith       }
60195373324SBarry Smith       else {
602b4fd4287SBarry Smith         ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
603c451ab25SLois Curfman McInnes                CHKERRQ(ierr);
60495373324SBarry Smith       }
605464493b3SBarry Smith       PLogObjectParent(mat,A);
606416022c9SBarry Smith 
60795373324SBarry Smith       /* copy over the A part */
608ec8511deSBarry Smith       Aloc = (Mat_SeqAIJ*) aij->A->data;
6092ee70a88SLois Curfman McInnes       m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
61095373324SBarry Smith       row = aij->rstart;
611dbb450caSBarry Smith       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
61295373324SBarry Smith       for ( i=0; i<m; i++ ) {
613416022c9SBarry Smith         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
61495373324SBarry Smith         row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
61595373324SBarry Smith       }
6162ee70a88SLois Curfman McInnes       aj = Aloc->j;
617dbb450caSBarry Smith       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}
61895373324SBarry Smith 
61995373324SBarry Smith       /* copy over the B part */
620ec8511deSBarry Smith       Aloc = (Mat_SeqAIJ*) aij->B->data;
6212ee70a88SLois Curfman McInnes       m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
62295373324SBarry Smith       row = aij->rstart;
6230452661fSBarry Smith       ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
624dbb450caSBarry Smith       for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
62595373324SBarry Smith       for ( i=0; i<m; i++ ) {
626416022c9SBarry Smith         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
62795373324SBarry Smith         row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
62895373324SBarry Smith       }
6290452661fSBarry Smith       PetscFree(ct);
63078b31e54SBarry Smith       ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
63178b31e54SBarry Smith       ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
63217699dbbSLois Curfman McInnes       if (!rank) {
63378b31e54SBarry Smith         ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
63495373324SBarry Smith       }
63578b31e54SBarry Smith       ierr = MatDestroy(A); CHKERRQ(ierr);
63695373324SBarry Smith     }
63795373324SBarry Smith   }
6381eb62cbbSBarry Smith   return 0;
6391eb62cbbSBarry Smith }
6401eb62cbbSBarry Smith 
641416022c9SBarry Smith static int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
642416022c9SBarry Smith {
643416022c9SBarry Smith   Mat         mat = (Mat) obj;
644416022c9SBarry Smith   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
645416022c9SBarry Smith   int         ierr;
646416022c9SBarry Smith   PetscObject vobj = (PetscObject) viewer;
647416022c9SBarry Smith 
64848d91487SBarry Smith   if (!aij->assembled) SETERRQ(1,"MatView_MPIAIJ:must assemble matrix");
649416022c9SBarry Smith   if (!viewer) {
650416022c9SBarry Smith     viewer = STDOUT_VIEWER_SELF; vobj = (PetscObject) viewer;
651416022c9SBarry Smith   }
652416022c9SBarry Smith   if (vobj->cookie == DRAW_COOKIE && vobj->type == NULLWINDOW) return 0;
653416022c9SBarry Smith   else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILE_VIEWER) {
654d7e8b826SBarry Smith     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
655416022c9SBarry Smith   }
656416022c9SBarry Smith   else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILES_VIEWER) {
657d7e8b826SBarry Smith     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
658d7e8b826SBarry Smith   }
659d7e8b826SBarry Smith   else if (vobj->cookie == VIEWER_COOKIE && vobj->type == MATLAB_VIEWER) {
660d7e8b826SBarry Smith     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
661416022c9SBarry Smith   }
662416022c9SBarry Smith   else if (vobj->cookie == DRAW_COOKIE) {
663d7e8b826SBarry Smith     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
664416022c9SBarry Smith   }
665416022c9SBarry Smith   else if (vobj->type == BINARY_FILE_VIEWER) {
666416022c9SBarry Smith     return MatView_MPIAIJ_Binary(mat,viewer);
667416022c9SBarry Smith   }
668416022c9SBarry Smith   return 0;
669416022c9SBarry Smith }
670416022c9SBarry Smith 
671ec8511deSBarry Smith extern int MatMarkDiag_SeqAIJ(Mat);
6721eb62cbbSBarry Smith /*
6731eb62cbbSBarry Smith     This has to provide several versions.
6741eb62cbbSBarry Smith 
6751eb62cbbSBarry Smith      1) per sequential
6761eb62cbbSBarry Smith      2) a) use only local smoothing updating outer values only once.
6771eb62cbbSBarry Smith         b) local smoothing updating outer values each inner iteration
678d6dfbf8fSBarry Smith      3) color updating out values betwen colors.
6791eb62cbbSBarry Smith */
680fc76ce87SLois Curfman McInnes static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
681dbb450caSBarry Smith                            double fshift,int its,Vec xx)
6828a729477SBarry Smith {
68344a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
684d6dfbf8fSBarry Smith   Mat        AA = mat->A, BB = mat->B;
685ec8511deSBarry Smith   Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
6866abc6512SBarry Smith   Scalar     zero = 0.0,*b,*x,*xs,*ls,d,*v,sum,scale,*t,*ts;
6876abc6512SBarry Smith   int        ierr,*idx, *diag;
688416022c9SBarry Smith   int        n = mat->n, m = mat->m, i,shift = A->indexshift;
689da3a660dSBarry Smith   Vec        tt;
6908a729477SBarry Smith 
69148d91487SBarry Smith   if (!mat->assembled) SETERRQ(1,"MatRelax_MPIAIJ:must assemble matrix");
6921eb62cbbSBarry Smith 
693d6dfbf8fSBarry Smith   VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
694dbb450caSBarry Smith   xs = x + shift; /* shift by one for index start of 1 */
695dbb450caSBarry Smith   ls = ls + shift;
696ec8511deSBarry Smith   if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;}
697d6dfbf8fSBarry Smith   diag = A->diag;
698acb40c82SBarry Smith   if (flag == SOR_APPLY_UPPER || flag == SOR_APPLY_LOWER) {
69948d91487SBarry Smith     SETERRQ(1,"MatRelax_MPIAIJ:Option not supported");
700acb40c82SBarry Smith   }
701da3a660dSBarry Smith   if (flag & SOR_EISENSTAT) {
702da3a660dSBarry Smith     /* Let  A = L + U + D; where L is lower trianglar,
703da3a660dSBarry Smith     U is upper triangular, E is diagonal; This routine applies
704da3a660dSBarry Smith 
705da3a660dSBarry Smith             (L + E)^{-1} A (U + E)^{-1}
706da3a660dSBarry Smith 
707da3a660dSBarry Smith     to a vector efficiently using Eisenstat's trick. This is for
708da3a660dSBarry Smith     the case of SSOR preconditioner, so E is D/omega where omega
709da3a660dSBarry Smith     is the relaxation factor.
710da3a660dSBarry Smith     */
71178b31e54SBarry Smith     ierr = VecDuplicate(xx,&tt); CHKERRQ(ierr);
712464493b3SBarry Smith     PLogObjectParent(matin,tt);
713da3a660dSBarry Smith     VecGetArray(tt,&t);
714da3a660dSBarry Smith     scale = (2.0/omega) - 1.0;
715da3a660dSBarry Smith     /*  x = (E + U)^{-1} b */
716da3a660dSBarry Smith     VecSet(&zero,mat->lvec);
71764119d58SLois Curfman McInnes     ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
71878b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
719da3a660dSBarry Smith     for ( i=m-1; i>-1; i-- ) {
720da3a660dSBarry Smith       n    = A->i[i+1] - diag[i] - 1;
721dbb450caSBarry Smith       idx  = A->j + diag[i] + !shift;
722dbb450caSBarry Smith       v    = A->a + diag[i] + !shift;
723da3a660dSBarry Smith       sum  = b[i];
724da3a660dSBarry Smith       SPARSEDENSEMDOT(sum,xs,v,idx,n);
725dbb450caSBarry Smith       d    = fshift + A->a[diag[i]+shift];
726da3a660dSBarry Smith       n    = B->i[i+1] - B->i[i];
727dbb450caSBarry Smith       idx  = B->j + B->i[i] + shift;
728dbb450caSBarry Smith       v    = B->a + B->i[i] + shift;
729da3a660dSBarry Smith       SPARSEDENSEMDOT(sum,ls,v,idx,n);
730da3a660dSBarry Smith       x[i] = omega*(sum/d);
731da3a660dSBarry Smith     }
73264119d58SLois Curfman McInnes     ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
73378b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
734da3a660dSBarry Smith 
735da3a660dSBarry Smith     /*  t = b - (2*E - D)x */
736da3a660dSBarry Smith     v = A->a;
737dbb450caSBarry Smith     for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ + shift])*x[i]; }
738da3a660dSBarry Smith 
739da3a660dSBarry Smith     /*  t = (E + L)^{-1}t */
740dbb450caSBarry Smith     ts = t + shift; /* shifted by one for index start of a or mat->j*/
741da3a660dSBarry Smith     diag = A->diag;
742da3a660dSBarry Smith     VecSet(&zero,mat->lvec);
74364119d58SLois Curfman McInnes     ierr = VecPipelineBegin(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
74478b31e54SBarry Smith                                                  mat->Mvctx); CHKERRQ(ierr);
745da3a660dSBarry Smith     for ( i=0; i<m; i++ ) {
746da3a660dSBarry Smith       n    = diag[i] - A->i[i];
747dbb450caSBarry Smith       idx  = A->j + A->i[i] + shift;
748dbb450caSBarry Smith       v    = A->a + A->i[i] + shift;
749da3a660dSBarry Smith       sum  = t[i];
750da3a660dSBarry Smith       SPARSEDENSEMDOT(sum,ts,v,idx,n);
751dbb450caSBarry Smith       d    = fshift + A->a[diag[i]+shift];
752da3a660dSBarry Smith       n    = B->i[i+1] - B->i[i];
753dbb450caSBarry Smith       idx  = B->j + B->i[i] + shift;
754dbb450caSBarry Smith       v    = B->a + B->i[i] + shift;
755da3a660dSBarry Smith       SPARSEDENSEMDOT(sum,ls,v,idx,n);
756da3a660dSBarry Smith       t[i] = omega*(sum/d);
757da3a660dSBarry Smith     }
75864119d58SLois Curfman McInnes     ierr = VecPipelineEnd(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
75978b31e54SBarry Smith                                                     mat->Mvctx); CHKERRQ(ierr);
760da3a660dSBarry Smith     /*  x = x + t */
761da3a660dSBarry Smith     for ( i=0; i<m; i++ ) { x[i] += t[i]; }
762da3a660dSBarry Smith     VecDestroy(tt);
763da3a660dSBarry Smith     return 0;
764da3a660dSBarry Smith   }
765da3a660dSBarry Smith 
7661eb62cbbSBarry Smith 
767d6dfbf8fSBarry Smith   if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){
768da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
769da3a660dSBarry Smith       VecSet(&zero,mat->lvec); VecSet(&zero,xx);
770da3a660dSBarry Smith     }
771da3a660dSBarry Smith     else {
77264119d58SLois Curfman McInnes       ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
77378b31e54SBarry Smith       CHKERRQ(ierr);
77464119d58SLois Curfman McInnes       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
77578b31e54SBarry Smith       CHKERRQ(ierr);
776da3a660dSBarry Smith     }
777d6dfbf8fSBarry Smith     while (its--) {
778d6dfbf8fSBarry Smith       /* go down through the rows */
77964119d58SLois Curfman McInnes       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
78078b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
781d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
782d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
783dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
784dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
785d6dfbf8fSBarry Smith         sum  = b[i];
786d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
787dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
788d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
789dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
790dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
791d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
792d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
793d6dfbf8fSBarry Smith       }
79464119d58SLois Curfman McInnes       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
79578b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
796d6dfbf8fSBarry Smith       /* come up through the rows */
79764119d58SLois Curfman McInnes       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
79878b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
799d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
800d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
801dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
802dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
803d6dfbf8fSBarry Smith         sum  = b[i];
804d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
805dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
806d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
807dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
808dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
809d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
810d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
811d6dfbf8fSBarry Smith       }
81264119d58SLois Curfman McInnes       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
81378b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
814d6dfbf8fSBarry Smith     }
815d6dfbf8fSBarry Smith   }
816d6dfbf8fSBarry Smith   else if (flag & SOR_FORWARD_SWEEP){
817da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
818da3a660dSBarry Smith       VecSet(&zero,mat->lvec);
81964119d58SLois Curfman McInnes       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
82078b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
821da3a660dSBarry Smith       for ( i=0; i<m; i++ ) {
822da3a660dSBarry Smith         n    = diag[i] - A->i[i];
823dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
824dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
825da3a660dSBarry Smith         sum  = b[i];
826da3a660dSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
827dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
828da3a660dSBarry Smith         n    = B->i[i+1] - B->i[i];
829dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
830dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
831da3a660dSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
832da3a660dSBarry Smith         x[i] = omega*(sum/d);
833da3a660dSBarry Smith       }
83464119d58SLois Curfman McInnes       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
83578b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
836da3a660dSBarry Smith       its--;
837da3a660dSBarry Smith     }
838d6dfbf8fSBarry Smith     while (its--) {
83964119d58SLois Curfman McInnes       ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
84078b31e54SBarry Smith       CHKERRQ(ierr);
84164119d58SLois Curfman McInnes       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
84278b31e54SBarry Smith       CHKERRQ(ierr);
84364119d58SLois Curfman McInnes       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
84478b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
845d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
846d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
847dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
848dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
849d6dfbf8fSBarry Smith         sum  = b[i];
850d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
851dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
852d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
853dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
854dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
855d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
856d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
857d6dfbf8fSBarry Smith       }
85864119d58SLois Curfman McInnes       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
85978b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
860d6dfbf8fSBarry Smith     }
861d6dfbf8fSBarry Smith   }
862d6dfbf8fSBarry Smith   else if (flag & SOR_BACKWARD_SWEEP){
863da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
864da3a660dSBarry Smith       VecSet(&zero,mat->lvec);
86564119d58SLois Curfman McInnes       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
86678b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
867da3a660dSBarry Smith       for ( i=m-1; i>-1; i-- ) {
868da3a660dSBarry Smith         n    = A->i[i+1] - diag[i] - 1;
869dbb450caSBarry Smith         idx  = A->j + diag[i] + !shift;
870dbb450caSBarry Smith         v    = A->a + diag[i] + !shift;
871da3a660dSBarry Smith         sum  = b[i];
872da3a660dSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
873dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
874da3a660dSBarry Smith         n    = B->i[i+1] - B->i[i];
875dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
876dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
877da3a660dSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
878da3a660dSBarry Smith         x[i] = omega*(sum/d);
879da3a660dSBarry Smith       }
88064119d58SLois Curfman McInnes       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
88178b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
882da3a660dSBarry Smith       its--;
883da3a660dSBarry Smith     }
884d6dfbf8fSBarry Smith     while (its--) {
88564119d58SLois Curfman McInnes       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN,
88678b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
88764119d58SLois Curfman McInnes       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN,
88878b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
88964119d58SLois Curfman McInnes       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
89078b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
891d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
892d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
893dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
894dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
895d6dfbf8fSBarry Smith         sum  = b[i];
896d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
897dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
898d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
899dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
900dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
901d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
902d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
903d6dfbf8fSBarry Smith       }
90464119d58SLois Curfman McInnes       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
90578b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
906d6dfbf8fSBarry Smith     }
907d6dfbf8fSBarry Smith   }
908d6dfbf8fSBarry Smith   else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
909da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
910dbb450caSBarry Smith       return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
911da3a660dSBarry Smith     }
91264119d58SLois Curfman McInnes     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
91378b31e54SBarry Smith     CHKERRQ(ierr);
91464119d58SLois Curfman McInnes     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
91578b31e54SBarry Smith     CHKERRQ(ierr);
916d6dfbf8fSBarry Smith     while (its--) {
917d6dfbf8fSBarry Smith       /* go down through the rows */
918d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
919d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
920dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
921dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
922d6dfbf8fSBarry Smith         sum  = b[i];
923d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
924dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
925d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
926dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
927dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
928d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
929d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
930d6dfbf8fSBarry Smith       }
931d6dfbf8fSBarry Smith       /* come up through the rows */
932d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
933d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
934dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
935dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
936d6dfbf8fSBarry Smith         sum  = b[i];
937d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
938dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
939d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
940dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
941dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
942d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
943d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
944d6dfbf8fSBarry Smith       }
945d6dfbf8fSBarry Smith     }
946d6dfbf8fSBarry Smith   }
947d6dfbf8fSBarry Smith   else if (flag & SOR_LOCAL_FORWARD_SWEEP){
948da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
949dbb450caSBarry Smith       return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
950da3a660dSBarry Smith     }
95164119d58SLois Curfman McInnes     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
95278b31e54SBarry Smith     CHKERRQ(ierr);
95364119d58SLois Curfman McInnes     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
95478b31e54SBarry Smith     CHKERRQ(ierr);
955d6dfbf8fSBarry Smith     while (its--) {
956d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
957d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
958dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
959dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
960d6dfbf8fSBarry Smith         sum  = b[i];
961d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
962dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
963d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
964dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
965dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
966d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
967d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
968d6dfbf8fSBarry Smith       }
969d6dfbf8fSBarry Smith     }
970d6dfbf8fSBarry Smith   }
971d6dfbf8fSBarry Smith   else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
972da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
973dbb450caSBarry Smith       return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
974da3a660dSBarry Smith     }
97564119d58SLois Curfman McInnes     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,
97678b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
97764119d58SLois Curfman McInnes     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,
97878b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
979d6dfbf8fSBarry Smith     while (its--) {
980d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
981d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
982dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
983dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
984d6dfbf8fSBarry Smith         sum  = b[i];
985d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
986dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
987d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
988dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
989dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
990d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
991d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
992d6dfbf8fSBarry Smith       }
993d6dfbf8fSBarry Smith     }
994d6dfbf8fSBarry Smith   }
9958a729477SBarry Smith   return 0;
9968a729477SBarry Smith }
997a66be287SLois Curfman McInnes 
998fc76ce87SLois Curfman McInnes static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,int *nz,
999fc76ce87SLois Curfman McInnes                              int *nzalloc,int *mem)
1000a66be287SLois Curfman McInnes {
1001a66be287SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1002a66be287SLois Curfman McInnes   Mat        A = mat->A, B = mat->B;
1003a66be287SLois Curfman McInnes   int        ierr, isend[3], irecv[3], nzA, nzallocA, memA;
1004a66be287SLois Curfman McInnes 
100578b31e54SBarry Smith   ierr = MatGetInfo(A,MAT_LOCAL,&nzA,&nzallocA,&memA); CHKERRQ(ierr);
100678b31e54SBarry Smith   ierr = MatGetInfo(B,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr);
1007a66be287SLois Curfman McInnes   isend[0] += nzA; isend[1] += nzallocA; isend[2] += memA;
1008a66be287SLois Curfman McInnes   if (flag == MAT_LOCAL) {
1009a66be287SLois Curfman McInnes     *nz = isend[0]; *nzalloc = isend[1]; *mem = isend[2];
1010a66be287SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_MAX) {
1011d65a2f8fSBarry Smith     MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_MAX,matin->comm);
1012a66be287SLois Curfman McInnes     *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
1013a66be287SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_SUM) {
1014d65a2f8fSBarry Smith     MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_SUM,matin->comm);
1015a66be287SLois Curfman McInnes     *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
1016a66be287SLois Curfman McInnes   }
1017a66be287SLois Curfman McInnes   return 0;
1018a66be287SLois Curfman McInnes }
1019a66be287SLois Curfman McInnes 
1020299609e3SLois Curfman McInnes extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*);
1021299609e3SLois Curfman McInnes extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*);
1022299609e3SLois Curfman McInnes extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double);
1023299609e3SLois Curfman McInnes extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *);
1024299609e3SLois Curfman McInnes extern int MatSolve_MPIAIJ(Mat,Vec,Vec);
1025299609e3SLois Curfman McInnes extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec);
1026299609e3SLois Curfman McInnes extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec);
1027299609e3SLois Curfman McInnes extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec);
1028299609e3SLois Curfman McInnes 
1029416022c9SBarry Smith static int MatSetOption_MPIAIJ(Mat A,MatOption op)
1030c74985f6SBarry Smith {
1031c0bbcb79SLois Curfman McInnes   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1032c74985f6SBarry Smith 
1033c0bbcb79SLois Curfman McInnes   if (op == NO_NEW_NONZERO_LOCATIONS ||
1034c0bbcb79SLois Curfman McInnes       op == YES_NEW_NONZERO_LOCATIONS ||
1035c0bbcb79SLois Curfman McInnes       op == COLUMNS_SORTED ||
1036c0bbcb79SLois Curfman McInnes       op == ROW_ORIENTED) {
1037c0bbcb79SLois Curfman McInnes         MatSetOption(a->A,op);
1038c0bbcb79SLois Curfman McInnes         MatSetOption(a->B,op);
1039c74985f6SBarry Smith   }
1040c0bbcb79SLois Curfman McInnes   else if (op == ROWS_SORTED ||
1041c0bbcb79SLois Curfman McInnes            op == SYMMETRIC_MATRIX ||
1042c0bbcb79SLois Curfman McInnes            op == STRUCTURALLY_SYMMETRIC_MATRIX ||
1043c0bbcb79SLois Curfman McInnes            op == YES_NEW_DIAGONALS)
1044c0bbcb79SLois Curfman McInnes     PLogInfo((PetscObject)A,"Info:MatSetOption_MPIAIJ:Option ignored\n");
1045bbb6d6a8SBarry Smith   else if (op == COLUMN_ORIENTED)
10464d39ef84SLois Curfman McInnes     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:COLUMN_ORIENTED");}
1047c0bbcb79SLois Curfman McInnes   else if (op == NO_NEW_DIAGONALS)
10484d39ef84SLois Curfman McInnes     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:NO_NEW_DIAGONALS");}
1049c0bbcb79SLois Curfman McInnes   else
10504d39ef84SLois Curfman McInnes     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:unknown option");}
1051c74985f6SBarry Smith   return 0;
1052c74985f6SBarry Smith }
1053c74985f6SBarry Smith 
1054d1710a03SLois Curfman McInnes static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
1055c74985f6SBarry Smith {
105644a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1057c74985f6SBarry Smith   *m = mat->M; *n = mat->N;
1058c74985f6SBarry Smith   return 0;
1059c74985f6SBarry Smith }
1060c74985f6SBarry Smith 
1061d1710a03SLois Curfman McInnes static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
1062c74985f6SBarry Smith {
106344a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1064b7c46309SBarry Smith   *m = mat->m; *n = mat->N;
1065c74985f6SBarry Smith   return 0;
1066c74985f6SBarry Smith }
1067c74985f6SBarry Smith 
1068d1710a03SLois Curfman McInnes static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
1069c74985f6SBarry Smith {
107044a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1071c74985f6SBarry Smith   *m = mat->rstart; *n = mat->rend;
1072c74985f6SBarry Smith   return 0;
1073c74985f6SBarry Smith }
1074c74985f6SBarry Smith 
107539e00950SLois Curfman McInnes static int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
107639e00950SLois Curfman McInnes {
1077154123eaSLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1078154123eaSLois Curfman McInnes   Scalar     *vworkA, *vworkB, **pvA, **pvB;
1079154123eaSLois Curfman McInnes   int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
1080154123eaSLois Curfman McInnes   int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
108139e00950SLois Curfman McInnes 
1082b49de8d1SLois Curfman McInnes   if (!mat->assembled) SETERRQ(1,"MatGetRow_MPIAIJ:Must assemble matrix");
1083b49de8d1SLois Curfman McInnes   if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIAIJ:Only local rows")
1084abc0e9e4SLois Curfman McInnes   lrow = row - rstart;
108539e00950SLois Curfman McInnes 
1086154123eaSLois Curfman McInnes   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1087154123eaSLois Curfman McInnes   if (!v)   {pvA = 0; pvB = 0;}
1088154123eaSLois Curfman McInnes   if (!idx) {pcA = 0; if (!v) pcB = 0;}
108978b31e54SBarry Smith   ierr = MatGetRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
109078b31e54SBarry Smith   ierr = MatGetRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1091154123eaSLois Curfman McInnes   nztot = nzA + nzB;
1092154123eaSLois Curfman McInnes 
1093154123eaSLois Curfman McInnes   if (v  || idx) {
1094154123eaSLois Curfman McInnes     if (nztot) {
1095154123eaSLois Curfman McInnes       /* Sort by increasing column numbers, assuming A and B already sorted */
1096299609e3SLois Curfman McInnes       int imark;
109748b35521SBarry Smith       if (mat->assembled) {
1098154123eaSLois Curfman McInnes         for (i=0; i<nzB; i++) cworkB[i] = mat->garray[cworkB[i]];
109948b35521SBarry Smith       }
1100154123eaSLois Curfman McInnes       if (v) {
11010452661fSBarry Smith         *v = (Scalar *) PetscMalloc( (nztot)*sizeof(Scalar) ); CHKPTRQ(*v);
110239e00950SLois Curfman McInnes         for ( i=0; i<nzB; i++ ) {
1103154123eaSLois Curfman McInnes           if (cworkB[i] < cstart)   (*v)[i] = vworkB[i];
1104154123eaSLois Curfman McInnes           else break;
1105154123eaSLois Curfman McInnes         }
1106154123eaSLois Curfman McInnes         imark = i;
1107154123eaSLois Curfman McInnes         for ( i=0; i<nzA; i++ )     (*v)[imark+i] = vworkA[i];
1108299609e3SLois Curfman McInnes         for ( i=imark; i<nzB; i++ ) (*v)[nzA+i] = vworkB[i];
1109154123eaSLois Curfman McInnes       }
1110154123eaSLois Curfman McInnes       if (idx) {
11110452661fSBarry Smith         *idx = (int *) PetscMalloc( (nztot)*sizeof(int) ); CHKPTRQ(*idx);
1112154123eaSLois Curfman McInnes         for (i=0; i<nzA; i++) cworkA[i] += cstart;
1113154123eaSLois Curfman McInnes         for ( i=0; i<nzB; i++ ) {
1114154123eaSLois Curfman McInnes           if (cworkB[i] < cstart)   (*idx)[i] = cworkB[i];
1115154123eaSLois Curfman McInnes           else break;
1116154123eaSLois Curfman McInnes         }
1117154123eaSLois Curfman McInnes         imark = i;
1118154123eaSLois Curfman McInnes         for ( i=0; i<nzA; i++ )     (*idx)[imark+i] = cworkA[i];
1119299609e3SLois Curfman McInnes         for ( i=imark; i<nzB; i++ ) (*idx)[nzA+i] = cworkB[i];
112039e00950SLois Curfman McInnes       }
112139e00950SLois Curfman McInnes     }
112239e00950SLois Curfman McInnes     else {*idx = 0; *v=0;}
1123154123eaSLois Curfman McInnes   }
112439e00950SLois Curfman McInnes   *nz = nztot;
112578b31e54SBarry Smith   ierr = MatRestoreRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
112678b31e54SBarry Smith   ierr = MatRestoreRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
112739e00950SLois Curfman McInnes   return 0;
112839e00950SLois Curfman McInnes }
112939e00950SLois Curfman McInnes 
113039e00950SLois Curfman McInnes static int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
113139e00950SLois Curfman McInnes {
11320452661fSBarry Smith   if (idx) PetscFree(*idx);
11330452661fSBarry Smith   if (v) PetscFree(*v);
113439e00950SLois Curfman McInnes   return 0;
113539e00950SLois Curfman McInnes }
113639e00950SLois Curfman McInnes 
1137cddf8d76SBarry Smith static int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
1138855ac2c5SLois Curfman McInnes {
1139855ac2c5SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1140ec8511deSBarry Smith   Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
1141416022c9SBarry Smith   int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
1142416022c9SBarry Smith   double     sum = 0.0;
114304ca555eSLois Curfman McInnes   Scalar     *v;
114404ca555eSLois Curfman McInnes 
1145416022c9SBarry Smith   if (!aij->assembled) SETERRQ(1,"MatNorm_MPIAIJ:Must assemble matrix");
114617699dbbSLois Curfman McInnes   if (aij->size == 1) {
114714183eadSLois Curfman McInnes     ierr =  MatNorm(aij->A,type,norm); CHKERRQ(ierr);
114837fa93a5SLois Curfman McInnes   } else {
114904ca555eSLois Curfman McInnes     if (type == NORM_FROBENIUS) {
115004ca555eSLois Curfman McInnes       v = amat->a;
115104ca555eSLois Curfman McInnes       for (i=0; i<amat->nz; i++ ) {
115204ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX)
115304ca555eSLois Curfman McInnes         sum += real(conj(*v)*(*v)); v++;
115404ca555eSLois Curfman McInnes #else
115504ca555eSLois Curfman McInnes         sum += (*v)*(*v); v++;
115604ca555eSLois Curfman McInnes #endif
115704ca555eSLois Curfman McInnes       }
115804ca555eSLois Curfman McInnes       v = bmat->a;
115904ca555eSLois Curfman McInnes       for (i=0; i<bmat->nz; i++ ) {
116004ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX)
116104ca555eSLois Curfman McInnes         sum += real(conj(*v)*(*v)); v++;
116204ca555eSLois Curfman McInnes #else
116304ca555eSLois Curfman McInnes         sum += (*v)*(*v); v++;
116404ca555eSLois Curfman McInnes #endif
116504ca555eSLois Curfman McInnes       }
116604ca555eSLois Curfman McInnes       MPI_Allreduce((void*)&sum,(void*)norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
116704ca555eSLois Curfman McInnes       *norm = sqrt(*norm);
116804ca555eSLois Curfman McInnes     }
116904ca555eSLois Curfman McInnes     else if (type == NORM_1) { /* max column norm */
117004ca555eSLois Curfman McInnes       double *tmp, *tmp2;
117104ca555eSLois Curfman McInnes       int    *jj, *garray = aij->garray;
11720452661fSBarry Smith       tmp  = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp);
11730452661fSBarry Smith       tmp2 = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp2);
1174cddf8d76SBarry Smith       PetscMemzero(tmp,aij->N*sizeof(double));
117504ca555eSLois Curfman McInnes       *norm = 0.0;
117604ca555eSLois Curfman McInnes       v = amat->a; jj = amat->j;
117704ca555eSLois Curfman McInnes       for ( j=0; j<amat->nz; j++ ) {
1178579c6b6fSBarry Smith         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
117904ca555eSLois Curfman McInnes       }
118004ca555eSLois Curfman McInnes       v = bmat->a; jj = bmat->j;
118104ca555eSLois Curfman McInnes       for ( j=0; j<bmat->nz; j++ ) {
1182579c6b6fSBarry Smith         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
118304ca555eSLois Curfman McInnes       }
118404ca555eSLois Curfman McInnes       MPI_Allreduce((void*)tmp,(void*)tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);
118504ca555eSLois Curfman McInnes       for ( j=0; j<aij->N; j++ ) {
118604ca555eSLois Curfman McInnes         if (tmp2[j] > *norm) *norm = tmp2[j];
118704ca555eSLois Curfman McInnes       }
11880452661fSBarry Smith       PetscFree(tmp); PetscFree(tmp2);
118904ca555eSLois Curfman McInnes     }
119004ca555eSLois Curfman McInnes     else if (type == NORM_INFINITY) { /* max row norm */
1191515d9167SLois Curfman McInnes       double ntemp = 0.0;
119204ca555eSLois Curfman McInnes       for ( j=0; j<amat->m; j++ ) {
1193dbb450caSBarry Smith         v = amat->a + amat->i[j] + shift;
119404ca555eSLois Curfman McInnes         sum = 0.0;
119504ca555eSLois Curfman McInnes         for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
1196cddf8d76SBarry Smith           sum += PetscAbsScalar(*v); v++;
119704ca555eSLois Curfman McInnes         }
1198dbb450caSBarry Smith         v = bmat->a + bmat->i[j] + shift;
119904ca555eSLois Curfman McInnes         for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
1200cddf8d76SBarry Smith           sum += PetscAbsScalar(*v); v++;
120104ca555eSLois Curfman McInnes         }
1202515d9167SLois Curfman McInnes         if (sum > ntemp) ntemp = sum;
120304ca555eSLois Curfman McInnes       }
1204515d9167SLois Curfman McInnes       MPI_Allreduce((void*)&ntemp,(void*)norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);
120504ca555eSLois Curfman McInnes     }
120604ca555eSLois Curfman McInnes     else {
1207515d9167SLois Curfman McInnes       SETERRQ(1,"MatNorm_MPIAIJ:No support for two norm");
120804ca555eSLois Curfman McInnes     }
120937fa93a5SLois Curfman McInnes   }
1210855ac2c5SLois Curfman McInnes   return 0;
1211855ac2c5SLois Curfman McInnes }
1212855ac2c5SLois Curfman McInnes 
12130de55854SLois Curfman McInnes static int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1214b7c46309SBarry Smith {
1215b7c46309SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1216dbb450caSBarry Smith   Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
1217416022c9SBarry Smith   int        ierr,shift = Aloc->indexshift;
1218416022c9SBarry Smith   Mat        B;
1219b7c46309SBarry Smith   int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
1220b7c46309SBarry Smith   Scalar     *array;
1221b7c46309SBarry Smith 
1222416022c9SBarry Smith   if (!matout && M != N) SETERRQ(1,"MatTranspose_MPIAIJ:Square only in-place");
1223b4fd4287SBarry Smith   ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,
1224b4fd4287SBarry Smith          PETSC_NULL,&B); CHKERRQ(ierr);
1225b7c46309SBarry Smith 
1226b7c46309SBarry Smith   /* copy over the A part */
1227ec8511deSBarry Smith   Aloc = (Mat_SeqAIJ*) a->A->data;
1228b7c46309SBarry Smith   m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1229b7c46309SBarry Smith   row = a->rstart;
1230dbb450caSBarry Smith   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
1231b7c46309SBarry Smith   for ( i=0; i<m; i++ ) {
1232416022c9SBarry Smith     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1233b7c46309SBarry Smith     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1234b7c46309SBarry Smith   }
1235b7c46309SBarry Smith   aj = Aloc->j;
1236dbb450caSBarry Smith   for ( i=0; i<ai[m]|+shift; i++ ) {aj[i] -= a->cstart + shift;}
1237b7c46309SBarry Smith 
1238b7c46309SBarry Smith   /* copy over the B part */
1239ec8511deSBarry Smith   Aloc = (Mat_SeqAIJ*) a->B->data;
1240b7c46309SBarry Smith   m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1241b7c46309SBarry Smith   row = a->rstart;
12420452661fSBarry Smith   ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
1243dbb450caSBarry Smith   for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
1244b7c46309SBarry Smith   for ( i=0; i<m; i++ ) {
1245416022c9SBarry Smith     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1246b7c46309SBarry Smith     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1247b7c46309SBarry Smith   }
12480452661fSBarry Smith   PetscFree(ct);
1249b7c46309SBarry Smith   ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
1250b7c46309SBarry Smith   ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
12510de55854SLois Curfman McInnes   if (matout) {
12520de55854SLois Curfman McInnes     *matout = B;
12530de55854SLois Curfman McInnes   } else {
12540de55854SLois Curfman McInnes     /* This isn't really an in-place transpose .... but free data structures from a */
12550452661fSBarry Smith     PetscFree(a->rowners);
12560de55854SLois Curfman McInnes     ierr = MatDestroy(a->A); CHKERRQ(ierr);
12570de55854SLois Curfman McInnes     ierr = MatDestroy(a->B); CHKERRQ(ierr);
12580452661fSBarry Smith     if (a->colmap) PetscFree(a->colmap);
12590452661fSBarry Smith     if (a->garray) PetscFree(a->garray);
12600de55854SLois Curfman McInnes     if (a->lvec) VecDestroy(a->lvec);
1261a56f8943SBarry Smith     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
12620452661fSBarry Smith     PetscFree(a);
1263416022c9SBarry Smith     PetscMemcpy(A,B,sizeof(struct _Mat));
12640452661fSBarry Smith     PetscHeaderDestroy(B);
12650de55854SLois Curfman McInnes   }
1266b7c46309SBarry Smith   return 0;
1267b7c46309SBarry Smith }
1268b7c46309SBarry Smith 
1269fc76ce87SLois Curfman McInnes extern int MatConvert_MPIAIJ(Mat,MatType,Mat *);
1270*3d1612f7SBarry Smith static int MatConvertSameType_MPIAIJ(Mat,Mat *,int);
1271d6dfbf8fSBarry Smith 
12728a729477SBarry Smith /* -------------------------------------------------------------------*/
12732ee70a88SLois Curfman McInnes static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
127439e00950SLois Curfman McInnes        MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
127544a69424SLois Curfman McInnes        MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
127644a69424SLois Curfman McInnes        MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
1277299609e3SLois Curfman McInnes        MatSolve_MPIAIJ,MatSolveAdd_MPIAIJ,
1278299609e3SLois Curfman McInnes        MatSolveTrans_MPIAIJ,MatSolveTransAdd_MPIAIJ,
1279299609e3SLois Curfman McInnes        MatLUFactor_MPIAIJ,0,
128044a69424SLois Curfman McInnes        MatRelax_MPIAIJ,
1281b7c46309SBarry Smith        MatTranspose_MPIAIJ,
1282a66be287SLois Curfman McInnes        MatGetInfo_MPIAIJ,0,
1283855ac2c5SLois Curfman McInnes        MatGetDiagonal_MPIAIJ,0,MatNorm_MPIAIJ,
1284ee50ffe9SBarry Smith        MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
12851eb62cbbSBarry Smith        0,
1286299609e3SLois Curfman McInnes        MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
1287299609e3SLois Curfman McInnes        MatGetReordering_MPIAIJ,
1288299609e3SLois Curfman McInnes        MatLUFactorSymbolic_MPIAIJ,MatLUFactorNumeric_MPIAIJ,0,0,
1289d1710a03SLois Curfman McInnes        MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
1290299609e3SLois Curfman McInnes        MatILUFactorSymbolic_MPIAIJ,0,
1291*3d1612f7SBarry Smith        0,0,MatConvert_MPIAIJ,0,0,MatConvertSameType_MPIAIJ,0,0,
1292b49de8d1SLois Curfman McInnes        0,0,0,
1293b49de8d1SLois Curfman McInnes        0,0,MatGetValues_MPIAIJ};
12948a729477SBarry Smith 
12951987afe7SBarry Smith /*@C
1296ff756334SLois Curfman McInnes    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
1297ff756334SLois Curfman McInnes    (the default parallel PETSc format).
12988a729477SBarry Smith 
12998a729477SBarry Smith    Input Parameters:
13001eb62cbbSBarry Smith .  comm - MPI communicator
13017d3e4905SLois Curfman McInnes .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
13027d3e4905SLois Curfman McInnes .  n - number of local columns (or PETSC_DECIDE to have calculated
13037d3e4905SLois Curfman McInnes            if N is given)
13047d3e4905SLois Curfman McInnes .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
13057d3e4905SLois Curfman McInnes .  N - number of global columns (or PETSC_DECIDE to have calculated
13067d3e4905SLois Curfman McInnes            if n is given)
1307ab693e5aSLois Curfman McInnes .  d_nz - number of nonzeros per row in diagonal portion of local submatrix
1308ff756334SLois Curfman McInnes            (same for all local rows)
1309ab693e5aSLois Curfman McInnes .  d_nzz - number of nonzeros per row in diagonal portion of local submatrix
1310ab693e5aSLois Curfman McInnes            or null (possibly different for each row).  You must leave room
1311ab693e5aSLois Curfman McInnes            for the diagonal entry even if it is zero.
1312ab693e5aSLois Curfman McInnes .  o_nz - number of nonzeros per row in off-diagonal portion of local
1313ab693e5aSLois Curfman McInnes            submatrix (same for all local rows).
1314ab693e5aSLois Curfman McInnes .  o_nzz - number of nonzeros per row in off-diagonal portion of local
1315ab693e5aSLois Curfman McInnes            submatrix or null (possibly different for each row).
13168a729477SBarry Smith 
1317ff756334SLois Curfman McInnes    Output Parameter:
13188a729477SBarry Smith .  newmat - the matrix
13198a729477SBarry Smith 
1320ff756334SLois Curfman McInnes    Notes:
1321ff756334SLois Curfman McInnes    The AIJ format (also called the Yale sparse matrix format or
1322ff756334SLois Curfman McInnes    compressed row storage), is fully compatible with standard Fortran 77
13230002213bSLois Curfman McInnes    storage.  That is, the stored row and column indices can begin at
13240002213bSLois Curfman McInnes    either one (as in Fortran) or zero.  See the users manual for details.
1325ff756334SLois Curfman McInnes 
1326ff756334SLois Curfman McInnes    The user MUST specify either the local or global matrix dimensions
1327ff756334SLois Curfman McInnes    (possibly both).
1328ff756334SLois Curfman McInnes 
1329e0245417SLois Curfman McInnes    Storage Information:
1330e0245417SLois Curfman McInnes    For a square global matrix we define each processor's diagonal portion
1331e0245417SLois Curfman McInnes    to be its local rows and the corresponding columns (a square submatrix);
1332e0245417SLois Curfman McInnes    each processor's off-diagonal portion encompasses the remainder of the
1333e0245417SLois Curfman McInnes    local matrix (a rectangular submatrix).
1334e0245417SLois Curfman McInnes 
1335e0245417SLois Curfman McInnes    The user can specify preallocated storage for the diagonal part of
1336c451ab25SLois Curfman McInnes    the local submatrix with either d_nz or d_nnz (not both).  Set d_nz=0
1337b4fd4287SBarry Smith    and d_nnz=PETSC_NULL for PETSc to control dynamic memory allocation.
1338e0245417SLois Curfman McInnes    Likewise, specify preallocated storage for the off-diagonal part of
1339e0245417SLois Curfman McInnes    the local submatrix with o_nz or o_nnz (not both).
1340ff756334SLois Curfman McInnes 
1341c451ab25SLois Curfman McInnes    By default, this format uses inodes (identical nodes) when possible.
1342c451ab25SLois Curfman McInnes    We search for consecutive rows with the same nonzero structure, thereby
1343c451ab25SLois Curfman McInnes    reusing matrix information to achieve increased efficiency.
1344c451ab25SLois Curfman McInnes 
1345c451ab25SLois Curfman McInnes    Options Database Keys:
1346c451ab25SLois Curfman McInnes $    -mat_aij_no_inode  - Do not use inodes
1347c451ab25SLois Curfman McInnes $    -mat_aij_inode_limit <limit> - Set inode limit (max limit=5)
1348c451ab25SLois Curfman McInnes 
1349dbd7a890SLois Curfman McInnes .keywords: matrix, aij, compressed row, sparse, parallel
1350ff756334SLois Curfman McInnes 
1351fafbff53SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
13528a729477SBarry Smith @*/
13531eb62cbbSBarry Smith int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
13541eb62cbbSBarry Smith                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *newmat)
13558a729477SBarry Smith {
13568a729477SBarry Smith   Mat          mat;
1357416022c9SBarry Smith   Mat_MPIAIJ   *a;
13586abc6512SBarry Smith   int          ierr, i,sum[2],work[2];
1359416022c9SBarry Smith 
13608a729477SBarry Smith   *newmat         = 0;
13610452661fSBarry Smith   PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,comm);
1362a5a9c739SBarry Smith   PLogObjectCreate(mat);
13630452661fSBarry Smith   mat->data       = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
1364416022c9SBarry Smith   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
136544a69424SLois Curfman McInnes   mat->destroy    = MatDestroy_MPIAIJ;
136644a69424SLois Curfman McInnes   mat->view       = MatView_MPIAIJ;
13678a729477SBarry Smith   mat->factor     = 0;
1368d6dfbf8fSBarry Smith 
136964119d58SLois Curfman McInnes   a->insertmode = NOT_SET_VALUES;
137017699dbbSLois Curfman McInnes   MPI_Comm_rank(comm,&a->rank);
137117699dbbSLois Curfman McInnes   MPI_Comm_size(comm,&a->size);
13721eb62cbbSBarry Smith 
1373b4fd4287SBarry Smith   if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
13741987afe7SBarry Smith     SETERRQ(1,"MatCreateMPIAIJ:Cannot have PETSc decide rows but set d_nnz or o_nnz");
13751987afe7SBarry Smith 
1376dbd7a890SLois Curfman McInnes   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
13771eb62cbbSBarry Smith     work[0] = m; work[1] = n;
1378d65a2f8fSBarry Smith     MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
1379dbd7a890SLois Curfman McInnes     if (M == PETSC_DECIDE) M = sum[0];
1380dbd7a890SLois Curfman McInnes     if (N == PETSC_DECIDE) N = sum[1];
13811eb62cbbSBarry Smith   }
138217699dbbSLois Curfman McInnes   if (m == PETSC_DECIDE) {m = M/a->size + ((M % a->size) > a->rank);}
138317699dbbSLois Curfman McInnes   if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);}
1384416022c9SBarry Smith   a->m = m;
1385416022c9SBarry Smith   a->n = n;
1386416022c9SBarry Smith   a->N = N;
1387416022c9SBarry Smith   a->M = M;
13881eb62cbbSBarry Smith 
13891eb62cbbSBarry Smith   /* build local table of row and column ownerships */
13900452661fSBarry Smith   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
1391579c6b6fSBarry Smith   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
139217699dbbSLois Curfman McInnes   a->cowners = a->rowners + a->size + 1;
1393416022c9SBarry Smith   MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);
1394416022c9SBarry Smith   a->rowners[0] = 0;
139517699dbbSLois Curfman McInnes   for ( i=2; i<=a->size; i++ ) {
1396416022c9SBarry Smith     a->rowners[i] += a->rowners[i-1];
13978a729477SBarry Smith   }
139817699dbbSLois Curfman McInnes   a->rstart = a->rowners[a->rank];
139917699dbbSLois Curfman McInnes   a->rend   = a->rowners[a->rank+1];
1400416022c9SBarry Smith   MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);
1401416022c9SBarry Smith   a->cowners[0] = 0;
140217699dbbSLois Curfman McInnes   for ( i=2; i<=a->size; i++ ) {
1403416022c9SBarry Smith     a->cowners[i] += a->cowners[i-1];
14048a729477SBarry Smith   }
140517699dbbSLois Curfman McInnes   a->cstart = a->cowners[a->rank];
140617699dbbSLois Curfman McInnes   a->cend   = a->cowners[a->rank+1];
14078a729477SBarry Smith 
1408416022c9SBarry Smith   ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&a->A); CHKERRQ(ierr);
1409416022c9SBarry Smith   PLogObjectParent(mat,a->A);
1410416022c9SBarry Smith   ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&a->B); CHKERRQ(ierr);
1411416022c9SBarry Smith   PLogObjectParent(mat,a->B);
14128a729477SBarry Smith 
14131eb62cbbSBarry Smith   /* build cache for off array entries formed */
1414416022c9SBarry Smith   ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr);
1415416022c9SBarry Smith   a->colmap    = 0;
1416416022c9SBarry Smith   a->garray    = 0;
14178a729477SBarry Smith 
14181eb62cbbSBarry Smith   /* stuff used for matrix vector multiply */
1419416022c9SBarry Smith   a->lvec      = 0;
1420416022c9SBarry Smith   a->Mvctx     = 0;
1421416022c9SBarry Smith   a->assembled = 0;
14228a729477SBarry Smith 
1423d6dfbf8fSBarry Smith   *newmat = mat;
1424d6dfbf8fSBarry Smith   return 0;
1425d6dfbf8fSBarry Smith }
1426c74985f6SBarry Smith 
1427*3d1612f7SBarry Smith static int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues)
1428d6dfbf8fSBarry Smith {
1429d6dfbf8fSBarry Smith   Mat        mat;
1430416022c9SBarry Smith   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
14312ee70a88SLois Curfman McInnes   int        ierr, len;
1432d6dfbf8fSBarry Smith 
1433*3d1612f7SBarry Smith   if (!oldmat->assembled) SETERRQ(1,"MatConvertSameType_MPIAIJ:Must assemble matrix");
1434416022c9SBarry Smith   *newmat       = 0;
14350452661fSBarry Smith   PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm);
1436a5a9c739SBarry Smith   PLogObjectCreate(mat);
14370452661fSBarry Smith   mat->data     = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
1438416022c9SBarry Smith   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
143944a69424SLois Curfman McInnes   mat->destroy  = MatDestroy_MPIAIJ;
144044a69424SLois Curfman McInnes   mat->view     = MatView_MPIAIJ;
1441d6dfbf8fSBarry Smith   mat->factor   = matin->factor;
1442d6dfbf8fSBarry Smith 
1443416022c9SBarry Smith   a->m          = oldmat->m;
1444416022c9SBarry Smith   a->n          = oldmat->n;
1445416022c9SBarry Smith   a->M          = oldmat->M;
1446416022c9SBarry Smith   a->N          = oldmat->N;
1447d6dfbf8fSBarry Smith 
1448416022c9SBarry Smith   a->assembled  = 1;
1449416022c9SBarry Smith   a->rstart     = oldmat->rstart;
1450416022c9SBarry Smith   a->rend       = oldmat->rend;
1451416022c9SBarry Smith   a->cstart     = oldmat->cstart;
1452416022c9SBarry Smith   a->cend       = oldmat->cend;
145317699dbbSLois Curfman McInnes   a->size       = oldmat->size;
145417699dbbSLois Curfman McInnes   a->rank       = oldmat->rank;
145564119d58SLois Curfman McInnes   a->insertmode = NOT_SET_VALUES;
1456d6dfbf8fSBarry Smith 
14570452661fSBarry Smith   a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners);
145817699dbbSLois Curfman McInnes   PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
145917699dbbSLois Curfman McInnes   PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));
1460416022c9SBarry Smith   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
14612ee70a88SLois Curfman McInnes   if (oldmat->colmap) {
14620452661fSBarry Smith     a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
1463416022c9SBarry Smith     PLogObjectMemory(mat,(a->N)*sizeof(int));
1464416022c9SBarry Smith     PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
1465416022c9SBarry Smith   } else a->colmap = 0;
1466ec8511deSBarry Smith   if (oldmat->garray && (len = ((Mat_SeqAIJ *) (oldmat->B->data))->n)) {
14670452661fSBarry Smith     a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray);
1468464493b3SBarry Smith     PLogObjectMemory(mat,len*sizeof(int));
1469416022c9SBarry Smith     PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
1470416022c9SBarry Smith   } else a->garray = 0;
1471d6dfbf8fSBarry Smith 
1472416022c9SBarry Smith   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
1473416022c9SBarry Smith   PLogObjectParent(mat,a->lvec);
1474a56f8943SBarry Smith   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
1475416022c9SBarry Smith   PLogObjectParent(mat,a->Mvctx);
1476416022c9SBarry Smith   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
1477416022c9SBarry Smith   PLogObjectParent(mat,a->A);
1478416022c9SBarry Smith   ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
1479416022c9SBarry Smith   PLogObjectParent(mat,a->B);
14808a729477SBarry Smith   *newmat = mat;
14818a729477SBarry Smith   return 0;
14828a729477SBarry Smith }
1483416022c9SBarry Smith 
1484416022c9SBarry Smith #include "sysio.h"
1485416022c9SBarry Smith 
148602834360SBarry Smith int MatLoad_MPIAIJorMPIRow(Viewer bview,MatType type,Mat *newmat)
1487416022c9SBarry Smith {
1488d65a2f8fSBarry Smith   Mat          A;
1489416022c9SBarry Smith   int          i, nz, ierr, j,rstart, rend, fd;
1490d65a2f8fSBarry Smith   Scalar       *vals,*svals;
1491416022c9SBarry Smith   PetscObject  vobj = (PetscObject) bview;
1492416022c9SBarry Smith   MPI_Comm     comm = vobj->comm;
1493416022c9SBarry Smith   MPI_Status   status;
149417699dbbSLois Curfman McInnes   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1495d65a2f8fSBarry Smith   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
1496d65a2f8fSBarry Smith   int          tag = ((PetscObject)bview)->tag;
1497416022c9SBarry Smith 
149817699dbbSLois Curfman McInnes   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
149917699dbbSLois Curfman McInnes   if (!rank) {
1500416022c9SBarry Smith     ierr = ViewerFileGetDescriptor_Private(bview,&fd); CHKERRQ(ierr);
1501416022c9SBarry Smith     ierr = SYRead(fd,(char *)header,4,SYINT); CHKERRQ(ierr);
150202834360SBarry Smith     if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIAIJorMPIRow:not matrix object");
1503416022c9SBarry Smith   }
1504416022c9SBarry Smith 
1505416022c9SBarry Smith   MPI_Bcast(header+1,3,MPI_INT,0,comm);
1506416022c9SBarry Smith   M = header[1]; N = header[2];
1507416022c9SBarry Smith   /* determine ownership of all rows */
150817699dbbSLois Curfman McInnes   m = M/size + ((M % size) > rank);
15090452661fSBarry Smith   rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
1510416022c9SBarry Smith   MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1511416022c9SBarry Smith   rowners[0] = 0;
151217699dbbSLois Curfman McInnes   for ( i=2; i<=size; i++ ) {
1513416022c9SBarry Smith     rowners[i] += rowners[i-1];
1514416022c9SBarry Smith   }
151517699dbbSLois Curfman McInnes   rstart = rowners[rank];
151617699dbbSLois Curfman McInnes   rend   = rowners[rank+1];
1517416022c9SBarry Smith 
1518416022c9SBarry Smith   /* distribute row lengths to all processors */
15190452661fSBarry Smith   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
1520416022c9SBarry Smith   offlens = ourlens + (rend-rstart);
152117699dbbSLois Curfman McInnes   if (!rank) {
15220452661fSBarry Smith     rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
1523416022c9SBarry Smith     ierr = SYRead(fd,rowlengths,M,SYINT); CHKERRQ(ierr);
15240452661fSBarry Smith     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
152517699dbbSLois Curfman McInnes     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
1526d65a2f8fSBarry Smith     MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
15270452661fSBarry Smith     PetscFree(sndcounts);
1528416022c9SBarry Smith   }
1529416022c9SBarry Smith   else {
1530416022c9SBarry Smith     MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
1531416022c9SBarry Smith   }
1532416022c9SBarry Smith 
153317699dbbSLois Curfman McInnes   if (!rank) {
1534416022c9SBarry Smith     /* calculate the number of nonzeros on each processor */
15350452661fSBarry Smith     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
1536cddf8d76SBarry Smith     PetscMemzero(procsnz,size*sizeof(int));
153717699dbbSLois Curfman McInnes     for ( i=0; i<size; i++ ) {
1538416022c9SBarry Smith       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
1539416022c9SBarry Smith         procsnz[i] += rowlengths[j];
1540416022c9SBarry Smith       }
1541416022c9SBarry Smith     }
15420452661fSBarry Smith     PetscFree(rowlengths);
1543416022c9SBarry Smith 
1544416022c9SBarry Smith     /* determine max buffer needed and allocate it */
1545416022c9SBarry Smith     maxnz = 0;
154617699dbbSLois Curfman McInnes     for ( i=0; i<size; i++ ) {
15470452661fSBarry Smith       maxnz = PetscMax(maxnz,procsnz[i]);
1548416022c9SBarry Smith     }
15490452661fSBarry Smith     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
1550416022c9SBarry Smith 
1551416022c9SBarry Smith     /* read in my part of the matrix column indices  */
1552416022c9SBarry Smith     nz = procsnz[0];
15530452661fSBarry Smith     mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1554d65a2f8fSBarry Smith     ierr = SYRead(fd,mycols,nz,SYINT); CHKERRQ(ierr);
1555d65a2f8fSBarry Smith 
1556d65a2f8fSBarry Smith     /* read in every one elses and ship off */
155717699dbbSLois Curfman McInnes     for ( i=1; i<size; i++ ) {
1558d65a2f8fSBarry Smith       nz = procsnz[i];
1559416022c9SBarry Smith       ierr = SYRead(fd,cols,nz,SYINT); CHKERRQ(ierr);
1560d65a2f8fSBarry Smith       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
1561d65a2f8fSBarry Smith     }
15620452661fSBarry Smith     PetscFree(cols);
1563416022c9SBarry Smith   }
1564416022c9SBarry Smith   else {
1565416022c9SBarry Smith     /* determine buffer space needed for message */
1566416022c9SBarry Smith     nz = 0;
1567416022c9SBarry Smith     for ( i=0; i<m; i++ ) {
1568416022c9SBarry Smith       nz += ourlens[i];
1569416022c9SBarry Smith     }
15700452661fSBarry Smith     mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1571416022c9SBarry Smith 
1572416022c9SBarry Smith     /* receive message of column indices*/
1573d65a2f8fSBarry Smith     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
1574416022c9SBarry Smith     MPI_Get_count(&status,MPI_INT,&maxnz);
157502834360SBarry Smith     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJorMPIRow:something is wrong with file");
1576416022c9SBarry Smith   }
1577416022c9SBarry Smith 
1578416022c9SBarry Smith   /* loop over local rows, determining number of off diagonal entries */
1579cddf8d76SBarry Smith   PetscMemzero(offlens,m*sizeof(int));
1580416022c9SBarry Smith   jj = 0;
1581416022c9SBarry Smith   for ( i=0; i<m; i++ ) {
1582416022c9SBarry Smith     for ( j=0; j<ourlens[i]; j++ ) {
1583d65a2f8fSBarry Smith       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1584416022c9SBarry Smith       jj++;
1585416022c9SBarry Smith     }
1586416022c9SBarry Smith   }
1587d65a2f8fSBarry Smith 
1588d65a2f8fSBarry Smith   /* create our matrix */
1589416022c9SBarry Smith   for ( i=0; i<m; i++ ) {
1590416022c9SBarry Smith     ourlens[i] -= offlens[i];
1591416022c9SBarry Smith   }
1592d65a2f8fSBarry Smith   if (type == MATMPIAIJ) {
1593d65a2f8fSBarry Smith     ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1594d65a2f8fSBarry Smith   }
1595d65a2f8fSBarry Smith   else if (type == MATMPIROW) {
1596d65a2f8fSBarry Smith     ierr = MatCreateMPIRow(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1597d65a2f8fSBarry Smith   }
1598d65a2f8fSBarry Smith   A = *newmat;
1599d65a2f8fSBarry Smith   MatSetOption(A,COLUMNS_SORTED);
1600d65a2f8fSBarry Smith   for ( i=0; i<m; i++ ) {
1601d65a2f8fSBarry Smith     ourlens[i] += offlens[i];
1602d65a2f8fSBarry Smith   }
1603416022c9SBarry Smith 
160417699dbbSLois Curfman McInnes   if (!rank) {
16050452661fSBarry Smith     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);
1606416022c9SBarry Smith 
1607416022c9SBarry Smith     /* read in my part of the matrix numerical values  */
1608416022c9SBarry Smith     nz = procsnz[0];
1609416022c9SBarry Smith     ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr);
1610d65a2f8fSBarry Smith 
1611d65a2f8fSBarry Smith     /* insert into matrix */
1612d65a2f8fSBarry Smith     jj      = rstart;
1613d65a2f8fSBarry Smith     smycols = mycols;
1614d65a2f8fSBarry Smith     svals   = vals;
1615d65a2f8fSBarry Smith     for ( i=0; i<m; i++ ) {
1616d65a2f8fSBarry Smith       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1617d65a2f8fSBarry Smith       smycols += ourlens[i];
1618d65a2f8fSBarry Smith       svals   += ourlens[i];
1619d65a2f8fSBarry Smith       jj++;
1620416022c9SBarry Smith     }
1621416022c9SBarry Smith 
1622d65a2f8fSBarry Smith     /* read in other processors and ship out */
162317699dbbSLois Curfman McInnes     for ( i=1; i<size; i++ ) {
1624416022c9SBarry Smith       nz = procsnz[i];
1625416022c9SBarry Smith       ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr);
1626d65a2f8fSBarry Smith       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
1627416022c9SBarry Smith     }
16280452661fSBarry Smith     PetscFree(procsnz);
1629416022c9SBarry Smith   }
1630d65a2f8fSBarry Smith   else {
1631d65a2f8fSBarry Smith     /* receive numeric values */
16320452661fSBarry Smith     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);
1633416022c9SBarry Smith 
1634d65a2f8fSBarry Smith     /* receive message of values*/
1635d65a2f8fSBarry Smith     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
1636d65a2f8fSBarry Smith     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
163702834360SBarry Smith     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJorMPIRow:something is wrong with file");
1638d65a2f8fSBarry Smith 
1639d65a2f8fSBarry Smith     /* insert into matrix */
1640d65a2f8fSBarry Smith     jj      = rstart;
1641d65a2f8fSBarry Smith     smycols = mycols;
1642d65a2f8fSBarry Smith     svals   = vals;
1643d65a2f8fSBarry Smith     for ( i=0; i<m; i++ ) {
1644d65a2f8fSBarry Smith       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1645d65a2f8fSBarry Smith       smycols += ourlens[i];
1646d65a2f8fSBarry Smith       svals   += ourlens[i];
1647d65a2f8fSBarry Smith       jj++;
1648d65a2f8fSBarry Smith     }
1649d65a2f8fSBarry Smith   }
16500452661fSBarry Smith   PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);
1651d65a2f8fSBarry Smith 
1652d65a2f8fSBarry Smith   ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
1653d65a2f8fSBarry Smith   ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
1654416022c9SBarry Smith   return 0;
1655416022c9SBarry Smith }
1656