xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision b49de8d1b1f74f27495c5d351d287e6bdc71c90c)
1cb512458SBarry Smith #ifndef lint
2*b49de8d1SLois Curfman McInnes static char vcid[] = "$Id: mpiaij.c,v 1.99 1995/11/09 22:28:54 bsmith Exp curfman $";
3cb512458SBarry Smith #endif
48a729477SBarry Smith 
51eb62cbbSBarry Smith #include "mpiaij.h"
68a729477SBarry Smith #include "vec/vecimpl.h"
7d6dfbf8fSBarry Smith #include "inline/spops.h"
88a729477SBarry Smith 
99e25ed09SBarry Smith /* local utility routine that creates a mapping from the global column
109e25ed09SBarry Smith number to the local number in the off-diagonal part of the local
119e25ed09SBarry Smith storage of the matrix.  This is done in a non scable way since the
129e25ed09SBarry Smith length of colmap equals the global matrix length.
139e25ed09SBarry Smith */
14b7c46309SBarry Smith static int CreateColmap_Private(Mat mat)
159e25ed09SBarry Smith {
1644a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
17ec8511deSBarry Smith   Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
18416022c9SBarry Smith   int        n = B->n,i,shift = B->indexshift;
19dbb450caSBarry Smith 
200452661fSBarry Smith   aij->colmap = (int *) PetscMalloc(aij->N*sizeof(int));CHKPTRQ(aij->colmap);
21464493b3SBarry Smith   PLogObjectMemory(mat,aij->N*sizeof(int));
22cddf8d76SBarry Smith   PetscMemzero(aij->colmap,aij->N*sizeof(int));
23dbb450caSBarry Smith   for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i-shift;
249e25ed09SBarry Smith   return 0;
259e25ed09SBarry Smith }
269e25ed09SBarry Smith 
272493cbb0SBarry Smith extern int DisAssemble_MPIAIJ(Mat);
282493cbb0SBarry Smith 
2951c98ccdSLois Curfman McInnes static int MatGetReordering_MPIAIJ(Mat mat,MatOrdering type,IS *rperm,IS *cperm)
30299609e3SLois Curfman McInnes {
31299609e3SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
32299609e3SLois Curfman McInnes   int ierr;
3317699dbbSLois Curfman McInnes   if (aij->size == 1) {
3451c98ccdSLois Curfman McInnes     ierr = MatGetReordering(aij->A,type,rperm,cperm); CHKERRQ(ierr);
3548d91487SBarry Smith   } else SETERRQ(1,"MatGetReordering_MPIAIJ:not supported in parallel");
36299609e3SLois Curfman McInnes   return 0;
37299609e3SLois Curfman McInnes }
38299609e3SLois Curfman McInnes 
392ee70a88SLois Curfman McInnes static int MatSetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,
401eb62cbbSBarry Smith                                int *idxn,Scalar *v,InsertMode addv)
418a729477SBarry Smith {
4244a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
43dbb450caSBarry Smith   Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data;
441eb62cbbSBarry Smith   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
451eb62cbbSBarry Smith   int        cstart = aij->cstart, cend = aij->cend,row,col;
46dbb450caSBarry Smith   int        shift = C->indexshift;
478a729477SBarry Smith 
4864119d58SLois Curfman McInnes   if (aij->insertmode != NOT_SET_VALUES && aij->insertmode != addv) {
4948d91487SBarry Smith     SETERRQ(1,"MatSetValues_MPIAIJ:Cannot mix inserts and adds");
508a729477SBarry Smith   }
511eb62cbbSBarry Smith   aij->insertmode = addv;
528a729477SBarry Smith   for ( i=0; i<m; i++ ) {
53bbb6d6a8SBarry Smith     if (idxm[i] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative row");
54bbb6d6a8SBarry Smith     if (idxm[i] >= aij->M) SETERRQ(1,"MatSetValues_MPIAIJ:Row too large");
551eb62cbbSBarry Smith     if (idxm[i] >= rstart && idxm[i] < rend) {
561eb62cbbSBarry Smith       row = idxm[i] - rstart;
571eb62cbbSBarry Smith       for ( j=0; j<n; j++ ) {
58bbb6d6a8SBarry Smith         if (idxn[j] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative column");
59bbb6d6a8SBarry Smith         if (idxn[j] >= aij->N) SETERRQ(1,"MatSetValues_MPIAIJ:Col too large");
601eb62cbbSBarry Smith         if (idxn[j] >= cstart && idxn[j] < cend){
611eb62cbbSBarry Smith           col = idxn[j] - cstart;
6278b31e54SBarry Smith           ierr = MatSetValues(aij->A,1,&row,1,&col,v+i*n+j,addv);CHKERRQ(ierr);
631eb62cbbSBarry Smith         }
641eb62cbbSBarry Smith         else {
65d6dfbf8fSBarry Smith           if (aij->assembled) {
66b7c46309SBarry Smith             if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
67dbb450caSBarry Smith             col = aij->colmap[idxn[j]] + shift;
68ec8511deSBarry Smith             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
692493cbb0SBarry Smith               ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
702493cbb0SBarry Smith               col =  idxn[j];
71d6dfbf8fSBarry Smith             }
729e25ed09SBarry Smith           }
739e25ed09SBarry Smith           else col = idxn[j];
7478b31e54SBarry Smith           ierr = MatSetValues(aij->B,1,&row,1,&col,v+i*n+j,addv);CHKERRQ(ierr);
751eb62cbbSBarry Smith         }
761eb62cbbSBarry Smith       }
771eb62cbbSBarry Smith     }
781eb62cbbSBarry Smith     else {
79416022c9SBarry Smith       ierr = StashValues_Private(&aij->stash,idxm[i],n,idxn,v+i*n,addv);CHKERRQ(ierr);
801eb62cbbSBarry Smith     }
818a729477SBarry Smith   }
828a729477SBarry Smith   return 0;
838a729477SBarry Smith }
848a729477SBarry Smith 
85*b49de8d1SLois Curfman McInnes static int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
86*b49de8d1SLois Curfman McInnes {
87*b49de8d1SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
88*b49de8d1SLois Curfman McInnes   Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data;
89*b49de8d1SLois Curfman McInnes   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
90*b49de8d1SLois Curfman McInnes   int        cstart = aij->cstart, cend = aij->cend,row,col;
91*b49de8d1SLois Curfman McInnes   int        shift = C->indexshift;
92*b49de8d1SLois Curfman McInnes 
93*b49de8d1SLois Curfman McInnes   if (!aij->assembled) SETERRQ(1,"MatGetValues_MPIAIJ:Not for unassembled matrix");
94*b49de8d1SLois Curfman McInnes   for ( i=0; i<m; i++ ) {
95*b49de8d1SLois Curfman McInnes     if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative row");
96*b49de8d1SLois Curfman McInnes     if (idxm[i] >= aij->M) SETERRQ(1,"MatGetValues_MPIAIJ:Row too large");
97*b49de8d1SLois Curfman McInnes     if (idxm[i] >= rstart && idxm[i] < rend) {
98*b49de8d1SLois Curfman McInnes       row = idxm[i] - rstart;
99*b49de8d1SLois Curfman McInnes       for ( j=0; j<n; j++ ) {
100*b49de8d1SLois Curfman McInnes         if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative column");
101*b49de8d1SLois Curfman McInnes         if (idxn[j] >= aij->N) SETERRQ(1,"MatGetValues_MPIAIJ:Col too large");
102*b49de8d1SLois Curfman McInnes         if (idxn[j] >= cstart && idxn[j] < cend){
103*b49de8d1SLois Curfman McInnes           col = idxn[j] - cstart;
104*b49de8d1SLois Curfman McInnes           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
105*b49de8d1SLois Curfman McInnes         }
106*b49de8d1SLois Curfman McInnes         else {
107*b49de8d1SLois Curfman McInnes           col = aij->colmap[idxn[j]] + shift;
108*b49de8d1SLois Curfman McInnes           ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
109*b49de8d1SLois Curfman McInnes         }
110*b49de8d1SLois Curfman McInnes       }
111*b49de8d1SLois Curfman McInnes     }
112*b49de8d1SLois Curfman McInnes     else {
113*b49de8d1SLois Curfman McInnes       SETERRQ(1,"MatGetValues_MPIAIJ:Only local values currently supported");
114*b49de8d1SLois Curfman McInnes     }
115*b49de8d1SLois Curfman McInnes   }
116*b49de8d1SLois Curfman McInnes   return 0;
117*b49de8d1SLois Curfman McInnes }
118*b49de8d1SLois Curfman McInnes 
119fc76ce87SLois Curfman McInnes static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
1208a729477SBarry Smith {
12144a69424SLois Curfman McInnes   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
122d6dfbf8fSBarry Smith   MPI_Comm    comm = mat->comm;
12317699dbbSLois Curfman McInnes   int         size = aij->size, *owners = aij->rowners;
12417699dbbSLois Curfman McInnes   int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
1251eb62cbbSBarry Smith   MPI_Request *send_waits,*recv_waits;
1266abc6512SBarry Smith   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
1271eb62cbbSBarry Smith   InsertMode  addv;
1281eb62cbbSBarry Smith   Scalar      *rvalues,*svalues;
1291eb62cbbSBarry Smith 
1301eb62cbbSBarry Smith   /* make sure all processors are either in INSERTMODE or ADDMODE */
131d65a2f8fSBarry Smith   MPI_Allreduce(&aij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
132dbb450caSBarry Smith   if (addv == (ADD_VALUES|INSERT_VALUES)) {
133bbb6d6a8SBarry Smith     SETERRQ(1,"MatAssemblyBegin_MPIAIJ:Some processors inserted others added");
1341eb62cbbSBarry Smith   }
1351eb62cbbSBarry Smith   aij->insertmode = addv; /* in case this processor had no cache */
1361eb62cbbSBarry Smith 
1371eb62cbbSBarry Smith   /*  first count number of contributors to each processor */
1380452661fSBarry Smith   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
139cddf8d76SBarry Smith   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
1400452661fSBarry Smith   owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
1411eb62cbbSBarry Smith   for ( i=0; i<aij->stash.n; i++ ) {
1421eb62cbbSBarry Smith     idx = aij->stash.idx[i];
14317699dbbSLois Curfman McInnes     for ( j=0; j<size; j++ ) {
1441eb62cbbSBarry Smith       if (idx >= owners[j] && idx < owners[j+1]) {
1451eb62cbbSBarry Smith         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
1468a729477SBarry Smith       }
1478a729477SBarry Smith     }
1488a729477SBarry Smith   }
14917699dbbSLois Curfman McInnes   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
1501eb62cbbSBarry Smith 
1511eb62cbbSBarry Smith   /* inform other processors of number of messages and max length*/
1520452661fSBarry Smith   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
15317699dbbSLois Curfman McInnes   MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);
15417699dbbSLois Curfman McInnes   nreceives = work[rank];
15517699dbbSLois Curfman McInnes   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
15617699dbbSLois Curfman McInnes   nmax = work[rank];
1570452661fSBarry Smith   PetscFree(work);
1581eb62cbbSBarry Smith 
1591eb62cbbSBarry Smith   /* post receives:
1601eb62cbbSBarry Smith        1) each message will consist of ordered pairs
1611eb62cbbSBarry Smith      (global index,value) we store the global index as a double
162d6dfbf8fSBarry Smith      to simplify the message passing.
1631eb62cbbSBarry Smith        2) since we don't know how long each individual message is we
1641eb62cbbSBarry Smith      allocate the largest needed buffer for each receive. Potentially
1651eb62cbbSBarry Smith      this is a lot of wasted space.
1661eb62cbbSBarry Smith 
1671eb62cbbSBarry Smith 
1681eb62cbbSBarry Smith        This could be done better.
1691eb62cbbSBarry Smith   */
1700452661fSBarry Smith   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
17178b31e54SBarry Smith   CHKPTRQ(rvalues);
1720452661fSBarry Smith   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
17378b31e54SBarry Smith   CHKPTRQ(recv_waits);
1741eb62cbbSBarry Smith   for ( i=0; i<nreceives; i++ ) {
175416022c9SBarry Smith     MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
1761eb62cbbSBarry Smith               comm,recv_waits+i);
1771eb62cbbSBarry Smith   }
1781eb62cbbSBarry Smith 
1791eb62cbbSBarry Smith   /* do sends:
1801eb62cbbSBarry Smith       1) starts[i] gives the starting index in svalues for stuff going to
1811eb62cbbSBarry Smith          the ith processor
1821eb62cbbSBarry Smith   */
1830452661fSBarry Smith   svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
1840452661fSBarry Smith   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
18578b31e54SBarry Smith   CHKPTRQ(send_waits);
1860452661fSBarry Smith   starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
1871eb62cbbSBarry Smith   starts[0] = 0;
18817699dbbSLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
1891eb62cbbSBarry Smith   for ( i=0; i<aij->stash.n; i++ ) {
1901eb62cbbSBarry Smith     svalues[3*starts[owner[i]]]       = (Scalar)  aij->stash.idx[i];
1911eb62cbbSBarry Smith     svalues[3*starts[owner[i]]+1]     = (Scalar)  aij->stash.idy[i];
1921eb62cbbSBarry Smith     svalues[3*(starts[owner[i]]++)+2] =  aij->stash.array[i];
1931eb62cbbSBarry Smith   }
1940452661fSBarry Smith   PetscFree(owner);
1951eb62cbbSBarry Smith   starts[0] = 0;
19617699dbbSLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
1971eb62cbbSBarry Smith   count = 0;
19817699dbbSLois Curfman McInnes   for ( i=0; i<size; i++ ) {
1991eb62cbbSBarry Smith     if (procs[i]) {
200416022c9SBarry Smith       MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
2011eb62cbbSBarry Smith                 comm,send_waits+count++);
2021eb62cbbSBarry Smith     }
2031eb62cbbSBarry Smith   }
2040452661fSBarry Smith   PetscFree(starts); PetscFree(nprocs);
2051eb62cbbSBarry Smith 
2061eb62cbbSBarry Smith   /* Free cache space */
20778b31e54SBarry Smith   ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
2081eb62cbbSBarry Smith 
2091eb62cbbSBarry Smith   aij->svalues    = svalues;    aij->rvalues    = rvalues;
2101eb62cbbSBarry Smith   aij->nsends     = nsends;     aij->nrecvs     = nreceives;
2111eb62cbbSBarry Smith   aij->send_waits = send_waits; aij->recv_waits = recv_waits;
2121eb62cbbSBarry Smith   aij->rmax       = nmax;
2131eb62cbbSBarry Smith 
2141eb62cbbSBarry Smith   return 0;
2151eb62cbbSBarry Smith }
21644a69424SLois Curfman McInnes extern int MatSetUpMultiply_MPIAIJ(Mat);
2171eb62cbbSBarry Smith 
218fc76ce87SLois Curfman McInnes static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
2191eb62cbbSBarry Smith {
22044a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
221dbb450caSBarry Smith   Mat_SeqAIJ *C = (Mat_SeqAIJ *) aij->A->data;
2221eb62cbbSBarry Smith   MPI_Status  *send_status,recv_status;
223416022c9SBarry Smith   int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
224416022c9SBarry Smith   int         row,col,other_disassembled,shift = C->indexshift;
2251eb62cbbSBarry Smith   Scalar      *values,val;
2261eb62cbbSBarry Smith   InsertMode  addv = aij->insertmode;
2271eb62cbbSBarry Smith 
2281eb62cbbSBarry Smith   /*  wait on receives */
2291eb62cbbSBarry Smith   while (count) {
230d6dfbf8fSBarry Smith     MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
2311eb62cbbSBarry Smith     /* unpack receives into our local space */
232d6dfbf8fSBarry Smith     values = aij->rvalues + 3*imdex*aij->rmax;
233c60448a5SBarry Smith     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
2341eb62cbbSBarry Smith     n = n/3;
2351eb62cbbSBarry Smith     for ( i=0; i<n; i++ ) {
2361eb62cbbSBarry Smith       row = (int) PETSCREAL(values[3*i]) - aij->rstart;
2371eb62cbbSBarry Smith       col = (int) PETSCREAL(values[3*i+1]);
2381eb62cbbSBarry Smith       val = values[3*i+2];
2391eb62cbbSBarry Smith       if (col >= aij->cstart && col < aij->cend) {
2401eb62cbbSBarry Smith         col -= aij->cstart;
2411eb62cbbSBarry Smith         MatSetValues(aij->A,1,&row,1,&col,&val,addv);
2421eb62cbbSBarry Smith       }
2431eb62cbbSBarry Smith       else {
244d6dfbf8fSBarry Smith         if (aij->assembled) {
245b7c46309SBarry Smith           if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
246dbb450caSBarry Smith           col = aij->colmap[col] + shift;
247ec8511deSBarry Smith           if (col < 0  && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
2482493cbb0SBarry Smith             ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
2492493cbb0SBarry Smith             col = (int) PETSCREAL(values[3*i+1]);
250d6dfbf8fSBarry Smith           }
2519e25ed09SBarry Smith         }
2521eb62cbbSBarry Smith         MatSetValues(aij->B,1,&row,1,&col,&val,addv);
2531eb62cbbSBarry Smith       }
2541eb62cbbSBarry Smith     }
2551eb62cbbSBarry Smith     count--;
2561eb62cbbSBarry Smith   }
2570452661fSBarry Smith   PetscFree(aij->recv_waits); PetscFree(aij->rvalues);
2581eb62cbbSBarry Smith 
2591eb62cbbSBarry Smith   /* wait on sends */
2601eb62cbbSBarry Smith   if (aij->nsends) {
2610452661fSBarry Smith     send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));
26278b31e54SBarry Smith     CHKPTRQ(send_status);
2631eb62cbbSBarry Smith     MPI_Waitall(aij->nsends,aij->send_waits,send_status);
2640452661fSBarry Smith     PetscFree(send_status);
2651eb62cbbSBarry Smith   }
2660452661fSBarry Smith   PetscFree(aij->send_waits); PetscFree(aij->svalues);
2671eb62cbbSBarry Smith 
26864119d58SLois Curfman McInnes   aij->insertmode = NOT_SET_VALUES;
26978b31e54SBarry Smith   ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
27078b31e54SBarry Smith   ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);
2711eb62cbbSBarry Smith 
2722493cbb0SBarry Smith   /* determine if any processor has disassembled, if so we must
2732493cbb0SBarry Smith      also disassemble ourselfs, in order that we may reassemble. */
274416022c9SBarry Smith   MPI_Allreduce(&aij->assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
2752493cbb0SBarry Smith   if (aij->assembled && !other_disassembled) {
2762493cbb0SBarry Smith     ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
2772493cbb0SBarry Smith   }
2782493cbb0SBarry Smith 
2795e42470aSBarry Smith   if (!aij->assembled && mode == FINAL_ASSEMBLY) {
28078b31e54SBarry Smith     ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
281d6dfbf8fSBarry Smith     aij->assembled = 1;
2825e42470aSBarry Smith   }
28378b31e54SBarry Smith   ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
28478b31e54SBarry Smith   ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);
2855e42470aSBarry Smith 
2868a729477SBarry Smith   return 0;
2878a729477SBarry Smith }
2888a729477SBarry Smith 
2892ee70a88SLois Curfman McInnes static int MatZeroEntries_MPIAIJ(Mat A)
2901eb62cbbSBarry Smith {
29144a69424SLois Curfman McInnes   Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
292dbd7a890SLois Curfman McInnes   int        ierr;
29378b31e54SBarry Smith   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
29478b31e54SBarry Smith   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
2951eb62cbbSBarry Smith   return 0;
2961eb62cbbSBarry Smith }
2971eb62cbbSBarry Smith 
2981eb62cbbSBarry Smith /* the code does not do the diagonal entries correctly unless the
2991eb62cbbSBarry Smith    matrix is square and the column and row owerships are identical.
3001eb62cbbSBarry Smith    This is a BUG. The only way to fix it seems to be to access
3011eb62cbbSBarry Smith    aij->A and aij->B directly and not through the MatZeroRows()
3021eb62cbbSBarry Smith    routine.
3031eb62cbbSBarry Smith */
30444a69424SLois Curfman McInnes static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
3051eb62cbbSBarry Smith {
30644a69424SLois Curfman McInnes   Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
30717699dbbSLois Curfman McInnes   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
3086abc6512SBarry Smith   int            *procs,*nprocs,j,found,idx,nsends,*work;
30917699dbbSLois Curfman McInnes   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
3105392566eSBarry Smith   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
311d6dfbf8fSBarry Smith   int            *lens,imdex,*lrows,*values;
312d6dfbf8fSBarry Smith   MPI_Comm       comm = A->comm;
3131eb62cbbSBarry Smith   MPI_Request    *send_waits,*recv_waits;
3141eb62cbbSBarry Smith   MPI_Status     recv_status,*send_status;
3151eb62cbbSBarry Smith   IS             istmp;
3161eb62cbbSBarry Smith 
31748d91487SBarry Smith   if (!l->assembled) SETERRQ(1,"MatZeroRows_MPIAIJ:Must assemble matrix");
31878b31e54SBarry Smith   ierr = ISGetLocalSize(is,&N); CHKERRQ(ierr);
31978b31e54SBarry Smith   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
3201eb62cbbSBarry Smith 
3211eb62cbbSBarry Smith   /*  first count number of contributors to each processor */
3220452661fSBarry Smith   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
323cddf8d76SBarry Smith   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
3240452661fSBarry Smith   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
3251eb62cbbSBarry Smith   for ( i=0; i<N; i++ ) {
3261eb62cbbSBarry Smith     idx = rows[i];
3271eb62cbbSBarry Smith     found = 0;
32817699dbbSLois Curfman McInnes     for ( j=0; j<size; j++ ) {
3291eb62cbbSBarry Smith       if (idx >= owners[j] && idx < owners[j+1]) {
3301eb62cbbSBarry Smith         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
3311eb62cbbSBarry Smith       }
3321eb62cbbSBarry Smith     }
333bbb6d6a8SBarry Smith     if (!found) SETERRQ(1,"MatZeroRows_MPIAIJ:Index out of range");
3341eb62cbbSBarry Smith   }
33517699dbbSLois Curfman McInnes   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
3361eb62cbbSBarry Smith 
3371eb62cbbSBarry Smith   /* inform other processors of number of messages and max length*/
3380452661fSBarry Smith   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
33917699dbbSLois Curfman McInnes   MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
34017699dbbSLois Curfman McInnes   nrecvs = work[rank];
34117699dbbSLois Curfman McInnes   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
34217699dbbSLois Curfman McInnes   nmax = work[rank];
3430452661fSBarry Smith   PetscFree(work);
3441eb62cbbSBarry Smith 
3451eb62cbbSBarry Smith   /* post receives:   */
3460452661fSBarry Smith   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
34778b31e54SBarry Smith   CHKPTRQ(rvalues);
3480452661fSBarry Smith   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
34978b31e54SBarry Smith   CHKPTRQ(recv_waits);
3501eb62cbbSBarry Smith   for ( i=0; i<nrecvs; i++ ) {
351416022c9SBarry Smith     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
3521eb62cbbSBarry Smith   }
3531eb62cbbSBarry Smith 
3541eb62cbbSBarry Smith   /* do sends:
3551eb62cbbSBarry Smith       1) starts[i] gives the starting index in svalues for stuff going to
3561eb62cbbSBarry Smith          the ith processor
3571eb62cbbSBarry Smith   */
3580452661fSBarry Smith   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
3590452661fSBarry Smith   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
36078b31e54SBarry Smith   CHKPTRQ(send_waits);
3610452661fSBarry Smith   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
3621eb62cbbSBarry Smith   starts[0] = 0;
36317699dbbSLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
3641eb62cbbSBarry Smith   for ( i=0; i<N; i++ ) {
3651eb62cbbSBarry Smith     svalues[starts[owner[i]]++] = rows[i];
3661eb62cbbSBarry Smith   }
3671eb62cbbSBarry Smith   ISRestoreIndices(is,&rows);
3681eb62cbbSBarry Smith 
3691eb62cbbSBarry Smith   starts[0] = 0;
37017699dbbSLois Curfman McInnes   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
3711eb62cbbSBarry Smith   count = 0;
37217699dbbSLois Curfman McInnes   for ( i=0; i<size; i++ ) {
3731eb62cbbSBarry Smith     if (procs[i]) {
374416022c9SBarry Smith       MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
3751eb62cbbSBarry Smith     }
3761eb62cbbSBarry Smith   }
3770452661fSBarry Smith   PetscFree(starts);
3781eb62cbbSBarry Smith 
37917699dbbSLois Curfman McInnes   base = owners[rank];
3801eb62cbbSBarry Smith 
3811eb62cbbSBarry Smith   /*  wait on receives */
3820452661fSBarry Smith   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
3831eb62cbbSBarry Smith   source = lens + nrecvs;
3841eb62cbbSBarry Smith   count  = nrecvs; slen = 0;
3851eb62cbbSBarry Smith   while (count) {
386d6dfbf8fSBarry Smith     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
3871eb62cbbSBarry Smith     /* unpack receives into our local space */
3881eb62cbbSBarry Smith     MPI_Get_count(&recv_status,MPI_INT,&n);
389d6dfbf8fSBarry Smith     source[imdex]  = recv_status.MPI_SOURCE;
390d6dfbf8fSBarry Smith     lens[imdex]  = n;
3911eb62cbbSBarry Smith     slen += n;
3921eb62cbbSBarry Smith     count--;
3931eb62cbbSBarry Smith   }
3940452661fSBarry Smith   PetscFree(recv_waits);
3951eb62cbbSBarry Smith 
3961eb62cbbSBarry Smith   /* move the data into the send scatter */
3970452661fSBarry Smith   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
3981eb62cbbSBarry Smith   count = 0;
3991eb62cbbSBarry Smith   for ( i=0; i<nrecvs; i++ ) {
4001eb62cbbSBarry Smith     values = rvalues + i*nmax;
4011eb62cbbSBarry Smith     for ( j=0; j<lens[i]; j++ ) {
4021eb62cbbSBarry Smith       lrows[count++] = values[j] - base;
4031eb62cbbSBarry Smith     }
4041eb62cbbSBarry Smith   }
4050452661fSBarry Smith   PetscFree(rvalues); PetscFree(lens);
4060452661fSBarry Smith   PetscFree(owner); PetscFree(nprocs);
4071eb62cbbSBarry Smith 
4081eb62cbbSBarry Smith   /* actually zap the local rows */
409416022c9SBarry Smith   ierr = ISCreateSeq(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
410464493b3SBarry Smith   PLogObjectParent(A,istmp);
4110452661fSBarry Smith   PetscFree(lrows);
41278b31e54SBarry Smith   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
41378b31e54SBarry Smith   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
41478b31e54SBarry Smith   ierr = ISDestroy(istmp); CHKERRQ(ierr);
4151eb62cbbSBarry Smith 
4161eb62cbbSBarry Smith   /* wait on sends */
4171eb62cbbSBarry Smith   if (nsends) {
4180452661fSBarry Smith     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
41978b31e54SBarry Smith     CHKPTRQ(send_status);
4201eb62cbbSBarry Smith     MPI_Waitall(nsends,send_waits,send_status);
4210452661fSBarry Smith     PetscFree(send_status);
4221eb62cbbSBarry Smith   }
4230452661fSBarry Smith   PetscFree(send_waits); PetscFree(svalues);
4241eb62cbbSBarry Smith 
4251eb62cbbSBarry Smith   return 0;
4261eb62cbbSBarry Smith }
4271eb62cbbSBarry Smith 
428416022c9SBarry Smith static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
4291eb62cbbSBarry Smith {
430416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
4311eb62cbbSBarry Smith   int        ierr;
432416022c9SBarry Smith 
43348d91487SBarry Smith   if (!a->assembled) SETERRQ(1,"MatMult_MPIAIJ:must assemble matrix");
43464119d58SLois Curfman McInnes   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
435416022c9SBarry Smith   ierr = MatMult(a->A,xx,yy); CHKERRQ(ierr);
43664119d58SLois Curfman McInnes   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
437416022c9SBarry Smith   ierr = MatMultAdd(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
4381eb62cbbSBarry Smith   return 0;
4391eb62cbbSBarry Smith }
4401eb62cbbSBarry Smith 
441416022c9SBarry Smith static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
442da3a660dSBarry Smith {
443416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
444da3a660dSBarry Smith   int        ierr;
44548d91487SBarry Smith   if (!a->assembled) SETERRQ(1,"MatMult_MPIAIJ:must assemble matrix");
44664119d58SLois Curfman McInnes   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
447416022c9SBarry Smith   ierr = MatMultAdd(a->A,xx,yy,zz); CHKERRQ(ierr);
44864119d58SLois Curfman McInnes   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
449416022c9SBarry Smith   ierr = MatMultAdd(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
450da3a660dSBarry Smith   return 0;
451da3a660dSBarry Smith }
452da3a660dSBarry Smith 
453416022c9SBarry Smith static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
454da3a660dSBarry Smith {
455416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
456da3a660dSBarry Smith   int        ierr;
457da3a660dSBarry Smith 
45848d91487SBarry Smith   if (!a->assembled) SETERRQ(1,"MatMulTrans_MPIAIJ:must assemble matrix");
459da3a660dSBarry Smith   /* do nondiagonal part */
460416022c9SBarry Smith   ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr);
461da3a660dSBarry Smith   /* send it on its way */
462416022c9SBarry Smith   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,
46364119d58SLois Curfman McInnes                 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
464da3a660dSBarry Smith   /* do local part */
465416022c9SBarry Smith   ierr = MatMultTrans(a->A,xx,yy); CHKERRQ(ierr);
466da3a660dSBarry Smith   /* receive remote parts: note this assumes the values are not actually */
467da3a660dSBarry Smith   /* inserted in yy until the next line, which is true for my implementation*/
468da3a660dSBarry Smith   /* but is not perhaps always true. */
469416022c9SBarry Smith   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,
47064119d58SLois Curfman McInnes                   (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
471da3a660dSBarry Smith   return 0;
472da3a660dSBarry Smith }
473da3a660dSBarry Smith 
474416022c9SBarry Smith static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
475da3a660dSBarry Smith {
476416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
477da3a660dSBarry Smith   int        ierr;
478da3a660dSBarry Smith 
47948d91487SBarry Smith   if (!a->assembled) SETERRQ(1,"MatMulTransAdd_MPIAIJ:must assemble matrix");
480da3a660dSBarry Smith   /* do nondiagonal part */
481416022c9SBarry Smith   ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr);
482da3a660dSBarry Smith   /* send it on its way */
483416022c9SBarry Smith   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,
48464119d58SLois Curfman McInnes                  (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
485da3a660dSBarry Smith   /* do local part */
486416022c9SBarry Smith   ierr = MatMultTransAdd(a->A,xx,yy,zz); CHKERRQ(ierr);
487da3a660dSBarry Smith   /* receive remote parts: note this assumes the values are not actually */
488da3a660dSBarry Smith   /* inserted in yy until the next line, which is true for my implementation*/
489da3a660dSBarry Smith   /* but is not perhaps always true. */
490416022c9SBarry Smith   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,
49164119d58SLois Curfman McInnes                   (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
492da3a660dSBarry Smith   return 0;
493da3a660dSBarry Smith }
494da3a660dSBarry Smith 
4951eb62cbbSBarry Smith /*
4961eb62cbbSBarry Smith   This only works correctly for square matrices where the subblock A->A is the
4971eb62cbbSBarry Smith    diagonal block
4981eb62cbbSBarry Smith */
499416022c9SBarry Smith static int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
5001eb62cbbSBarry Smith {
501416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
50248d91487SBarry Smith   if (!a->assembled) SETERRQ(1,"MatGetDiag_MPIAIJ:must assemble matrix");
503416022c9SBarry Smith   return MatGetDiagonal(a->A,v);
5041eb62cbbSBarry Smith }
5051eb62cbbSBarry Smith 
50644a69424SLois Curfman McInnes static int MatDestroy_MPIAIJ(PetscObject obj)
5071eb62cbbSBarry Smith {
5081eb62cbbSBarry Smith   Mat        mat = (Mat) obj;
50944a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
5101eb62cbbSBarry Smith   int        ierr;
511a5a9c739SBarry Smith #if defined(PETSC_LOG)
5126e35fa57SLois Curfman McInnes   PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N);
513a5a9c739SBarry Smith #endif
5140452661fSBarry Smith   PetscFree(aij->rowners);
51578b31e54SBarry Smith   ierr = MatDestroy(aij->A); CHKERRQ(ierr);
51678b31e54SBarry Smith   ierr = MatDestroy(aij->B); CHKERRQ(ierr);
5170452661fSBarry Smith   if (aij->colmap) PetscFree(aij->colmap);
5180452661fSBarry Smith   if (aij->garray) PetscFree(aij->garray);
5191eb62cbbSBarry Smith   if (aij->lvec)   VecDestroy(aij->lvec);
520a56f8943SBarry Smith   if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
5210452661fSBarry Smith   PetscFree(aij);
522a5a9c739SBarry Smith   PLogObjectDestroy(mat);
5230452661fSBarry Smith   PetscHeaderDestroy(mat);
5241eb62cbbSBarry Smith   return 0;
5251eb62cbbSBarry Smith }
5267857610eSBarry Smith #include "draw.h"
527b16a3bb1SBarry Smith #include "pinclude/pviewer.h"
528ee50ffe9SBarry Smith 
529416022c9SBarry Smith static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
5301eb62cbbSBarry Smith {
531416022c9SBarry Smith   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
532416022c9SBarry Smith   int         ierr;
533416022c9SBarry Smith 
53417699dbbSLois Curfman McInnes   if (aij->size == 1) {
535416022c9SBarry Smith     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
536416022c9SBarry Smith   }
537a523beb4SLois Curfman McInnes   else SETERRQ(1,"MatView_MPIAIJ_Binary:Only uniprocessor output supported");
538416022c9SBarry Smith   return 0;
539416022c9SBarry Smith }
540416022c9SBarry Smith 
541d7e8b826SBarry Smith static int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
542416022c9SBarry Smith {
54344a69424SLois Curfman McInnes   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
544dbb450caSBarry Smith   Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
545a56f8943SBarry Smith   int         ierr, format,shift = C->indexshift,rank;
546d13ab20cSBarry Smith   PetscObject vobj = (PetscObject) viewer;
547d636dbe3SBarry Smith   FILE        *fd;
548416022c9SBarry Smith 
549416022c9SBarry Smith   if (vobj->type == ASCII_FILE_VIEWER || vobj->type == ASCII_FILES_VIEWER) {
550416022c9SBarry Smith     ierr = ViewerFileGetFormat_Private(viewer,&format);
55108480c60SBarry Smith     if (format == FILE_FORMAT_INFO_DETAILED) {
552a56f8943SBarry Smith       int nz,nzalloc,mem;
553a56f8943SBarry Smith       MPI_Comm_rank(mat->comm,&rank);
554a56f8943SBarry Smith       ierr = ViewerFileGetPointer_Private(viewer,&fd); CHKERRQ(ierr);
555a56f8943SBarry Smith       ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem);
556a56f8943SBarry Smith       MPIU_Seq_begin(mat->comm,1);
557a56f8943SBarry Smith       fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d \n",rank,aij->m,nz,
558a56f8943SBarry Smith                 nzalloc,mem);
55908480c60SBarry Smith       ierr = MatGetInfo(aij->A,MAT_LOCAL,&nz,&nzalloc,&mem);
56008480c60SBarry Smith       fprintf(fd,"[%d] on diagonal nz %d \n",rank,nz);
56108480c60SBarry Smith       ierr = MatGetInfo(aij->B,MAT_LOCAL,&nz,&nzalloc,&mem);
56208480c60SBarry Smith       fprintf(fd,"[%d] off diagonal nz %d \n",rank,nz);
563a56f8943SBarry Smith       fflush(fd);
564a56f8943SBarry Smith       MPIU_Seq_end(mat->comm,1);
565a56f8943SBarry Smith       ierr = VecScatterView(aij->Mvctx,viewer);
566416022c9SBarry Smith       return 0;
567416022c9SBarry Smith     }
56808480c60SBarry Smith     else if (format == FILE_FORMAT_INFO) {
56908480c60SBarry Smith       return 0;
57008480c60SBarry Smith     }
571416022c9SBarry Smith   }
572416022c9SBarry Smith 
573416022c9SBarry Smith   if (vobj->type == ASCII_FILE_VIEWER) {
574d636dbe3SBarry Smith     ierr = ViewerFileGetPointer_Private(viewer,&fd); CHKERRQ(ierr);
5757f813858SBarry Smith     MPIU_Seq_begin(mat->comm,1);
576d13ab20cSBarry Smith     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
57717699dbbSLois Curfman McInnes            aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
5781eb62cbbSBarry Smith            aij->cend);
57978b31e54SBarry Smith     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
58078b31e54SBarry Smith     ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
581d13ab20cSBarry Smith     fflush(fd);
5827f813858SBarry Smith     MPIU_Seq_end(mat->comm,1);
583d13ab20cSBarry Smith   }
584416022c9SBarry Smith   else {
585a56f8943SBarry Smith     int size = aij->size;
586a56f8943SBarry Smith     rank = aij->rank;
58717699dbbSLois Curfman McInnes     if (size == 1) {
58878b31e54SBarry Smith       ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
58995373324SBarry Smith     }
59095373324SBarry Smith     else {
59195373324SBarry Smith       /* assemble the entire matrix onto first processor. */
59295373324SBarry Smith       Mat         A;
593ec8511deSBarry Smith       Mat_SeqAIJ *Aloc;
5942eb8c8abSBarry Smith       int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
59595373324SBarry Smith       Scalar      *a;
5962ee70a88SLois Curfman McInnes 
59717699dbbSLois Curfman McInnes       if (!rank) {
598416022c9SBarry Smith         ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,0,0,0,&A);CHKERRQ(ierr);
59995373324SBarry Smith       }
60095373324SBarry Smith       else {
601416022c9SBarry Smith         ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,0,0,0,&A);CHKERRQ(ierr);
60295373324SBarry Smith       }
603464493b3SBarry Smith       PLogObjectParent(mat,A);
604416022c9SBarry Smith 
60595373324SBarry Smith 
60695373324SBarry Smith       /* copy over the A part */
607ec8511deSBarry Smith       Aloc = (Mat_SeqAIJ*) aij->A->data;
6082ee70a88SLois Curfman McInnes       m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
60995373324SBarry Smith       row = aij->rstart;
610dbb450caSBarry Smith       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
61195373324SBarry Smith       for ( i=0; i<m; i++ ) {
612416022c9SBarry Smith         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
61395373324SBarry Smith         row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
61495373324SBarry Smith       }
6152ee70a88SLois Curfman McInnes       aj = Aloc->j;
616dbb450caSBarry Smith       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}
61795373324SBarry Smith 
61895373324SBarry Smith       /* copy over the B part */
619ec8511deSBarry Smith       Aloc = (Mat_SeqAIJ*) aij->B->data;
6202ee70a88SLois Curfman McInnes       m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
62195373324SBarry Smith       row = aij->rstart;
6220452661fSBarry Smith       ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
623dbb450caSBarry Smith       for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
62495373324SBarry Smith       for ( i=0; i<m; i++ ) {
625416022c9SBarry Smith         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
62695373324SBarry Smith         row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
62795373324SBarry Smith       }
6280452661fSBarry Smith       PetscFree(ct);
62978b31e54SBarry Smith       ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
63078b31e54SBarry Smith       ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
63117699dbbSLois Curfman McInnes       if (!rank) {
63278b31e54SBarry Smith         ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
63395373324SBarry Smith       }
63478b31e54SBarry Smith       ierr = MatDestroy(A); CHKERRQ(ierr);
63595373324SBarry Smith     }
63695373324SBarry Smith   }
6371eb62cbbSBarry Smith   return 0;
6381eb62cbbSBarry Smith }
6391eb62cbbSBarry Smith 
640416022c9SBarry Smith static int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
641416022c9SBarry Smith {
642416022c9SBarry Smith   Mat         mat = (Mat) obj;
643416022c9SBarry Smith   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
644416022c9SBarry Smith   int         ierr;
645416022c9SBarry Smith   PetscObject vobj = (PetscObject) viewer;
646416022c9SBarry Smith 
64748d91487SBarry Smith   if (!aij->assembled) SETERRQ(1,"MatView_MPIAIJ:must assemble matrix");
648416022c9SBarry Smith   if (!viewer) {
649416022c9SBarry Smith     viewer = STDOUT_VIEWER_SELF; vobj = (PetscObject) viewer;
650416022c9SBarry Smith   }
651416022c9SBarry Smith   if (vobj->cookie == DRAW_COOKIE && vobj->type == NULLWINDOW) return 0;
652416022c9SBarry Smith   else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILE_VIEWER) {
653d7e8b826SBarry Smith     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
654416022c9SBarry Smith   }
655416022c9SBarry Smith   else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILES_VIEWER) {
656d7e8b826SBarry Smith     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
657d7e8b826SBarry Smith   }
658d7e8b826SBarry Smith   else if (vobj->cookie == VIEWER_COOKIE && vobj->type == MATLAB_VIEWER) {
659d7e8b826SBarry Smith     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
660416022c9SBarry Smith   }
661416022c9SBarry Smith   else if (vobj->cookie == DRAW_COOKIE) {
662d7e8b826SBarry Smith     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
663416022c9SBarry Smith   }
664416022c9SBarry Smith   else if (vobj->type == BINARY_FILE_VIEWER) {
665416022c9SBarry Smith     return MatView_MPIAIJ_Binary(mat,viewer);
666416022c9SBarry Smith   }
667416022c9SBarry Smith   return 0;
668416022c9SBarry Smith }
669416022c9SBarry Smith 
670ec8511deSBarry Smith extern int MatMarkDiag_SeqAIJ(Mat);
6711eb62cbbSBarry Smith /*
6721eb62cbbSBarry Smith     This has to provide several versions.
6731eb62cbbSBarry Smith 
6741eb62cbbSBarry Smith      1) per sequential
6751eb62cbbSBarry Smith      2) a) use only local smoothing updating outer values only once.
6761eb62cbbSBarry Smith         b) local smoothing updating outer values each inner iteration
677d6dfbf8fSBarry Smith      3) color updating out values betwen colors.
6781eb62cbbSBarry Smith */
679fc76ce87SLois Curfman McInnes static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
680dbb450caSBarry Smith                            double fshift,int its,Vec xx)
6818a729477SBarry Smith {
68244a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
683d6dfbf8fSBarry Smith   Mat        AA = mat->A, BB = mat->B;
684ec8511deSBarry Smith   Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
6856abc6512SBarry Smith   Scalar     zero = 0.0,*b,*x,*xs,*ls,d,*v,sum,scale,*t,*ts;
6866abc6512SBarry Smith   int        ierr,*idx, *diag;
687416022c9SBarry Smith   int        n = mat->n, m = mat->m, i,shift = A->indexshift;
688da3a660dSBarry Smith   Vec        tt;
6898a729477SBarry Smith 
69048d91487SBarry Smith   if (!mat->assembled) SETERRQ(1,"MatRelax_MPIAIJ:must assemble matrix");
6911eb62cbbSBarry Smith 
692d6dfbf8fSBarry Smith   VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
693dbb450caSBarry Smith   xs = x + shift; /* shift by one for index start of 1 */
694dbb450caSBarry Smith   ls = ls + shift;
695ec8511deSBarry Smith   if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;}
696d6dfbf8fSBarry Smith   diag = A->diag;
697acb40c82SBarry Smith   if (flag == SOR_APPLY_UPPER || flag == SOR_APPLY_LOWER) {
69848d91487SBarry Smith     SETERRQ(1,"MatRelax_MPIAIJ:Option not supported");
699acb40c82SBarry Smith   }
700da3a660dSBarry Smith   if (flag & SOR_EISENSTAT) {
701da3a660dSBarry Smith     /* Let  A = L + U + D; where L is lower trianglar,
702da3a660dSBarry Smith     U is upper triangular, E is diagonal; This routine applies
703da3a660dSBarry Smith 
704da3a660dSBarry Smith             (L + E)^{-1} A (U + E)^{-1}
705da3a660dSBarry Smith 
706da3a660dSBarry Smith     to a vector efficiently using Eisenstat's trick. This is for
707da3a660dSBarry Smith     the case of SSOR preconditioner, so E is D/omega where omega
708da3a660dSBarry Smith     is the relaxation factor.
709da3a660dSBarry Smith     */
71078b31e54SBarry Smith     ierr = VecDuplicate(xx,&tt); CHKERRQ(ierr);
711464493b3SBarry Smith     PLogObjectParent(matin,tt);
712da3a660dSBarry Smith     VecGetArray(tt,&t);
713da3a660dSBarry Smith     scale = (2.0/omega) - 1.0;
714da3a660dSBarry Smith     /*  x = (E + U)^{-1} b */
715da3a660dSBarry Smith     VecSet(&zero,mat->lvec);
71664119d58SLois Curfman McInnes     ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
71778b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
718da3a660dSBarry Smith     for ( i=m-1; i>-1; i-- ) {
719da3a660dSBarry Smith       n    = A->i[i+1] - diag[i] - 1;
720dbb450caSBarry Smith       idx  = A->j + diag[i] + !shift;
721dbb450caSBarry Smith       v    = A->a + diag[i] + !shift;
722da3a660dSBarry Smith       sum  = b[i];
723da3a660dSBarry Smith       SPARSEDENSEMDOT(sum,xs,v,idx,n);
724dbb450caSBarry Smith       d    = fshift + A->a[diag[i]+shift];
725da3a660dSBarry Smith       n    = B->i[i+1] - B->i[i];
726dbb450caSBarry Smith       idx  = B->j + B->i[i] + shift;
727dbb450caSBarry Smith       v    = B->a + B->i[i] + shift;
728da3a660dSBarry Smith       SPARSEDENSEMDOT(sum,ls,v,idx,n);
729da3a660dSBarry Smith       x[i] = omega*(sum/d);
730da3a660dSBarry Smith     }
73164119d58SLois Curfman McInnes     ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
73278b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
733da3a660dSBarry Smith 
734da3a660dSBarry Smith     /*  t = b - (2*E - D)x */
735da3a660dSBarry Smith     v = A->a;
736dbb450caSBarry Smith     for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ + shift])*x[i]; }
737da3a660dSBarry Smith 
738da3a660dSBarry Smith     /*  t = (E + L)^{-1}t */
739dbb450caSBarry Smith     ts = t + shift; /* shifted by one for index start of a or mat->j*/
740da3a660dSBarry Smith     diag = A->diag;
741da3a660dSBarry Smith     VecSet(&zero,mat->lvec);
74264119d58SLois Curfman McInnes     ierr = VecPipelineBegin(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
74378b31e54SBarry Smith                                                  mat->Mvctx); CHKERRQ(ierr);
744da3a660dSBarry Smith     for ( i=0; i<m; i++ ) {
745da3a660dSBarry Smith       n    = diag[i] - A->i[i];
746dbb450caSBarry Smith       idx  = A->j + A->i[i] + shift;
747dbb450caSBarry Smith       v    = A->a + A->i[i] + shift;
748da3a660dSBarry Smith       sum  = t[i];
749da3a660dSBarry Smith       SPARSEDENSEMDOT(sum,ts,v,idx,n);
750dbb450caSBarry Smith       d    = fshift + A->a[diag[i]+shift];
751da3a660dSBarry Smith       n    = B->i[i+1] - B->i[i];
752dbb450caSBarry Smith       idx  = B->j + B->i[i] + shift;
753dbb450caSBarry Smith       v    = B->a + B->i[i] + shift;
754da3a660dSBarry Smith       SPARSEDENSEMDOT(sum,ls,v,idx,n);
755da3a660dSBarry Smith       t[i] = omega*(sum/d);
756da3a660dSBarry Smith     }
75764119d58SLois Curfman McInnes     ierr = VecPipelineEnd(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
75878b31e54SBarry Smith                                                     mat->Mvctx); CHKERRQ(ierr);
759da3a660dSBarry Smith     /*  x = x + t */
760da3a660dSBarry Smith     for ( i=0; i<m; i++ ) { x[i] += t[i]; }
761da3a660dSBarry Smith     VecDestroy(tt);
762da3a660dSBarry Smith     return 0;
763da3a660dSBarry Smith   }
764da3a660dSBarry Smith 
7651eb62cbbSBarry Smith 
766d6dfbf8fSBarry Smith   if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){
767da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
768da3a660dSBarry Smith       VecSet(&zero,mat->lvec); VecSet(&zero,xx);
769da3a660dSBarry Smith     }
770da3a660dSBarry Smith     else {
77164119d58SLois Curfman McInnes       ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
77278b31e54SBarry Smith       CHKERRQ(ierr);
77364119d58SLois Curfman McInnes       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
77478b31e54SBarry Smith       CHKERRQ(ierr);
775da3a660dSBarry Smith     }
776d6dfbf8fSBarry Smith     while (its--) {
777d6dfbf8fSBarry Smith       /* go down through the rows */
77864119d58SLois Curfman McInnes       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
77978b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
780d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
781d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
782dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
783dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
784d6dfbf8fSBarry Smith         sum  = b[i];
785d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
786dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
787d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
788dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
789dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
790d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
791d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
792d6dfbf8fSBarry Smith       }
79364119d58SLois Curfman McInnes       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
79478b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
795d6dfbf8fSBarry Smith       /* come up through the rows */
79664119d58SLois Curfman McInnes       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
79778b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
798d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
799d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
800dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
801dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
802d6dfbf8fSBarry Smith         sum  = b[i];
803d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
804dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
805d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
806dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
807dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
808d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
809d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
810d6dfbf8fSBarry Smith       }
81164119d58SLois Curfman McInnes       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
81278b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
813d6dfbf8fSBarry Smith     }
814d6dfbf8fSBarry Smith   }
815d6dfbf8fSBarry Smith   else if (flag & SOR_FORWARD_SWEEP){
816da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
817da3a660dSBarry Smith       VecSet(&zero,mat->lvec);
81864119d58SLois Curfman McInnes       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
81978b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
820da3a660dSBarry Smith       for ( i=0; i<m; i++ ) {
821da3a660dSBarry Smith         n    = diag[i] - A->i[i];
822dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
823dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
824da3a660dSBarry Smith         sum  = b[i];
825da3a660dSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
826dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
827da3a660dSBarry Smith         n    = B->i[i+1] - B->i[i];
828dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
829dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
830da3a660dSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
831da3a660dSBarry Smith         x[i] = omega*(sum/d);
832da3a660dSBarry Smith       }
83364119d58SLois Curfman McInnes       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
83478b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
835da3a660dSBarry Smith       its--;
836da3a660dSBarry Smith     }
837d6dfbf8fSBarry Smith     while (its--) {
83864119d58SLois Curfman McInnes       ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
83978b31e54SBarry Smith       CHKERRQ(ierr);
84064119d58SLois Curfman McInnes       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
84178b31e54SBarry Smith       CHKERRQ(ierr);
84264119d58SLois Curfman McInnes       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
84378b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
844d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
845d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
846dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
847dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
848d6dfbf8fSBarry Smith         sum  = b[i];
849d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
850dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
851d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
852dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
853dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
854d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
855d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
856d6dfbf8fSBarry Smith       }
85764119d58SLois Curfman McInnes       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
85878b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
859d6dfbf8fSBarry Smith     }
860d6dfbf8fSBarry Smith   }
861d6dfbf8fSBarry Smith   else if (flag & SOR_BACKWARD_SWEEP){
862da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
863da3a660dSBarry Smith       VecSet(&zero,mat->lvec);
86464119d58SLois Curfman McInnes       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
86578b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
866da3a660dSBarry Smith       for ( i=m-1; i>-1; i-- ) {
867da3a660dSBarry Smith         n    = A->i[i+1] - diag[i] - 1;
868dbb450caSBarry Smith         idx  = A->j + diag[i] + !shift;
869dbb450caSBarry Smith         v    = A->a + diag[i] + !shift;
870da3a660dSBarry Smith         sum  = b[i];
871da3a660dSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
872dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
873da3a660dSBarry Smith         n    = B->i[i+1] - B->i[i];
874dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
875dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
876da3a660dSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
877da3a660dSBarry Smith         x[i] = omega*(sum/d);
878da3a660dSBarry Smith       }
87964119d58SLois Curfman McInnes       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
88078b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
881da3a660dSBarry Smith       its--;
882da3a660dSBarry Smith     }
883d6dfbf8fSBarry Smith     while (its--) {
88464119d58SLois Curfman McInnes       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN,
88578b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
88664119d58SLois Curfman McInnes       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN,
88778b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
88864119d58SLois Curfman McInnes       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
88978b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
890d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
891d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
892dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
893dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
894d6dfbf8fSBarry Smith         sum  = b[i];
895d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
896dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
897d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
898dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
899dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
900d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
901d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
902d6dfbf8fSBarry Smith       }
90364119d58SLois Curfman McInnes       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
90478b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
905d6dfbf8fSBarry Smith     }
906d6dfbf8fSBarry Smith   }
907d6dfbf8fSBarry Smith   else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
908da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
909dbb450caSBarry Smith       return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
910da3a660dSBarry Smith     }
91164119d58SLois Curfman McInnes     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
91278b31e54SBarry Smith     CHKERRQ(ierr);
91364119d58SLois Curfman McInnes     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
91478b31e54SBarry Smith     CHKERRQ(ierr);
915d6dfbf8fSBarry Smith     while (its--) {
916d6dfbf8fSBarry Smith       /* go down through the rows */
917d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
918d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
919dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
920dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
921d6dfbf8fSBarry Smith         sum  = b[i];
922d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
923dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
924d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
925dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
926dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
927d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
928d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
929d6dfbf8fSBarry Smith       }
930d6dfbf8fSBarry Smith       /* come up through the rows */
931d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
932d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
933dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
934dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
935d6dfbf8fSBarry Smith         sum  = b[i];
936d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
937dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
938d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
939dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
940dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
941d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
942d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
943d6dfbf8fSBarry Smith       }
944d6dfbf8fSBarry Smith     }
945d6dfbf8fSBarry Smith   }
946d6dfbf8fSBarry Smith   else if (flag & SOR_LOCAL_FORWARD_SWEEP){
947da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
948dbb450caSBarry Smith       return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
949da3a660dSBarry Smith     }
95064119d58SLois Curfman McInnes     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
95178b31e54SBarry Smith     CHKERRQ(ierr);
95264119d58SLois Curfman McInnes     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
95378b31e54SBarry Smith     CHKERRQ(ierr);
954d6dfbf8fSBarry Smith     while (its--) {
955d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
956d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
957dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
958dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
959d6dfbf8fSBarry Smith         sum  = b[i];
960d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
961dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
962d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
963dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
964dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
965d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
966d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
967d6dfbf8fSBarry Smith       }
968d6dfbf8fSBarry Smith     }
969d6dfbf8fSBarry Smith   }
970d6dfbf8fSBarry Smith   else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
971da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
972dbb450caSBarry Smith       return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
973da3a660dSBarry Smith     }
97464119d58SLois Curfman McInnes     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,
97578b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
97664119d58SLois Curfman McInnes     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,
97778b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
978d6dfbf8fSBarry Smith     while (its--) {
979d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
980d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
981dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
982dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
983d6dfbf8fSBarry Smith         sum  = b[i];
984d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
985dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
986d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
987dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
988dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
989d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
990d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
991d6dfbf8fSBarry Smith       }
992d6dfbf8fSBarry Smith     }
993d6dfbf8fSBarry Smith   }
9948a729477SBarry Smith   return 0;
9958a729477SBarry Smith }
996a66be287SLois Curfman McInnes 
997fc76ce87SLois Curfman McInnes static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,int *nz,
998fc76ce87SLois Curfman McInnes                              int *nzalloc,int *mem)
999a66be287SLois Curfman McInnes {
1000a66be287SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1001a66be287SLois Curfman McInnes   Mat        A = mat->A, B = mat->B;
1002a66be287SLois Curfman McInnes   int        ierr, isend[3], irecv[3], nzA, nzallocA, memA;
1003a66be287SLois Curfman McInnes 
100478b31e54SBarry Smith   ierr = MatGetInfo(A,MAT_LOCAL,&nzA,&nzallocA,&memA); CHKERRQ(ierr);
100578b31e54SBarry Smith   ierr = MatGetInfo(B,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr);
1006a66be287SLois Curfman McInnes   isend[0] += nzA; isend[1] += nzallocA; isend[2] += memA;
1007a66be287SLois Curfman McInnes   if (flag == MAT_LOCAL) {
1008a66be287SLois Curfman McInnes     *nz = isend[0]; *nzalloc = isend[1]; *mem = isend[2];
1009a66be287SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_MAX) {
1010d65a2f8fSBarry Smith     MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_MAX,matin->comm);
1011a66be287SLois Curfman McInnes     *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
1012a66be287SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_SUM) {
1013d65a2f8fSBarry Smith     MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_SUM,matin->comm);
1014a66be287SLois Curfman McInnes     *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
1015a66be287SLois Curfman McInnes   }
1016a66be287SLois Curfman McInnes   return 0;
1017a66be287SLois Curfman McInnes }
1018a66be287SLois Curfman McInnes 
1019299609e3SLois Curfman McInnes extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*);
1020299609e3SLois Curfman McInnes extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*);
1021299609e3SLois Curfman McInnes extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double);
1022299609e3SLois Curfman McInnes extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *);
1023299609e3SLois Curfman McInnes extern int MatSolve_MPIAIJ(Mat,Vec,Vec);
1024299609e3SLois Curfman McInnes extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec);
1025299609e3SLois Curfman McInnes extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec);
1026299609e3SLois Curfman McInnes extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec);
1027299609e3SLois Curfman McInnes 
1028416022c9SBarry Smith static int MatSetOption_MPIAIJ(Mat A,MatOption op)
1029c74985f6SBarry Smith {
1030c0bbcb79SLois Curfman McInnes   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1031c74985f6SBarry Smith 
1032c0bbcb79SLois Curfman McInnes   if (op == NO_NEW_NONZERO_LOCATIONS ||
1033c0bbcb79SLois Curfman McInnes       op == YES_NEW_NONZERO_LOCATIONS ||
1034c0bbcb79SLois Curfman McInnes       op == COLUMNS_SORTED ||
1035c0bbcb79SLois Curfman McInnes       op == ROW_ORIENTED) {
1036c0bbcb79SLois Curfman McInnes         MatSetOption(a->A,op);
1037c0bbcb79SLois Curfman McInnes         MatSetOption(a->B,op);
1038c74985f6SBarry Smith   }
1039c0bbcb79SLois Curfman McInnes   else if (op == ROWS_SORTED ||
1040c0bbcb79SLois Curfman McInnes            op == SYMMETRIC_MATRIX ||
1041c0bbcb79SLois Curfman McInnes            op == STRUCTURALLY_SYMMETRIC_MATRIX ||
1042c0bbcb79SLois Curfman McInnes            op == YES_NEW_DIAGONALS)
1043c0bbcb79SLois Curfman McInnes     PLogInfo((PetscObject)A,"Info:MatSetOption_MPIAIJ:Option ignored\n");
1044bbb6d6a8SBarry Smith   else if (op == COLUMN_ORIENTED)
10454d39ef84SLois Curfman McInnes     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:COLUMN_ORIENTED");}
1046c0bbcb79SLois Curfman McInnes   else if (op == NO_NEW_DIAGONALS)
10474d39ef84SLois Curfman McInnes     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:NO_NEW_DIAGONALS");}
1048c0bbcb79SLois Curfman McInnes   else
10494d39ef84SLois Curfman McInnes     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:unknown option");}
1050c74985f6SBarry Smith   return 0;
1051c74985f6SBarry Smith }
1052c74985f6SBarry Smith 
1053d1710a03SLois Curfman McInnes static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
1054c74985f6SBarry Smith {
105544a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1056c74985f6SBarry Smith   *m = mat->M; *n = mat->N;
1057c74985f6SBarry Smith   return 0;
1058c74985f6SBarry Smith }
1059c74985f6SBarry Smith 
1060d1710a03SLois Curfman McInnes static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
1061c74985f6SBarry Smith {
106244a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1063b7c46309SBarry Smith   *m = mat->m; *n = mat->N;
1064c74985f6SBarry Smith   return 0;
1065c74985f6SBarry Smith }
1066c74985f6SBarry Smith 
1067d1710a03SLois Curfman McInnes static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
1068c74985f6SBarry Smith {
106944a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1070c74985f6SBarry Smith   *m = mat->rstart; *n = mat->rend;
1071c74985f6SBarry Smith   return 0;
1072c74985f6SBarry Smith }
1073c74985f6SBarry Smith 
107439e00950SLois Curfman McInnes static int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
107539e00950SLois Curfman McInnes {
1076154123eaSLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1077154123eaSLois Curfman McInnes   Scalar     *vworkA, *vworkB, **pvA, **pvB;
1078154123eaSLois Curfman McInnes   int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
1079154123eaSLois Curfman McInnes   int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
108039e00950SLois Curfman McInnes 
1081*b49de8d1SLois Curfman McInnes   if (!mat->assembled) SETERRQ(1,"MatGetRow_MPIAIJ:Must assemble matrix");
1082*b49de8d1SLois Curfman McInnes   if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIAIJ:Only local rows")
1083abc0e9e4SLois Curfman McInnes   lrow = row - rstart;
108439e00950SLois Curfman McInnes 
1085154123eaSLois Curfman McInnes   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1086154123eaSLois Curfman McInnes   if (!v)   {pvA = 0; pvB = 0;}
1087154123eaSLois Curfman McInnes   if (!idx) {pcA = 0; if (!v) pcB = 0;}
108878b31e54SBarry Smith   ierr = MatGetRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
108978b31e54SBarry Smith   ierr = MatGetRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1090154123eaSLois Curfman McInnes   nztot = nzA + nzB;
1091154123eaSLois Curfman McInnes 
1092154123eaSLois Curfman McInnes   if (v  || idx) {
1093154123eaSLois Curfman McInnes     if (nztot) {
1094154123eaSLois Curfman McInnes       /* Sort by increasing column numbers, assuming A and B already sorted */
1095299609e3SLois Curfman McInnes       int imark;
109648b35521SBarry Smith       if (mat->assembled) {
1097154123eaSLois Curfman McInnes         for (i=0; i<nzB; i++) cworkB[i] = mat->garray[cworkB[i]];
109848b35521SBarry Smith       }
1099154123eaSLois Curfman McInnes       if (v) {
11000452661fSBarry Smith         *v = (Scalar *) PetscMalloc( (nztot)*sizeof(Scalar) ); CHKPTRQ(*v);
110139e00950SLois Curfman McInnes         for ( i=0; i<nzB; i++ ) {
1102154123eaSLois Curfman McInnes           if (cworkB[i] < cstart)   (*v)[i] = vworkB[i];
1103154123eaSLois Curfman McInnes           else break;
1104154123eaSLois Curfman McInnes         }
1105154123eaSLois Curfman McInnes         imark = i;
1106154123eaSLois Curfman McInnes         for ( i=0; i<nzA; i++ )     (*v)[imark+i] = vworkA[i];
1107299609e3SLois Curfman McInnes         for ( i=imark; i<nzB; i++ ) (*v)[nzA+i] = vworkB[i];
1108154123eaSLois Curfman McInnes       }
1109154123eaSLois Curfman McInnes       if (idx) {
11100452661fSBarry Smith         *idx = (int *) PetscMalloc( (nztot)*sizeof(int) ); CHKPTRQ(*idx);
1111154123eaSLois Curfman McInnes         for (i=0; i<nzA; i++) cworkA[i] += cstart;
1112154123eaSLois Curfman McInnes         for ( i=0; i<nzB; i++ ) {
1113154123eaSLois Curfman McInnes           if (cworkB[i] < cstart)   (*idx)[i] = cworkB[i];
1114154123eaSLois Curfman McInnes           else break;
1115154123eaSLois Curfman McInnes         }
1116154123eaSLois Curfman McInnes         imark = i;
1117154123eaSLois Curfman McInnes         for ( i=0; i<nzA; i++ )     (*idx)[imark+i] = cworkA[i];
1118299609e3SLois Curfman McInnes         for ( i=imark; i<nzB; i++ ) (*idx)[nzA+i] = cworkB[i];
111939e00950SLois Curfman McInnes       }
112039e00950SLois Curfman McInnes     }
112139e00950SLois Curfman McInnes     else {*idx = 0; *v=0;}
1122154123eaSLois Curfman McInnes   }
112339e00950SLois Curfman McInnes   *nz = nztot;
112478b31e54SBarry Smith   ierr = MatRestoreRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
112578b31e54SBarry Smith   ierr = MatRestoreRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
112639e00950SLois Curfman McInnes   return 0;
112739e00950SLois Curfman McInnes }
112839e00950SLois Curfman McInnes 
112939e00950SLois Curfman McInnes static int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
113039e00950SLois Curfman McInnes {
11310452661fSBarry Smith   if (idx) PetscFree(*idx);
11320452661fSBarry Smith   if (v) PetscFree(*v);
113339e00950SLois Curfman McInnes   return 0;
113439e00950SLois Curfman McInnes }
113539e00950SLois Curfman McInnes 
1136cddf8d76SBarry Smith static int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
1137855ac2c5SLois Curfman McInnes {
1138855ac2c5SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1139ec8511deSBarry Smith   Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
1140416022c9SBarry Smith   int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
1141416022c9SBarry Smith   double     sum = 0.0;
114204ca555eSLois Curfman McInnes   Scalar     *v;
114304ca555eSLois Curfman McInnes 
1144416022c9SBarry Smith   if (!aij->assembled) SETERRQ(1,"MatNorm_MPIAIJ:Must assemble matrix");
114517699dbbSLois Curfman McInnes   if (aij->size == 1) {
114614183eadSLois Curfman McInnes     ierr =  MatNorm(aij->A,type,norm); CHKERRQ(ierr);
114737fa93a5SLois Curfman McInnes   } else {
114804ca555eSLois Curfman McInnes     if (type == NORM_FROBENIUS) {
114904ca555eSLois Curfman McInnes       v = amat->a;
115004ca555eSLois Curfman McInnes       for (i=0; i<amat->nz; i++ ) {
115104ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX)
115204ca555eSLois Curfman McInnes         sum += real(conj(*v)*(*v)); v++;
115304ca555eSLois Curfman McInnes #else
115404ca555eSLois Curfman McInnes         sum += (*v)*(*v); v++;
115504ca555eSLois Curfman McInnes #endif
115604ca555eSLois Curfman McInnes       }
115704ca555eSLois Curfman McInnes       v = bmat->a;
115804ca555eSLois Curfman McInnes       for (i=0; i<bmat->nz; i++ ) {
115904ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX)
116004ca555eSLois Curfman McInnes         sum += real(conj(*v)*(*v)); v++;
116104ca555eSLois Curfman McInnes #else
116204ca555eSLois Curfman McInnes         sum += (*v)*(*v); v++;
116304ca555eSLois Curfman McInnes #endif
116404ca555eSLois Curfman McInnes       }
116504ca555eSLois Curfman McInnes       MPI_Allreduce((void*)&sum,(void*)norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
116604ca555eSLois Curfman McInnes       *norm = sqrt(*norm);
116704ca555eSLois Curfman McInnes     }
116804ca555eSLois Curfman McInnes     else if (type == NORM_1) { /* max column norm */
116904ca555eSLois Curfman McInnes       double *tmp, *tmp2;
117004ca555eSLois Curfman McInnes       int    *jj, *garray = aij->garray;
11710452661fSBarry Smith       tmp  = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp);
11720452661fSBarry Smith       tmp2 = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp2);
1173cddf8d76SBarry Smith       PetscMemzero(tmp,aij->N*sizeof(double));
117404ca555eSLois Curfman McInnes       *norm = 0.0;
117504ca555eSLois Curfman McInnes       v = amat->a; jj = amat->j;
117604ca555eSLois Curfman McInnes       for ( j=0; j<amat->nz; j++ ) {
1177579c6b6fSBarry Smith         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
117804ca555eSLois Curfman McInnes       }
117904ca555eSLois Curfman McInnes       v = bmat->a; jj = bmat->j;
118004ca555eSLois Curfman McInnes       for ( j=0; j<bmat->nz; j++ ) {
1181579c6b6fSBarry Smith         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
118204ca555eSLois Curfman McInnes       }
118304ca555eSLois Curfman McInnes       MPI_Allreduce((void*)tmp,(void*)tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);
118404ca555eSLois Curfman McInnes       for ( j=0; j<aij->N; j++ ) {
118504ca555eSLois Curfman McInnes         if (tmp2[j] > *norm) *norm = tmp2[j];
118604ca555eSLois Curfman McInnes       }
11870452661fSBarry Smith       PetscFree(tmp); PetscFree(tmp2);
118804ca555eSLois Curfman McInnes     }
118904ca555eSLois Curfman McInnes     else if (type == NORM_INFINITY) { /* max row norm */
1190515d9167SLois Curfman McInnes       double ntemp = 0.0;
119104ca555eSLois Curfman McInnes       for ( j=0; j<amat->m; j++ ) {
1192dbb450caSBarry Smith         v = amat->a + amat->i[j] + shift;
119304ca555eSLois Curfman McInnes         sum = 0.0;
119404ca555eSLois Curfman McInnes         for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
1195cddf8d76SBarry Smith           sum += PetscAbsScalar(*v); v++;
119604ca555eSLois Curfman McInnes         }
1197dbb450caSBarry Smith         v = bmat->a + bmat->i[j] + shift;
119804ca555eSLois Curfman McInnes         for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
1199cddf8d76SBarry Smith           sum += PetscAbsScalar(*v); v++;
120004ca555eSLois Curfman McInnes         }
1201515d9167SLois Curfman McInnes         if (sum > ntemp) ntemp = sum;
120204ca555eSLois Curfman McInnes       }
1203515d9167SLois Curfman McInnes       MPI_Allreduce((void*)&ntemp,(void*)norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);
120404ca555eSLois Curfman McInnes     }
120504ca555eSLois Curfman McInnes     else {
1206515d9167SLois Curfman McInnes       SETERRQ(1,"MatNorm_MPIAIJ:No support for two norm");
120704ca555eSLois Curfman McInnes     }
120837fa93a5SLois Curfman McInnes   }
1209855ac2c5SLois Curfman McInnes   return 0;
1210855ac2c5SLois Curfman McInnes }
1211855ac2c5SLois Curfman McInnes 
12120de55854SLois Curfman McInnes static int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1213b7c46309SBarry Smith {
1214b7c46309SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1215dbb450caSBarry Smith   Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
1216416022c9SBarry Smith   int        ierr,shift = Aloc->indexshift;
1217416022c9SBarry Smith   Mat        B;
1218b7c46309SBarry Smith   int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
1219b7c46309SBarry Smith   Scalar     *array;
1220b7c46309SBarry Smith 
1221416022c9SBarry Smith   if (!matout && M != N) SETERRQ(1,"MatTranspose_MPIAIJ:Square only in-place");
1222b7c46309SBarry Smith   ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,0,0,0,&B);
1223b7c46309SBarry Smith   CHKERRQ(ierr);
1224b7c46309SBarry Smith 
1225b7c46309SBarry Smith   /* copy over the A part */
1226ec8511deSBarry Smith   Aloc = (Mat_SeqAIJ*) a->A->data;
1227b7c46309SBarry Smith   m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1228b7c46309SBarry Smith   row = a->rstart;
1229dbb450caSBarry Smith   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
1230b7c46309SBarry Smith   for ( i=0; i<m; i++ ) {
1231416022c9SBarry Smith     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1232b7c46309SBarry Smith     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1233b7c46309SBarry Smith   }
1234b7c46309SBarry Smith   aj = Aloc->j;
1235dbb450caSBarry Smith   for ( i=0; i<ai[m]|+shift; i++ ) {aj[i] -= a->cstart + shift;}
1236b7c46309SBarry Smith 
1237b7c46309SBarry Smith   /* copy over the B part */
1238ec8511deSBarry Smith   Aloc = (Mat_SeqAIJ*) a->B->data;
1239b7c46309SBarry Smith   m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1240b7c46309SBarry Smith   row = a->rstart;
12410452661fSBarry Smith   ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
1242dbb450caSBarry Smith   for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
1243b7c46309SBarry Smith   for ( i=0; i<m; i++ ) {
1244416022c9SBarry Smith     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1245b7c46309SBarry Smith     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1246b7c46309SBarry Smith   }
12470452661fSBarry Smith   PetscFree(ct);
1248b7c46309SBarry Smith   ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
1249b7c46309SBarry Smith   ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
12500de55854SLois Curfman McInnes   if (matout) {
12510de55854SLois Curfman McInnes     *matout = B;
12520de55854SLois Curfman McInnes   } else {
12530de55854SLois Curfman McInnes     /* This isn't really an in-place transpose .... but free data structures from a */
12540452661fSBarry Smith     PetscFree(a->rowners);
12550de55854SLois Curfman McInnes     ierr = MatDestroy(a->A); CHKERRQ(ierr);
12560de55854SLois Curfman McInnes     ierr = MatDestroy(a->B); CHKERRQ(ierr);
12570452661fSBarry Smith     if (a->colmap) PetscFree(a->colmap);
12580452661fSBarry Smith     if (a->garray) PetscFree(a->garray);
12590de55854SLois Curfman McInnes     if (a->lvec) VecDestroy(a->lvec);
1260a56f8943SBarry Smith     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
12610452661fSBarry Smith     PetscFree(a);
1262416022c9SBarry Smith     PetscMemcpy(A,B,sizeof(struct _Mat));
12630452661fSBarry Smith     PetscHeaderDestroy(B);
12640de55854SLois Curfman McInnes   }
1265b7c46309SBarry Smith   return 0;
1266b7c46309SBarry Smith }
1267b7c46309SBarry Smith 
1268fc76ce87SLois Curfman McInnes extern int MatConvert_MPIAIJ(Mat,MatType,Mat *);
1269a56f8943SBarry Smith static int MatCopyPrivate_MPIAIJ(Mat,Mat *,int);
1270d6dfbf8fSBarry Smith 
12718a729477SBarry Smith /* -------------------------------------------------------------------*/
12722ee70a88SLois Curfman McInnes static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
127339e00950SLois Curfman McInnes        MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
127444a69424SLois Curfman McInnes        MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
127544a69424SLois Curfman McInnes        MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
1276299609e3SLois Curfman McInnes        MatSolve_MPIAIJ,MatSolveAdd_MPIAIJ,
1277299609e3SLois Curfman McInnes        MatSolveTrans_MPIAIJ,MatSolveTransAdd_MPIAIJ,
1278299609e3SLois Curfman McInnes        MatLUFactor_MPIAIJ,0,
127944a69424SLois Curfman McInnes        MatRelax_MPIAIJ,
1280b7c46309SBarry Smith        MatTranspose_MPIAIJ,
1281a66be287SLois Curfman McInnes        MatGetInfo_MPIAIJ,0,
1282855ac2c5SLois Curfman McInnes        MatGetDiagonal_MPIAIJ,0,MatNorm_MPIAIJ,
1283ee50ffe9SBarry Smith        MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
12841eb62cbbSBarry Smith        0,
1285299609e3SLois Curfman McInnes        MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
1286299609e3SLois Curfman McInnes        MatGetReordering_MPIAIJ,
1287299609e3SLois Curfman McInnes        MatLUFactorSymbolic_MPIAIJ,MatLUFactorNumeric_MPIAIJ,0,0,
1288d1710a03SLois Curfman McInnes        MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
1289299609e3SLois Curfman McInnes        MatILUFactorSymbolic_MPIAIJ,0,
1290*b49de8d1SLois Curfman McInnes        0,0,MatConvert_MPIAIJ,0,0,MatCopyPrivate_MPIAIJ,0,0,
1291*b49de8d1SLois Curfman McInnes        0,0,0,
1292*b49de8d1SLois Curfman McInnes        0,0,MatGetValues_MPIAIJ};
12938a729477SBarry Smith 
12941987afe7SBarry Smith /*@C
1295ff756334SLois Curfman McInnes    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
1296ff756334SLois Curfman McInnes    (the default parallel PETSc format).
12978a729477SBarry Smith 
12988a729477SBarry Smith    Input Parameters:
12991eb62cbbSBarry Smith .  comm - MPI communicator
13007d3e4905SLois Curfman McInnes .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
13017d3e4905SLois Curfman McInnes .  n - number of local columns (or PETSC_DECIDE to have calculated
13027d3e4905SLois Curfman McInnes            if N is given)
13037d3e4905SLois Curfman McInnes .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
13047d3e4905SLois Curfman McInnes .  N - number of global columns (or PETSC_DECIDE to have calculated
13057d3e4905SLois Curfman McInnes            if n is given)
1306ab693e5aSLois Curfman McInnes .  d_nz - number of nonzeros per row in diagonal portion of local submatrix
1307ff756334SLois Curfman McInnes            (same for all local rows)
1308ab693e5aSLois Curfman McInnes .  d_nzz - number of nonzeros per row in diagonal portion of local submatrix
1309ab693e5aSLois Curfman McInnes            or null (possibly different for each row).  You must leave room
1310ab693e5aSLois Curfman McInnes            for the diagonal entry even if it is zero.
1311ab693e5aSLois Curfman McInnes .  o_nz - number of nonzeros per row in off-diagonal portion of local
1312ab693e5aSLois Curfman McInnes            submatrix (same for all local rows).
1313ab693e5aSLois Curfman McInnes .  o_nzz - number of nonzeros per row in off-diagonal portion of local
1314ab693e5aSLois Curfman McInnes            submatrix or null (possibly different for each row).
13158a729477SBarry Smith 
1316ff756334SLois Curfman McInnes    Output Parameter:
13178a729477SBarry Smith .  newmat - the matrix
13188a729477SBarry Smith 
1319ff756334SLois Curfman McInnes    Notes:
1320ff756334SLois Curfman McInnes    The AIJ format (also called the Yale sparse matrix format or
1321ff756334SLois Curfman McInnes    compressed row storage), is fully compatible with standard Fortran 77
13220002213bSLois Curfman McInnes    storage.  That is, the stored row and column indices can begin at
13230002213bSLois Curfman McInnes    either one (as in Fortran) or zero.  See the users manual for details.
1324ff756334SLois Curfman McInnes 
1325ff756334SLois Curfman McInnes    The user MUST specify either the local or global matrix dimensions
1326ff756334SLois Curfman McInnes    (possibly both).
1327ff756334SLois Curfman McInnes 
1328e0245417SLois Curfman McInnes    Storage Information:
1329e0245417SLois Curfman McInnes    For a square global matrix we define each processor's diagonal portion
1330e0245417SLois Curfman McInnes    to be its local rows and the corresponding columns (a square submatrix);
1331e0245417SLois Curfman McInnes    each processor's off-diagonal portion encompasses the remainder of the
1332e0245417SLois Curfman McInnes    local matrix (a rectangular submatrix).
1333e0245417SLois Curfman McInnes 
1334e0245417SLois Curfman McInnes    The user can specify preallocated storage for the diagonal part of
1335e0245417SLois Curfman McInnes    the local submatrix with either d_nz or d_nnz (not both). Set both
1336e0245417SLois Curfman McInnes    d_nz and d_nnz to zero for PETSc to control dynamic memory allocation.
1337e0245417SLois Curfman McInnes    Likewise, specify preallocated storage for the off-diagonal part of
1338e0245417SLois Curfman McInnes    the local submatrix with o_nz or o_nnz (not both).
1339ff756334SLois Curfman McInnes 
1340dbd7a890SLois Curfman McInnes .keywords: matrix, aij, compressed row, sparse, parallel
1341ff756334SLois Curfman McInnes 
1342fafbff53SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
13438a729477SBarry Smith @*/
13441eb62cbbSBarry Smith int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
13451eb62cbbSBarry Smith                     int d_nz,int *d_nnz, int o_nz,int *o_nnz,Mat *newmat)
13468a729477SBarry Smith {
13478a729477SBarry Smith   Mat          mat;
1348416022c9SBarry Smith   Mat_MPIAIJ   *a;
13496abc6512SBarry Smith   int          ierr, i,sum[2],work[2];
1350416022c9SBarry Smith 
13518a729477SBarry Smith   *newmat         = 0;
13520452661fSBarry Smith   PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,comm);
1353a5a9c739SBarry Smith   PLogObjectCreate(mat);
13540452661fSBarry Smith   mat->data       = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
1355416022c9SBarry Smith   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
135644a69424SLois Curfman McInnes   mat->destroy    = MatDestroy_MPIAIJ;
135744a69424SLois Curfman McInnes   mat->view       = MatView_MPIAIJ;
13588a729477SBarry Smith   mat->factor     = 0;
1359d6dfbf8fSBarry Smith 
136064119d58SLois Curfman McInnes   a->insertmode = NOT_SET_VALUES;
136117699dbbSLois Curfman McInnes   MPI_Comm_rank(comm,&a->rank);
136217699dbbSLois Curfman McInnes   MPI_Comm_size(comm,&a->size);
13631eb62cbbSBarry Smith 
13641987afe7SBarry Smith   if (m == PETSC_DECIDE && (d_nnz || o_nnz))
13651987afe7SBarry Smith     SETERRQ(1,"MatCreateMPIAIJ:Cannot have PETSc decide rows but set d_nnz or o_nnz");
13661987afe7SBarry Smith 
1367dbd7a890SLois Curfman McInnes   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
13681eb62cbbSBarry Smith     work[0] = m; work[1] = n;
1369d65a2f8fSBarry Smith     MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
1370dbd7a890SLois Curfman McInnes     if (M == PETSC_DECIDE) M = sum[0];
1371dbd7a890SLois Curfman McInnes     if (N == PETSC_DECIDE) N = sum[1];
13721eb62cbbSBarry Smith   }
137317699dbbSLois Curfman McInnes   if (m == PETSC_DECIDE) {m = M/a->size + ((M % a->size) > a->rank);}
137417699dbbSLois Curfman McInnes   if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);}
1375416022c9SBarry Smith   a->m = m;
1376416022c9SBarry Smith   a->n = n;
1377416022c9SBarry Smith   a->N = N;
1378416022c9SBarry Smith   a->M = M;
13791eb62cbbSBarry Smith 
13801eb62cbbSBarry Smith   /* build local table of row and column ownerships */
13810452661fSBarry Smith   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
1382579c6b6fSBarry Smith   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
138317699dbbSLois Curfman McInnes   a->cowners = a->rowners + a->size + 1;
1384416022c9SBarry Smith   MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);
1385416022c9SBarry Smith   a->rowners[0] = 0;
138617699dbbSLois Curfman McInnes   for ( i=2; i<=a->size; i++ ) {
1387416022c9SBarry Smith     a->rowners[i] += a->rowners[i-1];
13888a729477SBarry Smith   }
138917699dbbSLois Curfman McInnes   a->rstart = a->rowners[a->rank];
139017699dbbSLois Curfman McInnes   a->rend   = a->rowners[a->rank+1];
1391416022c9SBarry Smith   MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);
1392416022c9SBarry Smith   a->cowners[0] = 0;
139317699dbbSLois Curfman McInnes   for ( i=2; i<=a->size; i++ ) {
1394416022c9SBarry Smith     a->cowners[i] += a->cowners[i-1];
13958a729477SBarry Smith   }
139617699dbbSLois Curfman McInnes   a->cstart = a->cowners[a->rank];
139717699dbbSLois Curfman McInnes   a->cend   = a->cowners[a->rank+1];
13988a729477SBarry Smith 
1399416022c9SBarry Smith   ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&a->A); CHKERRQ(ierr);
1400416022c9SBarry Smith   PLogObjectParent(mat,a->A);
1401416022c9SBarry Smith   ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&a->B); CHKERRQ(ierr);
1402416022c9SBarry Smith   PLogObjectParent(mat,a->B);
14038a729477SBarry Smith 
14041eb62cbbSBarry Smith   /* build cache for off array entries formed */
1405416022c9SBarry Smith   ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr);
1406416022c9SBarry Smith   a->colmap    = 0;
1407416022c9SBarry Smith   a->garray    = 0;
14088a729477SBarry Smith 
14091eb62cbbSBarry Smith   /* stuff used for matrix vector multiply */
1410416022c9SBarry Smith   a->lvec      = 0;
1411416022c9SBarry Smith   a->Mvctx     = 0;
1412416022c9SBarry Smith   a->assembled = 0;
14138a729477SBarry Smith 
1414d6dfbf8fSBarry Smith   *newmat = mat;
1415d6dfbf8fSBarry Smith   return 0;
1416d6dfbf8fSBarry Smith }
1417c74985f6SBarry Smith 
1418a56f8943SBarry Smith static int MatCopyPrivate_MPIAIJ(Mat matin,Mat *newmat,int cpvalues)
1419d6dfbf8fSBarry Smith {
1420d6dfbf8fSBarry Smith   Mat        mat;
1421416022c9SBarry Smith   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
14222ee70a88SLois Curfman McInnes   int        ierr, len;
1423d6dfbf8fSBarry Smith 
1424416022c9SBarry Smith   if (!oldmat->assembled) SETERRQ(1,"MatCopyPrivate_MPIAIJ:Must assemble matrix");
1425416022c9SBarry Smith   *newmat       = 0;
14260452661fSBarry Smith   PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm);
1427a5a9c739SBarry Smith   PLogObjectCreate(mat);
14280452661fSBarry Smith   mat->data     = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
1429416022c9SBarry Smith   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
143044a69424SLois Curfman McInnes   mat->destroy  = MatDestroy_MPIAIJ;
143144a69424SLois Curfman McInnes   mat->view     = MatView_MPIAIJ;
1432d6dfbf8fSBarry Smith   mat->factor   = matin->factor;
1433d6dfbf8fSBarry Smith 
1434416022c9SBarry Smith   a->m          = oldmat->m;
1435416022c9SBarry Smith   a->n          = oldmat->n;
1436416022c9SBarry Smith   a->M          = oldmat->M;
1437416022c9SBarry Smith   a->N          = oldmat->N;
1438d6dfbf8fSBarry Smith 
1439416022c9SBarry Smith   a->assembled  = 1;
1440416022c9SBarry Smith   a->rstart     = oldmat->rstart;
1441416022c9SBarry Smith   a->rend       = oldmat->rend;
1442416022c9SBarry Smith   a->cstart     = oldmat->cstart;
1443416022c9SBarry Smith   a->cend       = oldmat->cend;
144417699dbbSLois Curfman McInnes   a->size       = oldmat->size;
144517699dbbSLois Curfman McInnes   a->rank       = oldmat->rank;
144664119d58SLois Curfman McInnes   a->insertmode = NOT_SET_VALUES;
1447d6dfbf8fSBarry Smith 
14480452661fSBarry Smith   a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners);
144917699dbbSLois Curfman McInnes   PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
145017699dbbSLois Curfman McInnes   PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));
1451416022c9SBarry Smith   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
14522ee70a88SLois Curfman McInnes   if (oldmat->colmap) {
14530452661fSBarry Smith     a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
1454416022c9SBarry Smith     PLogObjectMemory(mat,(a->N)*sizeof(int));
1455416022c9SBarry Smith     PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
1456416022c9SBarry Smith   } else a->colmap = 0;
1457ec8511deSBarry Smith   if (oldmat->garray && (len = ((Mat_SeqAIJ *) (oldmat->B->data))->n)) {
14580452661fSBarry Smith     a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray);
1459464493b3SBarry Smith     PLogObjectMemory(mat,len*sizeof(int));
1460416022c9SBarry Smith     PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
1461416022c9SBarry Smith   } else a->garray = 0;
1462d6dfbf8fSBarry Smith 
1463416022c9SBarry Smith   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
1464416022c9SBarry Smith   PLogObjectParent(mat,a->lvec);
1465a56f8943SBarry Smith   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
1466416022c9SBarry Smith   PLogObjectParent(mat,a->Mvctx);
1467416022c9SBarry Smith   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
1468416022c9SBarry Smith   PLogObjectParent(mat,a->A);
1469416022c9SBarry Smith   ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
1470416022c9SBarry Smith   PLogObjectParent(mat,a->B);
14718a729477SBarry Smith   *newmat = mat;
14728a729477SBarry Smith   return 0;
14738a729477SBarry Smith }
1474416022c9SBarry Smith 
1475416022c9SBarry Smith #include "sysio.h"
1476416022c9SBarry Smith 
147702834360SBarry Smith int MatLoad_MPIAIJorMPIRow(Viewer bview,MatType type,Mat *newmat)
1478416022c9SBarry Smith {
1479d65a2f8fSBarry Smith   Mat          A;
1480416022c9SBarry Smith   int          i, nz, ierr, j,rstart, rend, fd;
1481d65a2f8fSBarry Smith   Scalar       *vals,*svals;
1482416022c9SBarry Smith   PetscObject  vobj = (PetscObject) bview;
1483416022c9SBarry Smith   MPI_Comm     comm = vobj->comm;
1484416022c9SBarry Smith   MPI_Status   status;
148517699dbbSLois Curfman McInnes   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1486d65a2f8fSBarry Smith   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
1487d65a2f8fSBarry Smith   int          tag = ((PetscObject)bview)->tag;
1488416022c9SBarry Smith 
148917699dbbSLois Curfman McInnes   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
149017699dbbSLois Curfman McInnes   if (!rank) {
1491416022c9SBarry Smith     ierr = ViewerFileGetDescriptor_Private(bview,&fd); CHKERRQ(ierr);
1492416022c9SBarry Smith     ierr = SYRead(fd,(char *)header,4,SYINT); CHKERRQ(ierr);
149302834360SBarry Smith     if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIAIJorMPIRow:not matrix object");
1494416022c9SBarry Smith   }
1495416022c9SBarry Smith 
1496416022c9SBarry Smith   MPI_Bcast(header+1,3,MPI_INT,0,comm);
1497416022c9SBarry Smith   M = header[1]; N = header[2];
1498416022c9SBarry Smith   /* determine ownership of all rows */
149917699dbbSLois Curfman McInnes   m = M/size + ((M % size) > rank);
15000452661fSBarry Smith   rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
1501416022c9SBarry Smith   MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1502416022c9SBarry Smith   rowners[0] = 0;
150317699dbbSLois Curfman McInnes   for ( i=2; i<=size; i++ ) {
1504416022c9SBarry Smith     rowners[i] += rowners[i-1];
1505416022c9SBarry Smith   }
150617699dbbSLois Curfman McInnes   rstart = rowners[rank];
150717699dbbSLois Curfman McInnes   rend   = rowners[rank+1];
1508416022c9SBarry Smith 
1509416022c9SBarry Smith   /* distribute row lengths to all processors */
15100452661fSBarry Smith   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
1511416022c9SBarry Smith   offlens = ourlens + (rend-rstart);
151217699dbbSLois Curfman McInnes   if (!rank) {
15130452661fSBarry Smith     rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
1514416022c9SBarry Smith     ierr = SYRead(fd,rowlengths,M,SYINT); CHKERRQ(ierr);
15150452661fSBarry Smith     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
151617699dbbSLois Curfman McInnes     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
1517d65a2f8fSBarry Smith     MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
15180452661fSBarry Smith     PetscFree(sndcounts);
1519416022c9SBarry Smith   }
1520416022c9SBarry Smith   else {
1521416022c9SBarry Smith     MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
1522416022c9SBarry Smith   }
1523416022c9SBarry Smith 
152417699dbbSLois Curfman McInnes   if (!rank) {
1525416022c9SBarry Smith     /* calculate the number of nonzeros on each processor */
15260452661fSBarry Smith     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
1527cddf8d76SBarry Smith     PetscMemzero(procsnz,size*sizeof(int));
152817699dbbSLois Curfman McInnes     for ( i=0; i<size; i++ ) {
1529416022c9SBarry Smith       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
1530416022c9SBarry Smith         procsnz[i] += rowlengths[j];
1531416022c9SBarry Smith       }
1532416022c9SBarry Smith     }
15330452661fSBarry Smith     PetscFree(rowlengths);
1534416022c9SBarry Smith 
1535416022c9SBarry Smith     /* determine max buffer needed and allocate it */
1536416022c9SBarry Smith     maxnz = 0;
153717699dbbSLois Curfman McInnes     for ( i=0; i<size; i++ ) {
15380452661fSBarry Smith       maxnz = PetscMax(maxnz,procsnz[i]);
1539416022c9SBarry Smith     }
15400452661fSBarry Smith     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
1541416022c9SBarry Smith 
1542416022c9SBarry Smith     /* read in my part of the matrix column indices  */
1543416022c9SBarry Smith     nz = procsnz[0];
15440452661fSBarry Smith     mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1545d65a2f8fSBarry Smith     ierr = SYRead(fd,mycols,nz,SYINT); CHKERRQ(ierr);
1546d65a2f8fSBarry Smith 
1547d65a2f8fSBarry Smith     /* read in every one elses and ship off */
154817699dbbSLois Curfman McInnes     for ( i=1; i<size; i++ ) {
1549d65a2f8fSBarry Smith       nz = procsnz[i];
1550416022c9SBarry Smith       ierr = SYRead(fd,cols,nz,SYINT); CHKERRQ(ierr);
1551d65a2f8fSBarry Smith       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
1552d65a2f8fSBarry Smith     }
15530452661fSBarry Smith     PetscFree(cols);
1554416022c9SBarry Smith   }
1555416022c9SBarry Smith   else {
1556416022c9SBarry Smith     /* determine buffer space needed for message */
1557416022c9SBarry Smith     nz = 0;
1558416022c9SBarry Smith     for ( i=0; i<m; i++ ) {
1559416022c9SBarry Smith       nz += ourlens[i];
1560416022c9SBarry Smith     }
15610452661fSBarry Smith     mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1562416022c9SBarry Smith 
1563416022c9SBarry Smith     /* receive message of column indices*/
1564d65a2f8fSBarry Smith     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
1565416022c9SBarry Smith     MPI_Get_count(&status,MPI_INT,&maxnz);
156602834360SBarry Smith     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJorMPIRow:something is wrong with file");
1567416022c9SBarry Smith   }
1568416022c9SBarry Smith 
1569416022c9SBarry Smith   /* loop over local rows, determining number of off diagonal entries */
1570cddf8d76SBarry Smith   PetscMemzero(offlens,m*sizeof(int));
1571416022c9SBarry Smith   jj = 0;
1572416022c9SBarry Smith   for ( i=0; i<m; i++ ) {
1573416022c9SBarry Smith     for ( j=0; j<ourlens[i]; j++ ) {
1574d65a2f8fSBarry Smith       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1575416022c9SBarry Smith       jj++;
1576416022c9SBarry Smith     }
1577416022c9SBarry Smith   }
1578d65a2f8fSBarry Smith 
1579d65a2f8fSBarry Smith   /* create our matrix */
1580416022c9SBarry Smith   for ( i=0; i<m; i++ ) {
1581416022c9SBarry Smith     ourlens[i] -= offlens[i];
1582416022c9SBarry Smith   }
1583d65a2f8fSBarry Smith   if (type == MATMPIAIJ) {
1584d65a2f8fSBarry Smith     ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1585d65a2f8fSBarry Smith   }
1586d65a2f8fSBarry Smith   else if (type == MATMPIROW) {
1587d65a2f8fSBarry Smith     ierr = MatCreateMPIRow(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1588d65a2f8fSBarry Smith   }
1589d65a2f8fSBarry Smith   A = *newmat;
1590d65a2f8fSBarry Smith   MatSetOption(A,COLUMNS_SORTED);
1591d65a2f8fSBarry Smith   for ( i=0; i<m; i++ ) {
1592d65a2f8fSBarry Smith     ourlens[i] += offlens[i];
1593d65a2f8fSBarry Smith   }
1594416022c9SBarry Smith 
159517699dbbSLois Curfman McInnes   if (!rank) {
15960452661fSBarry Smith     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);
1597416022c9SBarry Smith 
1598416022c9SBarry Smith     /* read in my part of the matrix numerical values  */
1599416022c9SBarry Smith     nz = procsnz[0];
1600416022c9SBarry Smith     ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr);
1601d65a2f8fSBarry Smith 
1602d65a2f8fSBarry Smith     /* insert into matrix */
1603d65a2f8fSBarry Smith     jj      = rstart;
1604d65a2f8fSBarry Smith     smycols = mycols;
1605d65a2f8fSBarry Smith     svals   = vals;
1606d65a2f8fSBarry Smith     for ( i=0; i<m; i++ ) {
1607d65a2f8fSBarry Smith       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1608d65a2f8fSBarry Smith       smycols += ourlens[i];
1609d65a2f8fSBarry Smith       svals   += ourlens[i];
1610d65a2f8fSBarry Smith       jj++;
1611416022c9SBarry Smith     }
1612416022c9SBarry Smith 
1613d65a2f8fSBarry Smith     /* read in other processors and ship out */
161417699dbbSLois Curfman McInnes     for ( i=1; i<size; i++ ) {
1615416022c9SBarry Smith       nz = procsnz[i];
1616416022c9SBarry Smith       ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr);
1617d65a2f8fSBarry Smith       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
1618416022c9SBarry Smith     }
16190452661fSBarry Smith     PetscFree(procsnz);
1620416022c9SBarry Smith   }
1621d65a2f8fSBarry Smith   else {
1622d65a2f8fSBarry Smith     /* receive numeric values */
16230452661fSBarry Smith     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);
1624416022c9SBarry Smith 
1625d65a2f8fSBarry Smith     /* receive message of values*/
1626d65a2f8fSBarry Smith     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
1627d65a2f8fSBarry Smith     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
162802834360SBarry Smith     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJorMPIRow:something is wrong with file");
1629d65a2f8fSBarry Smith 
1630d65a2f8fSBarry Smith     /* insert into matrix */
1631d65a2f8fSBarry Smith     jj      = rstart;
1632d65a2f8fSBarry Smith     smycols = mycols;
1633d65a2f8fSBarry Smith     svals   = vals;
1634d65a2f8fSBarry Smith     for ( i=0; i<m; i++ ) {
1635d65a2f8fSBarry Smith       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1636d65a2f8fSBarry Smith       smycols += ourlens[i];
1637d65a2f8fSBarry Smith       svals   += ourlens[i];
1638d65a2f8fSBarry Smith       jj++;
1639d65a2f8fSBarry Smith     }
1640d65a2f8fSBarry Smith   }
16410452661fSBarry Smith   PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);
1642d65a2f8fSBarry Smith 
1643d65a2f8fSBarry Smith   ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
1644d65a2f8fSBarry Smith   ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
1645416022c9SBarry Smith   return 0;
1646416022c9SBarry Smith }
1647