xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision c60448a52889ebeb9af4e25a0e1bb8def1ff3021)
1cb512458SBarry Smith #ifndef lint
2*c60448a5SBarry Smith static char vcid[] = "$Id: mpiaij.c,v 1.50 1995/06/14 17:24:10 bsmith Exp bsmith $";
3cb512458SBarry Smith #endif
48a729477SBarry Smith 
51eb62cbbSBarry Smith #include "mpiaij.h"
68a729477SBarry Smith #include "vec/vecimpl.h"
7d6dfbf8fSBarry Smith #include "inline/spops.h"
88a729477SBarry Smith 
99e25ed09SBarry Smith /* local utility routine that creates a mapping from the global column
109e25ed09SBarry Smith number to the local number in the off-diagonal part of the local
119e25ed09SBarry Smith storage of the matrix.  This is done in a non scable way since the
129e25ed09SBarry Smith length of colmap equals the global matrix length.
139e25ed09SBarry Smith */
149e25ed09SBarry Smith static int CreateColmap(Mat mat)
159e25ed09SBarry Smith {
1644a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1744a69424SLois Curfman McInnes   Mat_AIJ    *B = (Mat_AIJ*) aij->B->data;
189e25ed09SBarry Smith   int        n = B->n,i;
1978b31e54SBarry Smith   aij->colmap = (int *) PETSCMALLOC( aij->N*sizeof(int) ); CHKPTRQ(aij->colmap);
2078b31e54SBarry Smith   PETSCMEMSET(aij->colmap,0,aij->N*sizeof(int));
21abc0e9e4SLois Curfman McInnes   for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
229e25ed09SBarry Smith   return 0;
239e25ed09SBarry Smith }
249e25ed09SBarry Smith 
252ee70a88SLois Curfman McInnes static int MatSetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,
261eb62cbbSBarry Smith                             int *idxn,Scalar *v,InsertMode addv)
278a729477SBarry Smith {
2844a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
291eb62cbbSBarry Smith   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
301eb62cbbSBarry Smith   int        cstart = aij->cstart, cend = aij->cend,row,col;
318a729477SBarry Smith 
3207a0e7adSLois Curfman McInnes   if (aij->insertmode != NOTSETVALUES && aij->insertmode != addv) {
3378b31e54SBarry Smith     SETERRQ(1,"You cannot mix inserts and adds");
348a729477SBarry Smith   }
351eb62cbbSBarry Smith   aij->insertmode = addv;
368a729477SBarry Smith   for ( i=0; i<m; i++ ) {
3778b31e54SBarry Smith     if (idxm[i] < 0) SETERRQ(1,"Negative row index");
3878b31e54SBarry Smith     if (idxm[i] >= aij->M) SETERRQ(1,"Row index too large");
391eb62cbbSBarry Smith     if (idxm[i] >= rstart && idxm[i] < rend) {
401eb62cbbSBarry Smith       row = idxm[i] - rstart;
411eb62cbbSBarry Smith       for ( j=0; j<n; j++ ) {
4278b31e54SBarry Smith         if (idxn[j] < 0) SETERRQ(1,"Negative column index");
4378b31e54SBarry Smith         if (idxn[j] >= aij->N) SETERRQ(1,"Column index too large");
441eb62cbbSBarry Smith         if (idxn[j] >= cstart && idxn[j] < cend){
451eb62cbbSBarry Smith           col = idxn[j] - cstart;
4678b31e54SBarry Smith           ierr = MatSetValues(aij->A,1,&row,1,&col,v+i*n+j,addv);CHKERRQ(ierr);
471eb62cbbSBarry Smith         }
481eb62cbbSBarry Smith         else {
49d6dfbf8fSBarry Smith           if (aij->assembled) {
5078b31e54SBarry Smith             if (!aij->colmap) {ierr = CreateColmap(mat); CHKERRQ(ierr);}
519e25ed09SBarry Smith             col = aij->colmap[idxn[j]] - 1;
529e25ed09SBarry Smith             if (col < 0) {
5378b31e54SBarry Smith               SETERRQ(1,"Cannot insert new off diagonal block nonzero in\
549e25ed09SBarry Smith                      already\
55d6dfbf8fSBarry Smith                      assembled matrix. Contact petsc-maint@mcs.anl.gov\
56d6dfbf8fSBarry Smith                      if your need this feature");
57d6dfbf8fSBarry Smith             }
589e25ed09SBarry Smith           }
599e25ed09SBarry Smith           else col = idxn[j];
6078b31e54SBarry Smith           ierr = MatSetValues(aij->B,1,&row,1,&col,v+i*n+j,addv);CHKERRQ(ierr);
611eb62cbbSBarry Smith         }
621eb62cbbSBarry Smith       }
631eb62cbbSBarry Smith     }
641eb62cbbSBarry Smith     else {
65dbd7a890SLois Curfman McInnes       ierr = StashValues_Private(&aij->stash,idxm[i],n,idxn,v+i*n,addv);
6678b31e54SBarry Smith       CHKERRQ(ierr);
671eb62cbbSBarry Smith     }
688a729477SBarry Smith   }
698a729477SBarry Smith   return 0;
708a729477SBarry Smith }
718a729477SBarry Smith 
728a729477SBarry Smith /*
731eb62cbbSBarry Smith     the assembly code is a lot like the code for vectors, we should
741eb62cbbSBarry Smith     sometime derive a single assembly code that can be used for
751eb62cbbSBarry Smith     either case.
768a729477SBarry Smith */
778a729477SBarry Smith 
78fc76ce87SLois Curfman McInnes static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
798a729477SBarry Smith {
8044a69424SLois Curfman McInnes   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
81d6dfbf8fSBarry Smith   MPI_Comm    comm = mat->comm;
826abc6512SBarry Smith   int         numtids = aij->numtids, *owners = aij->rowners;
831eb62cbbSBarry Smith   int         mytid = aij->mytid;
841eb62cbbSBarry Smith   MPI_Request *send_waits,*recv_waits;
856abc6512SBarry Smith   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
865392566eSBarry Smith   int         tag = mat->tag, *owner,*starts,count,ierr;
871eb62cbbSBarry Smith   InsertMode  addv;
881eb62cbbSBarry Smith   Scalar      *rvalues,*svalues;
891eb62cbbSBarry Smith 
901eb62cbbSBarry Smith   /* make sure all processors are either in INSERTMODE or ADDMODE */
9128988994SBarry Smith   MPI_Allreduce((void *) &aij->insertmode,(void *) &addv,1,MPI_INT,
921eb62cbbSBarry Smith                 MPI_BOR,comm);
939d00d63dSBarry Smith   if (addv == (ADDVALUES|INSERTVALUES)) {
9478b31e54SBarry Smith     SETERRQ(1,"Some processors have inserted while others have added");
951eb62cbbSBarry Smith   }
961eb62cbbSBarry Smith   aij->insertmode = addv; /* in case this processor had no cache */
971eb62cbbSBarry Smith 
981eb62cbbSBarry Smith   /*  first count number of contributors to each processor */
9978b31e54SBarry Smith   nprocs = (int *) PETSCMALLOC( 2*numtids*sizeof(int) ); CHKPTRQ(nprocs);
10078b31e54SBarry Smith   PETSCMEMSET(nprocs,0,2*numtids*sizeof(int)); procs = nprocs + numtids;
10178b31e54SBarry Smith   owner = (int *) PETSCMALLOC( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
1021eb62cbbSBarry Smith   for ( i=0; i<aij->stash.n; i++ ) {
1031eb62cbbSBarry Smith     idx = aij->stash.idx[i];
1041eb62cbbSBarry Smith     for ( j=0; j<numtids; j++ ) {
1051eb62cbbSBarry Smith       if (idx >= owners[j] && idx < owners[j+1]) {
1061eb62cbbSBarry Smith         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
1078a729477SBarry Smith       }
1088a729477SBarry Smith     }
1098a729477SBarry Smith   }
1101eb62cbbSBarry Smith   nsends = 0;  for ( i=0; i<numtids; i++ ) { nsends += procs[i];}
1111eb62cbbSBarry Smith 
1121eb62cbbSBarry Smith   /* inform other processors of number of messages and max length*/
11378b31e54SBarry Smith   work = (int *) PETSCMALLOC( numtids*sizeof(int) ); CHKPTRQ(work);
1141eb62cbbSBarry Smith   MPI_Allreduce((void *) procs,(void *) work,numtids,MPI_INT,MPI_SUM,comm);
1151eb62cbbSBarry Smith   nreceives = work[mytid];
1161eb62cbbSBarry Smith   MPI_Allreduce((void *) nprocs,(void *) work,numtids,MPI_INT,MPI_MAX,comm);
1171eb62cbbSBarry Smith   nmax = work[mytid];
11878b31e54SBarry Smith   PETSCFREE(work);
1191eb62cbbSBarry Smith 
1201eb62cbbSBarry Smith   /* post receives:
1211eb62cbbSBarry Smith        1) each message will consist of ordered pairs
1221eb62cbbSBarry Smith      (global index,value) we store the global index as a double
123d6dfbf8fSBarry Smith      to simplify the message passing.
1241eb62cbbSBarry Smith        2) since we don't know how long each individual message is we
1251eb62cbbSBarry Smith      allocate the largest needed buffer for each receive. Potentially
1261eb62cbbSBarry Smith      this is a lot of wasted space.
1271eb62cbbSBarry Smith 
1281eb62cbbSBarry Smith 
1291eb62cbbSBarry Smith        This could be done better.
1301eb62cbbSBarry Smith   */
13178b31e54SBarry Smith   rvalues = (Scalar *) PETSCMALLOC(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
13278b31e54SBarry Smith   CHKPTRQ(rvalues);
13378b31e54SBarry Smith   recv_waits = (MPI_Request *) PETSCMALLOC((nreceives+1)*sizeof(MPI_Request));
13478b31e54SBarry Smith   CHKPTRQ(recv_waits);
1351eb62cbbSBarry Smith   for ( i=0; i<nreceives; i++ ) {
136*c60448a5SBarry Smith     MPI_Irecv((void *)(rvalues+3*nmax*i),3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
1371eb62cbbSBarry Smith               comm,recv_waits+i);
1381eb62cbbSBarry Smith   }
1391eb62cbbSBarry Smith 
1401eb62cbbSBarry Smith   /* do sends:
1411eb62cbbSBarry Smith       1) starts[i] gives the starting index in svalues for stuff going to
1421eb62cbbSBarry Smith          the ith processor
1431eb62cbbSBarry Smith   */
14478b31e54SBarry Smith   svalues = (Scalar *) PETSCMALLOC( 3*(aij->stash.n+1)*sizeof(Scalar) );
14578b31e54SBarry Smith   CHKPTRQ(svalues);
14678b31e54SBarry Smith   send_waits = (MPI_Request *) PETSCMALLOC( (nsends+1)*sizeof(MPI_Request));
14778b31e54SBarry Smith   CHKPTRQ(send_waits);
14878b31e54SBarry Smith   starts = (int *) PETSCMALLOC( numtids*sizeof(int) ); CHKPTRQ(starts);
1491eb62cbbSBarry Smith   starts[0] = 0;
1501eb62cbbSBarry Smith   for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
1511eb62cbbSBarry Smith   for ( i=0; i<aij->stash.n; i++ ) {
1521eb62cbbSBarry Smith     svalues[3*starts[owner[i]]]       = (Scalar)  aij->stash.idx[i];
1531eb62cbbSBarry Smith     svalues[3*starts[owner[i]]+1]     = (Scalar)  aij->stash.idy[i];
1541eb62cbbSBarry Smith     svalues[3*(starts[owner[i]]++)+2] =  aij->stash.array[i];
1551eb62cbbSBarry Smith   }
15678b31e54SBarry Smith   PETSCFREE(owner);
1571eb62cbbSBarry Smith   starts[0] = 0;
1581eb62cbbSBarry Smith   for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
1591eb62cbbSBarry Smith   count = 0;
1601eb62cbbSBarry Smith   for ( i=0; i<numtids; i++ ) {
1611eb62cbbSBarry Smith     if (procs[i]) {
162*c60448a5SBarry Smith       MPI_Isend((void*)(svalues+3*starts[i]),3*nprocs[i],MPIU_SCALAR,i,tag,
1631eb62cbbSBarry Smith                 comm,send_waits+count++);
1641eb62cbbSBarry Smith     }
1651eb62cbbSBarry Smith   }
16678b31e54SBarry Smith   PETSCFREE(starts); PETSCFREE(nprocs);
1671eb62cbbSBarry Smith 
1681eb62cbbSBarry Smith   /* Free cache space */
16978b31e54SBarry Smith   ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
1701eb62cbbSBarry Smith 
1711eb62cbbSBarry Smith   aij->svalues    = svalues;    aij->rvalues = rvalues;
1721eb62cbbSBarry Smith   aij->nsends     = nsends;     aij->nrecvs = nreceives;
1731eb62cbbSBarry Smith   aij->send_waits = send_waits; aij->recv_waits = recv_waits;
1741eb62cbbSBarry Smith   aij->rmax       = nmax;
1751eb62cbbSBarry Smith 
1761eb62cbbSBarry Smith   return 0;
1771eb62cbbSBarry Smith }
17844a69424SLois Curfman McInnes extern int MatSetUpMultiply_MPIAIJ(Mat);
1791eb62cbbSBarry Smith 
180fc76ce87SLois Curfman McInnes static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
1811eb62cbbSBarry Smith {
1821eb62cbbSBarry Smith   int        ierr;
18344a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1841eb62cbbSBarry Smith 
1851eb62cbbSBarry Smith   MPI_Status  *send_status,recv_status;
1866abc6512SBarry Smith   int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n;
1871eb62cbbSBarry Smith   int         row,col;
1881eb62cbbSBarry Smith   Scalar      *values,val;
1891eb62cbbSBarry Smith   InsertMode  addv = aij->insertmode;
1901eb62cbbSBarry Smith 
1911eb62cbbSBarry Smith   /*  wait on receives */
1921eb62cbbSBarry Smith   while (count) {
193d6dfbf8fSBarry Smith     MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
1941eb62cbbSBarry Smith     /* unpack receives into our local space */
195d6dfbf8fSBarry Smith     values = aij->rvalues + 3*imdex*aij->rmax;
196*c60448a5SBarry Smith     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
1971eb62cbbSBarry Smith     n = n/3;
1981eb62cbbSBarry Smith     for ( i=0; i<n; i++ ) {
1991eb62cbbSBarry Smith       row = (int) PETSCREAL(values[3*i]) - aij->rstart;
2001eb62cbbSBarry Smith       col = (int) PETSCREAL(values[3*i+1]);
2011eb62cbbSBarry Smith       val = values[3*i+2];
2021eb62cbbSBarry Smith       if (col >= aij->cstart && col < aij->cend) {
2031eb62cbbSBarry Smith           col -= aij->cstart;
2041eb62cbbSBarry Smith         MatSetValues(aij->A,1,&row,1,&col,&val,addv);
2051eb62cbbSBarry Smith       }
2061eb62cbbSBarry Smith       else {
207d6dfbf8fSBarry Smith         if (aij->assembled) {
20878b31e54SBarry Smith           if (!aij->colmap) {ierr = CreateColmap(mat); CHKERRQ(ierr);}
2099e25ed09SBarry Smith           col = aij->colmap[col] - 1;
2109e25ed09SBarry Smith           if (col < 0) {
21178b31e54SBarry Smith             SETERRQ(1,"Cannot insert new off diagonal block nonzero in\
2129e25ed09SBarry Smith                      already\
213d6dfbf8fSBarry Smith                      assembled matrix. Contact petsc-maint@mcs.anl.gov\
214d6dfbf8fSBarry Smith                      if your need this feature");
215d6dfbf8fSBarry Smith           }
2169e25ed09SBarry Smith         }
2171eb62cbbSBarry Smith         MatSetValues(aij->B,1,&row,1,&col,&val,addv);
2181eb62cbbSBarry Smith       }
2191eb62cbbSBarry Smith     }
2201eb62cbbSBarry Smith     count--;
2211eb62cbbSBarry Smith   }
22278b31e54SBarry Smith   PETSCFREE(aij->recv_waits); PETSCFREE(aij->rvalues);
2231eb62cbbSBarry Smith 
2241eb62cbbSBarry Smith   /* wait on sends */
2251eb62cbbSBarry Smith   if (aij->nsends) {
22678b31e54SBarry Smith     send_status = (MPI_Status *) PETSCMALLOC( aij->nsends*sizeof(MPI_Status) );
22778b31e54SBarry Smith     CHKPTRQ(send_status);
2281eb62cbbSBarry Smith     MPI_Waitall(aij->nsends,aij->send_waits,send_status);
22978b31e54SBarry Smith     PETSCFREE(send_status);
2301eb62cbbSBarry Smith   }
23178b31e54SBarry Smith   PETSCFREE(aij->send_waits); PETSCFREE(aij->svalues);
2321eb62cbbSBarry Smith 
23307a0e7adSLois Curfman McInnes   aij->insertmode = NOTSETVALUES;
23478b31e54SBarry Smith   ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
23578b31e54SBarry Smith   ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);
2361eb62cbbSBarry Smith 
2375e42470aSBarry Smith   if (!aij->assembled && mode == FINAL_ASSEMBLY) {
23878b31e54SBarry Smith     ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
239d6dfbf8fSBarry Smith     aij->assembled = 1;
2405e42470aSBarry Smith   }
24178b31e54SBarry Smith   ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
24278b31e54SBarry Smith   ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);
2435e42470aSBarry Smith 
2448a729477SBarry Smith   return 0;
2458a729477SBarry Smith }
2468a729477SBarry Smith 
2472ee70a88SLois Curfman McInnes static int MatZeroEntries_MPIAIJ(Mat A)
2481eb62cbbSBarry Smith {
24944a69424SLois Curfman McInnes   Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
250dbd7a890SLois Curfman McInnes   int ierr;
25178b31e54SBarry Smith   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
25278b31e54SBarry Smith   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
2531eb62cbbSBarry Smith   return 0;
2541eb62cbbSBarry Smith }
2551eb62cbbSBarry Smith 
2561eb62cbbSBarry Smith /* again this uses the same basic stratagy as in the assembly and
2571eb62cbbSBarry Smith    scatter create routines, we should try to do it systemamatically
2581eb62cbbSBarry Smith    if we can figure out the proper level of generality. */
2591eb62cbbSBarry Smith 
2601eb62cbbSBarry Smith /* the code does not do the diagonal entries correctly unless the
2611eb62cbbSBarry Smith    matrix is square and the column and row owerships are identical.
2621eb62cbbSBarry Smith    This is a BUG. The only way to fix it seems to be to access
2631eb62cbbSBarry Smith    aij->A and aij->B directly and not through the MatZeroRows()
2641eb62cbbSBarry Smith    routine.
2651eb62cbbSBarry Smith */
26644a69424SLois Curfman McInnes static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
2671eb62cbbSBarry Smith {
26844a69424SLois Curfman McInnes   Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
2691eb62cbbSBarry Smith   int            i,ierr,N, *rows,*owners = l->rowners,numtids = l->numtids;
2706abc6512SBarry Smith   int            *procs,*nprocs,j,found,idx,nsends,*work;
2711eb62cbbSBarry Smith   int            nmax,*svalues,*starts,*owner,nrecvs,mytid = l->mytid;
2725392566eSBarry Smith   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
273d6dfbf8fSBarry Smith   int            *lens,imdex,*lrows,*values;
274d6dfbf8fSBarry Smith   MPI_Comm       comm = A->comm;
2751eb62cbbSBarry Smith   MPI_Request    *send_waits,*recv_waits;
2761eb62cbbSBarry Smith   MPI_Status     recv_status,*send_status;
2771eb62cbbSBarry Smith   IS             istmp;
2781eb62cbbSBarry Smith 
279c8f67822SLois Curfman McInnes   if (!l->assembled)
28078b31e54SBarry Smith     SETERRQ(1,"MatZeroRows_MPIAIJ: Must assemble matrix first");
28178b31e54SBarry Smith   ierr = ISGetLocalSize(is,&N); CHKERRQ(ierr);
28278b31e54SBarry Smith   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
2831eb62cbbSBarry Smith 
2841eb62cbbSBarry Smith   /*  first count number of contributors to each processor */
28578b31e54SBarry Smith   nprocs = (int *) PETSCMALLOC( 2*numtids*sizeof(int) ); CHKPTRQ(nprocs);
28678b31e54SBarry Smith   PETSCMEMSET(nprocs,0,2*numtids*sizeof(int)); procs = nprocs + numtids;
28778b31e54SBarry Smith   owner = (int *) PETSCMALLOC((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
2881eb62cbbSBarry Smith   for ( i=0; i<N; i++ ) {
2891eb62cbbSBarry Smith     idx = rows[i];
2901eb62cbbSBarry Smith     found = 0;
2911eb62cbbSBarry Smith     for ( j=0; j<numtids; j++ ) {
2921eb62cbbSBarry Smith       if (idx >= owners[j] && idx < owners[j+1]) {
2931eb62cbbSBarry Smith         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
2941eb62cbbSBarry Smith       }
2951eb62cbbSBarry Smith     }
29678b31e54SBarry Smith     if (!found) SETERRQ(1,"Index out of range.");
2971eb62cbbSBarry Smith   }
2981eb62cbbSBarry Smith   nsends = 0;  for ( i=0; i<numtids; i++ ) { nsends += procs[i];}
2991eb62cbbSBarry Smith 
3001eb62cbbSBarry Smith   /* inform other processors of number of messages and max length*/
30178b31e54SBarry Smith   work = (int *) PETSCMALLOC( numtids*sizeof(int) ); CHKPTRQ(work);
3021eb62cbbSBarry Smith   MPI_Allreduce((void *) procs,(void *) work,numtids,MPI_INT,MPI_SUM,comm);
3031eb62cbbSBarry Smith   nrecvs = work[mytid];
3041eb62cbbSBarry Smith   MPI_Allreduce((void *) nprocs,(void *) work,numtids,MPI_INT,MPI_MAX,comm);
3051eb62cbbSBarry Smith   nmax = work[mytid];
30678b31e54SBarry Smith   PETSCFREE(work);
3071eb62cbbSBarry Smith 
3081eb62cbbSBarry Smith   /* post receives:   */
30978b31e54SBarry Smith   rvalues = (int *) PETSCMALLOC((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
31078b31e54SBarry Smith   CHKPTRQ(rvalues);
31178b31e54SBarry Smith   recv_waits = (MPI_Request *) PETSCMALLOC((nrecvs+1)*sizeof(MPI_Request));
31278b31e54SBarry Smith   CHKPTRQ(recv_waits);
3131eb62cbbSBarry Smith   for ( i=0; i<nrecvs; i++ ) {
3141eb62cbbSBarry Smith     MPI_Irecv((void *)(rvalues+nmax*i),nmax,MPI_INT,MPI_ANY_SOURCE,tag,
3151eb62cbbSBarry Smith               comm,recv_waits+i);
3161eb62cbbSBarry Smith   }
3171eb62cbbSBarry Smith 
3181eb62cbbSBarry Smith   /* do sends:
3191eb62cbbSBarry Smith       1) starts[i] gives the starting index in svalues for stuff going to
3201eb62cbbSBarry Smith          the ith processor
3211eb62cbbSBarry Smith   */
32278b31e54SBarry Smith   svalues = (int *) PETSCMALLOC( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
32378b31e54SBarry Smith   send_waits = (MPI_Request *) PETSCMALLOC( (nsends+1)*sizeof(MPI_Request));
32478b31e54SBarry Smith   CHKPTRQ(send_waits);
32578b31e54SBarry Smith   starts = (int *) PETSCMALLOC( (numtids+1)*sizeof(int) ); CHKPTRQ(starts);
3261eb62cbbSBarry Smith   starts[0] = 0;
3271eb62cbbSBarry Smith   for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
3281eb62cbbSBarry Smith   for ( i=0; i<N; i++ ) {
3291eb62cbbSBarry Smith     svalues[starts[owner[i]]++] = rows[i];
3301eb62cbbSBarry Smith   }
3311eb62cbbSBarry Smith   ISRestoreIndices(is,&rows);
3321eb62cbbSBarry Smith 
3331eb62cbbSBarry Smith   starts[0] = 0;
3341eb62cbbSBarry Smith   for ( i=1; i<numtids+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
3351eb62cbbSBarry Smith   count = 0;
3361eb62cbbSBarry Smith   for ( i=0; i<numtids; i++ ) {
3371eb62cbbSBarry Smith     if (procs[i]) {
3381eb62cbbSBarry Smith       MPI_Isend((void*)(svalues+starts[i]),nprocs[i],MPI_INT,i,tag,
3391eb62cbbSBarry Smith                 comm,send_waits+count++);
3401eb62cbbSBarry Smith     }
3411eb62cbbSBarry Smith   }
34278b31e54SBarry Smith   PETSCFREE(starts);
3431eb62cbbSBarry Smith 
3441eb62cbbSBarry Smith   base = owners[mytid];
3451eb62cbbSBarry Smith 
3461eb62cbbSBarry Smith   /*  wait on receives */
34778b31e54SBarry Smith   lens = (int *) PETSCMALLOC( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
3481eb62cbbSBarry Smith   source = lens + nrecvs;
3491eb62cbbSBarry Smith   count = nrecvs; slen = 0;
3501eb62cbbSBarry Smith   while (count) {
351d6dfbf8fSBarry Smith     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
3521eb62cbbSBarry Smith     /* unpack receives into our local space */
3531eb62cbbSBarry Smith     MPI_Get_count(&recv_status,MPI_INT,&n);
354d6dfbf8fSBarry Smith     source[imdex]  = recv_status.MPI_SOURCE;
355d6dfbf8fSBarry Smith     lens[imdex]  = n;
3561eb62cbbSBarry Smith     slen += n;
3571eb62cbbSBarry Smith     count--;
3581eb62cbbSBarry Smith   }
35978b31e54SBarry Smith   PETSCFREE(recv_waits);
3601eb62cbbSBarry Smith 
3611eb62cbbSBarry Smith   /* move the data into the send scatter */
36278b31e54SBarry Smith   lrows = (int *) PETSCMALLOC( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
3631eb62cbbSBarry Smith   count = 0;
3641eb62cbbSBarry Smith   for ( i=0; i<nrecvs; i++ ) {
3651eb62cbbSBarry Smith     values = rvalues + i*nmax;
3661eb62cbbSBarry Smith     for ( j=0; j<lens[i]; j++ ) {
3671eb62cbbSBarry Smith       lrows[count++] = values[j] - base;
3681eb62cbbSBarry Smith     }
3691eb62cbbSBarry Smith   }
37078b31e54SBarry Smith   PETSCFREE(rvalues); PETSCFREE(lens);
37178b31e54SBarry Smith   PETSCFREE(owner); PETSCFREE(nprocs);
3721eb62cbbSBarry Smith 
3731eb62cbbSBarry Smith   /* actually zap the local rows */
3746b5873e3SBarry Smith   ierr = ISCreateSequential(MPI_COMM_SELF,slen,lrows,&istmp);
37578b31e54SBarry Smith   CHKERRQ(ierr);  PETSCFREE(lrows);
37678b31e54SBarry Smith   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
37778b31e54SBarry Smith   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
37878b31e54SBarry Smith   ierr = ISDestroy(istmp); CHKERRQ(ierr);
3791eb62cbbSBarry Smith 
3801eb62cbbSBarry Smith   /* wait on sends */
3811eb62cbbSBarry Smith   if (nsends) {
38278b31e54SBarry Smith     send_status = (MPI_Status *) PETSCMALLOC( nsends*sizeof(MPI_Status) );
38378b31e54SBarry Smith     CHKPTRQ(send_status);
3841eb62cbbSBarry Smith     MPI_Waitall(nsends,send_waits,send_status);
38578b31e54SBarry Smith     PETSCFREE(send_status);
3861eb62cbbSBarry Smith   }
38778b31e54SBarry Smith   PETSCFREE(send_waits); PETSCFREE(svalues);
3881eb62cbbSBarry Smith 
3891eb62cbbSBarry Smith 
3901eb62cbbSBarry Smith   return 0;
3911eb62cbbSBarry Smith }
3921eb62cbbSBarry Smith 
39344a69424SLois Curfman McInnes static int MatMult_MPIAIJ(Mat aijin,Vec xx,Vec yy)
3941eb62cbbSBarry Smith {
39544a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data;
3961eb62cbbSBarry Smith   int        ierr;
39778b31e54SBarry Smith   if (!aij->assembled) SETERRQ(1,"MatMult_MPIAIJ: must assemble matrix first");
39806be10caSBarry Smith   ierr = VecScatterBegin(xx,aij->lvec,INSERTVALUES,SCATTERALL,aij->Mvctx);
39978b31e54SBarry Smith   CHKERRQ(ierr);
40078b31e54SBarry Smith   ierr = MatMult(aij->A,xx,yy); CHKERRQ(ierr);
40106be10caSBarry Smith   ierr = VecScatterEnd(xx,aij->lvec,INSERTVALUES,SCATTERALL,aij->Mvctx);
40278b31e54SBarry Smith   CHKERRQ(ierr);
40378b31e54SBarry Smith   ierr = MatMultAdd(aij->B,aij->lvec,yy,yy); CHKERRQ(ierr);
4041eb62cbbSBarry Smith   return 0;
4051eb62cbbSBarry Smith }
4061eb62cbbSBarry Smith 
40744a69424SLois Curfman McInnes static int MatMultAdd_MPIAIJ(Mat aijin,Vec xx,Vec yy,Vec zz)
408da3a660dSBarry Smith {
40944a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data;
410da3a660dSBarry Smith   int        ierr;
41178b31e54SBarry Smith   if (!aij->assembled) SETERRQ(1,"MatMult_MPIAIJ: must assemble matrix first");
41206be10caSBarry Smith   ierr = VecScatterBegin(xx,aij->lvec,INSERTVALUES,SCATTERALL,aij->Mvctx);
41378b31e54SBarry Smith   CHKERRQ(ierr);
41478b31e54SBarry Smith   ierr = MatMultAdd(aij->A,xx,yy,zz); CHKERRQ(ierr);
41506be10caSBarry Smith   ierr = VecScatterEnd(xx,aij->lvec,INSERTVALUES,SCATTERALL,aij->Mvctx);
41678b31e54SBarry Smith   CHKERRQ(ierr);
41778b31e54SBarry Smith   ierr = MatMultAdd(aij->B,aij->lvec,zz,zz); CHKERRQ(ierr);
418da3a660dSBarry Smith   return 0;
419da3a660dSBarry Smith }
420da3a660dSBarry Smith 
42144a69424SLois Curfman McInnes static int MatMultTrans_MPIAIJ(Mat aijin,Vec xx,Vec yy)
422da3a660dSBarry Smith {
42344a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data;
424da3a660dSBarry Smith   int        ierr;
425da3a660dSBarry Smith 
42644a69424SLois Curfman McInnes   if (!aij->assembled)
42778b31e54SBarry Smith     SETERRQ(1,"MatMulTrans_MPIAIJ: must assemble matrix first");
428da3a660dSBarry Smith   /* do nondiagonal part */
42978b31e54SBarry Smith   ierr = MatMultTrans(aij->B,xx,aij->lvec); CHKERRQ(ierr);
430da3a660dSBarry Smith   /* send it on its way */
43106be10caSBarry Smith   ierr = VecScatterBegin(aij->lvec,yy,ADDVALUES,
43278b31e54SBarry Smith            (ScatterMode)(SCATTERALL|SCATTERREVERSE),aij->Mvctx); CHKERRQ(ierr);
433da3a660dSBarry Smith   /* do local part */
43478b31e54SBarry Smith   ierr = MatMultTrans(aij->A,xx,yy); CHKERRQ(ierr);
435da3a660dSBarry Smith   /* receive remote parts: note this assumes the values are not actually */
436da3a660dSBarry Smith   /* inserted in yy until the next line, which is true for my implementation*/
437da3a660dSBarry Smith   /* but is not perhaps always true. */
43806be10caSBarry Smith   ierr = VecScatterEnd(aij->lvec,yy,ADDVALUES,
43978b31e54SBarry Smith          (ScatterMode)(SCATTERALL|SCATTERREVERSE),aij->Mvctx); CHKERRQ(ierr);
440da3a660dSBarry Smith   return 0;
441da3a660dSBarry Smith }
442da3a660dSBarry Smith 
44344a69424SLois Curfman McInnes static int MatMultTransAdd_MPIAIJ(Mat aijin,Vec xx,Vec yy,Vec zz)
444da3a660dSBarry Smith {
44544a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data;
446da3a660dSBarry Smith   int        ierr;
447da3a660dSBarry Smith 
44844a69424SLois Curfman McInnes   if (!aij->assembled)
44978b31e54SBarry Smith     SETERRQ(1,"MatMulTransAdd_MPIAIJ: must assemble matrix first");
450da3a660dSBarry Smith   /* do nondiagonal part */
45178b31e54SBarry Smith   ierr = MatMultTrans(aij->B,xx,aij->lvec); CHKERRQ(ierr);
452da3a660dSBarry Smith   /* send it on its way */
45306be10caSBarry Smith   ierr = VecScatterBegin(aij->lvec,zz,ADDVALUES,
45478b31e54SBarry Smith          (ScatterMode)(SCATTERALL|SCATTERREVERSE),aij->Mvctx); CHKERRQ(ierr);
455da3a660dSBarry Smith   /* do local part */
45678b31e54SBarry Smith   ierr = MatMultTransAdd(aij->A,xx,yy,zz); CHKERRQ(ierr);
457da3a660dSBarry Smith   /* receive remote parts: note this assumes the values are not actually */
458da3a660dSBarry Smith   /* inserted in yy until the next line, which is true for my implementation*/
459da3a660dSBarry Smith   /* but is not perhaps always true. */
46006be10caSBarry Smith   ierr = VecScatterEnd(aij->lvec,zz,ADDVALUES,
46178b31e54SBarry Smith        (ScatterMode)(SCATTERALL|SCATTERREVERSE),aij->Mvctx); CHKERRQ(ierr);
462da3a660dSBarry Smith   return 0;
463da3a660dSBarry Smith }
464da3a660dSBarry Smith 
4651eb62cbbSBarry Smith /*
4661eb62cbbSBarry Smith   This only works correctly for square matrices where the subblock A->A is the
4671eb62cbbSBarry Smith    diagonal block
4681eb62cbbSBarry Smith */
469d1710a03SLois Curfman McInnes static int MatGetDiagonal_MPIAIJ(Mat Ain,Vec v)
4701eb62cbbSBarry Smith {
47144a69424SLois Curfman McInnes   Mat_MPIAIJ *A = (Mat_MPIAIJ *) Ain->data;
47278b31e54SBarry Smith   if (!A->assembled) SETERRQ(1,"MatGetDiag_MPIAIJ: must assemble matrix first");
4731eb62cbbSBarry Smith   return MatGetDiagonal(A->A,v);
4741eb62cbbSBarry Smith }
4751eb62cbbSBarry Smith 
47644a69424SLois Curfman McInnes static int MatDestroy_MPIAIJ(PetscObject obj)
4771eb62cbbSBarry Smith {
4781eb62cbbSBarry Smith   Mat        mat = (Mat) obj;
47944a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
4801eb62cbbSBarry Smith   int        ierr;
481a5a9c739SBarry Smith #if defined(PETSC_LOG)
482a5a9c739SBarry Smith   PLogObjectState(obj,"Rows %d Cols %d",aij->M,aij->N);
483a5a9c739SBarry Smith #endif
48478b31e54SBarry Smith   PETSCFREE(aij->rowners);
48578b31e54SBarry Smith   ierr = MatDestroy(aij->A); CHKERRQ(ierr);
48678b31e54SBarry Smith   ierr = MatDestroy(aij->B); CHKERRQ(ierr);
48778b31e54SBarry Smith   if (aij->colmap) PETSCFREE(aij->colmap);
48878b31e54SBarry Smith   if (aij->garray) PETSCFREE(aij->garray);
4891eb62cbbSBarry Smith   if (aij->lvec) VecDestroy(aij->lvec);
4901eb62cbbSBarry Smith   if (aij->Mvctx) VecScatterCtxDestroy(aij->Mvctx);
49178b31e54SBarry Smith   PETSCFREE(aij);
492a5a9c739SBarry Smith   PLogObjectDestroy(mat);
493a5a9c739SBarry Smith   PETSCHEADERDESTROY(mat);
4941eb62cbbSBarry Smith   return 0;
4951eb62cbbSBarry Smith }
4967857610eSBarry Smith #include "draw.h"
497ee50ffe9SBarry Smith #include "pviewer.h"
498ee50ffe9SBarry Smith 
49944a69424SLois Curfman McInnes static int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
5001eb62cbbSBarry Smith {
5011eb62cbbSBarry Smith   Mat        mat = (Mat) obj;
50244a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
5031eb62cbbSBarry Smith   int        ierr;
504d13ab20cSBarry Smith   PetscObject vobj = (PetscObject) viewer;
5051eb62cbbSBarry Smith 
50678b31e54SBarry Smith   if (!aij->assembled) SETERRQ(1,"MatView_MPIAIJ: must assemble matrix first");
507154123eaSLois Curfman McInnes   if (!viewer) { /* so that viewers may be used from debuggers */
508154123eaSLois Curfman McInnes     viewer = STDOUT_VIEWER; vobj = (PetscObject) viewer;
509154123eaSLois Curfman McInnes   }
510ab254492SBarry Smith   if (vobj->cookie == DRAW_COOKIE && vobj->type == NULLWINDOW) return 0;
5117857610eSBarry Smith   if (vobj->cookie == VIEWER_COOKIE && vobj->type == FILE_VIEWER) {
5124a0ce102SLois Curfman McInnes     FILE *fd = ViewerFileGetPointer_Private(viewer);
5137f813858SBarry Smith     MPIU_Seq_begin(mat->comm,1);
514d13ab20cSBarry Smith     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
5151eb62cbbSBarry Smith              aij->mytid,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
5161eb62cbbSBarry Smith              aij->cend);
51778b31e54SBarry Smith     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
51878b31e54SBarry Smith     ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
519d13ab20cSBarry Smith     fflush(fd);
5207f813858SBarry Smith     MPIU_Seq_end(mat->comm,1);
521d13ab20cSBarry Smith   }
5227857610eSBarry Smith   else if ((vobj->cookie == VIEWER_COOKIE && vobj->type == FILES_VIEWER) ||
5237857610eSBarry Smith             vobj->cookie == DRAW_COOKIE) {
52495373324SBarry Smith     int numtids = aij->numtids, mytid = aij->mytid;
52595373324SBarry Smith     if (numtids == 1) {
52678b31e54SBarry Smith       ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
52795373324SBarry Smith     }
52895373324SBarry Smith     else {
52995373324SBarry Smith       /* assemble the entire matrix onto first processor. */
53095373324SBarry Smith       Mat     A;
5312ee70a88SLois Curfman McInnes       Mat_AIJ *Aloc;
5322eb8c8abSBarry Smith       int     M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
53395373324SBarry Smith       Scalar  *a;
5342ee70a88SLois Curfman McInnes 
53595373324SBarry Smith       if (!mytid) {
53695373324SBarry Smith         ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,0,0,0,&A);
53795373324SBarry Smith       }
53895373324SBarry Smith       else {
53995373324SBarry Smith         ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,0,0,0,&A);
54095373324SBarry Smith       }
54178b31e54SBarry Smith       CHKERRQ(ierr);
54295373324SBarry Smith 
54395373324SBarry Smith       /* copy over the A part */
5442ee70a88SLois Curfman McInnes       Aloc = (Mat_AIJ*) aij->A->data;
5452ee70a88SLois Curfman McInnes       m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
54695373324SBarry Smith       row = aij->rstart;
54795373324SBarry Smith       for ( i=0; i<ai[m]; i++ ) {aj[i] += aij->cstart - 1;}
54895373324SBarry Smith       for ( i=0; i<m; i++ ) {
54907a0e7adSLois Curfman McInnes         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERTVALUES);
55078b31e54SBarry Smith         CHKERRQ(ierr);
55195373324SBarry Smith         row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
55295373324SBarry Smith       }
5532ee70a88SLois Curfman McInnes       aj = Aloc->j;
55495373324SBarry Smith       for ( i=0; i<ai[m]; i++ ) {aj[i] -= aij->cstart - 1;}
55595373324SBarry Smith 
55695373324SBarry Smith       /* copy over the B part */
5572ee70a88SLois Curfman McInnes       Aloc = (Mat_AIJ*) aij->B->data;
5582ee70a88SLois Curfman McInnes       m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
55995373324SBarry Smith       row = aij->rstart;
56078b31e54SBarry Smith       ct = cols = (int *) PETSCMALLOC( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
56195373324SBarry Smith       for ( i=0; i<ai[m]; i++ ) {cols[i] = aij->garray[aj[i]-1];}
56295373324SBarry Smith       for ( i=0; i<m; i++ ) {
56307a0e7adSLois Curfman McInnes         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERTVALUES);
56478b31e54SBarry Smith         CHKERRQ(ierr);
56595373324SBarry Smith         row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
56695373324SBarry Smith       }
56778b31e54SBarry Smith       PETSCFREE(ct);
56878b31e54SBarry Smith       ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
56978b31e54SBarry Smith       ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
57095373324SBarry Smith       if (!mytid) {
57178b31e54SBarry Smith         ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
57295373324SBarry Smith       }
57378b31e54SBarry Smith       ierr = MatDestroy(A); CHKERRQ(ierr);
57495373324SBarry Smith     }
57595373324SBarry Smith   }
5761eb62cbbSBarry Smith   return 0;
5771eb62cbbSBarry Smith }
5781eb62cbbSBarry Smith 
579d3c9fed9SLois Curfman McInnes extern int MatMarkDiag_AIJ(Mat_AIJ  *);
5801eb62cbbSBarry Smith /*
5811eb62cbbSBarry Smith     This has to provide several versions.
5821eb62cbbSBarry Smith 
5831eb62cbbSBarry Smith      1) per sequential
5841eb62cbbSBarry Smith      2) a) use only local smoothing updating outer values only once.
5851eb62cbbSBarry Smith         b) local smoothing updating outer values each inner iteration
586d6dfbf8fSBarry Smith      3) color updating out values betwen colors.
5871eb62cbbSBarry Smith */
588fc76ce87SLois Curfman McInnes static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
589fc76ce87SLois Curfman McInnes                            double shift,int its,Vec xx)
5908a729477SBarry Smith {
59144a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
592d6dfbf8fSBarry Smith   Mat        AA = mat->A, BB = mat->B;
59344a69424SLois Curfman McInnes   Mat_AIJ    *A = (Mat_AIJ *) AA->data, *B = (Mat_AIJ *)BB->data;
5946abc6512SBarry Smith   Scalar     zero = 0.0,*b,*x,*xs,*ls,d,*v,sum,scale,*t,*ts;
5956abc6512SBarry Smith   int        ierr,*idx, *diag;
5966abc6512SBarry Smith   int        n = mat->n, m = mat->m, i;
597da3a660dSBarry Smith   Vec        tt;
5988a729477SBarry Smith 
59978b31e54SBarry Smith   if (!mat->assembled) SETERRQ(1,"MatRelax_MPIAIJ: must assemble matrix first");
6001eb62cbbSBarry Smith 
601d6dfbf8fSBarry Smith   VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
602d6dfbf8fSBarry Smith   xs = x -1; /* shift by one for index start of 1 */
603da3a660dSBarry Smith   ls--;
604d3c9fed9SLois Curfman McInnes   if (!A->diag) {if ((ierr = MatMarkDiag_AIJ(A))) return ierr;}
605d6dfbf8fSBarry Smith   diag = A->diag;
606acb40c82SBarry Smith   if (flag == SOR_APPLY_UPPER || flag == SOR_APPLY_LOWER) {
60778b31e54SBarry Smith     SETERRQ(1,"That option not yet support for parallel AIJ matrices");
608acb40c82SBarry Smith   }
609da3a660dSBarry Smith   if (flag & SOR_EISENSTAT) {
610da3a660dSBarry Smith     /* Let  A = L + U + D; where L is lower trianglar,
611da3a660dSBarry Smith     U is upper triangular, E is diagonal; This routine applies
612da3a660dSBarry Smith 
613da3a660dSBarry Smith             (L + E)^{-1} A (U + E)^{-1}
614da3a660dSBarry Smith 
615da3a660dSBarry Smith     to a vector efficiently using Eisenstat's trick. This is for
616da3a660dSBarry Smith     the case of SSOR preconditioner, so E is D/omega where omega
617da3a660dSBarry Smith     is the relaxation factor.
618da3a660dSBarry Smith     */
61978b31e54SBarry Smith     ierr = VecDuplicate(xx,&tt); CHKERRQ(ierr);
620da3a660dSBarry Smith     VecGetArray(tt,&t);
621da3a660dSBarry Smith     scale = (2.0/omega) - 1.0;
622da3a660dSBarry Smith     /*  x = (E + U)^{-1} b */
623da3a660dSBarry Smith     VecSet(&zero,mat->lvec);
62406be10caSBarry Smith     ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
62578b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
626da3a660dSBarry Smith     for ( i=m-1; i>-1; i-- ) {
627da3a660dSBarry Smith       n    = A->i[i+1] - diag[i] - 1;
628da3a660dSBarry Smith       idx  = A->j + diag[i];
629da3a660dSBarry Smith       v    = A->a + diag[i];
630da3a660dSBarry Smith       sum  = b[i];
631da3a660dSBarry Smith       SPARSEDENSEMDOT(sum,xs,v,idx,n);
632da3a660dSBarry Smith       d    = shift + A->a[diag[i]-1];
633da3a660dSBarry Smith       n    = B->i[i+1] - B->i[i];
634da3a660dSBarry Smith       idx  = B->j + B->i[i] - 1;
635da3a660dSBarry Smith       v    = B->a + B->i[i] - 1;
636da3a660dSBarry Smith       SPARSEDENSEMDOT(sum,ls,v,idx,n);
637da3a660dSBarry Smith       x[i] = omega*(sum/d);
638da3a660dSBarry Smith     }
639edd2f0e1SBarry Smith     ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
64078b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
641da3a660dSBarry Smith 
642da3a660dSBarry Smith     /*  t = b - (2*E - D)x */
643da3a660dSBarry Smith     v = A->a;
644da3a660dSBarry Smith     for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ - 1])*x[i]; }
645da3a660dSBarry Smith 
646da3a660dSBarry Smith     /*  t = (E + L)^{-1}t */
647da3a660dSBarry Smith     ts = t - 1; /* shifted by one for index start of a or mat->j*/
648da3a660dSBarry Smith     diag = A->diag;
649da3a660dSBarry Smith     VecSet(&zero,mat->lvec);
65006be10caSBarry Smith     ierr = VecPipelineBegin(tt,mat->lvec,INSERTVALUES,PIPELINEDOWN,
65178b31e54SBarry Smith                                                  mat->Mvctx); CHKERRQ(ierr);
652da3a660dSBarry Smith     for ( i=0; i<m; i++ ) {
653da3a660dSBarry Smith       n    = diag[i] - A->i[i];
654da3a660dSBarry Smith       idx  = A->j + A->i[i] - 1;
655da3a660dSBarry Smith       v    = A->a + A->i[i] - 1;
656da3a660dSBarry Smith       sum  = t[i];
657da3a660dSBarry Smith       SPARSEDENSEMDOT(sum,ts,v,idx,n);
658da3a660dSBarry Smith       d    = shift + A->a[diag[i]-1];
659da3a660dSBarry Smith       n    = B->i[i+1] - B->i[i];
660da3a660dSBarry Smith       idx  = B->j + B->i[i] - 1;
661da3a660dSBarry Smith       v    = B->a + B->i[i] - 1;
662da3a660dSBarry Smith       SPARSEDENSEMDOT(sum,ls,v,idx,n);
663da3a660dSBarry Smith       t[i] = omega*(sum/d);
664da3a660dSBarry Smith     }
665edd2f0e1SBarry Smith     ierr = VecPipelineEnd(tt,mat->lvec,INSERTVALUES,PIPELINEDOWN,
66678b31e54SBarry Smith                                                     mat->Mvctx); CHKERRQ(ierr);
667da3a660dSBarry Smith     /*  x = x + t */
668da3a660dSBarry Smith     for ( i=0; i<m; i++ ) { x[i] += t[i]; }
669da3a660dSBarry Smith     VecDestroy(tt);
670da3a660dSBarry Smith     return 0;
671da3a660dSBarry Smith   }
672da3a660dSBarry Smith 
6731eb62cbbSBarry Smith 
674d6dfbf8fSBarry Smith   if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){
675da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
676da3a660dSBarry Smith       VecSet(&zero,mat->lvec); VecSet(&zero,xx);
677da3a660dSBarry Smith     }
678da3a660dSBarry Smith     else {
67906be10caSBarry Smith       ierr=VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERUP,mat->Mvctx);
68078b31e54SBarry Smith       CHKERRQ(ierr);
68106be10caSBarry Smith       ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERUP,mat->Mvctx);
68278b31e54SBarry Smith       CHKERRQ(ierr);
683da3a660dSBarry Smith     }
684d6dfbf8fSBarry Smith     while (its--) {
685d6dfbf8fSBarry Smith       /* go down through the rows */
68606be10caSBarry Smith       ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN,
68778b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
688d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
689d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
690d6dfbf8fSBarry Smith         idx  = A->j + A->i[i] - 1;
691d6dfbf8fSBarry Smith         v    = A->a + A->i[i] - 1;
692d6dfbf8fSBarry Smith         sum  = b[i];
693d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
694d6dfbf8fSBarry Smith         d    = shift + A->a[diag[i]-1];
695d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
696d6dfbf8fSBarry Smith         idx  = B->j + B->i[i] - 1;
697d6dfbf8fSBarry Smith         v    = B->a + B->i[i] - 1;
698d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
699d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
700d6dfbf8fSBarry Smith       }
701edd2f0e1SBarry Smith       ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN,
70278b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
703d6dfbf8fSBarry Smith       /* come up through the rows */
70406be10caSBarry Smith       ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
70578b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
706d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
707d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
708d6dfbf8fSBarry Smith         idx  = A->j + A->i[i] - 1;
709d6dfbf8fSBarry Smith         v    = A->a + A->i[i] - 1;
710d6dfbf8fSBarry Smith         sum  = b[i];
711d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
712d6dfbf8fSBarry Smith         d    = shift + A->a[diag[i]-1];
713d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
714d6dfbf8fSBarry Smith         idx  = B->j + B->i[i] - 1;
715d6dfbf8fSBarry Smith         v    = B->a + B->i[i] - 1;
716d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
717d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
718d6dfbf8fSBarry Smith       }
719edd2f0e1SBarry Smith       ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
72078b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
721d6dfbf8fSBarry Smith     }
722d6dfbf8fSBarry Smith   }
723d6dfbf8fSBarry Smith   else if (flag & SOR_FORWARD_SWEEP){
724da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
725da3a660dSBarry Smith       VecSet(&zero,mat->lvec);
72606be10caSBarry Smith       ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN,
72778b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
728da3a660dSBarry Smith       for ( i=0; i<m; i++ ) {
729da3a660dSBarry Smith         n    = diag[i] - A->i[i];
730da3a660dSBarry Smith         idx  = A->j + A->i[i] - 1;
731da3a660dSBarry Smith         v    = A->a + A->i[i] - 1;
732da3a660dSBarry Smith         sum  = b[i];
733da3a660dSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
734da3a660dSBarry Smith         d    = shift + A->a[diag[i]-1];
735da3a660dSBarry Smith         n    = B->i[i+1] - B->i[i];
736da3a660dSBarry Smith         idx  = B->j + B->i[i] - 1;
737da3a660dSBarry Smith         v    = B->a + B->i[i] - 1;
738da3a660dSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
739da3a660dSBarry Smith         x[i] = omega*(sum/d);
740da3a660dSBarry Smith       }
741edd2f0e1SBarry Smith       ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN,
74278b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
743da3a660dSBarry Smith       its--;
744da3a660dSBarry Smith     }
745d6dfbf8fSBarry Smith     while (its--) {
74606be10caSBarry Smith       ierr=VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERUP,mat->Mvctx);
74778b31e54SBarry Smith       CHKERRQ(ierr);
74806be10caSBarry Smith       ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERUP,mat->Mvctx);
74978b31e54SBarry Smith       CHKERRQ(ierr);
75006be10caSBarry Smith       ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN,
75178b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
752d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
753d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
754d6dfbf8fSBarry Smith         idx  = A->j + A->i[i] - 1;
755d6dfbf8fSBarry Smith         v    = A->a + A->i[i] - 1;
756d6dfbf8fSBarry Smith         sum  = b[i];
757d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
758d6dfbf8fSBarry Smith         d    = shift + A->a[diag[i]-1];
759d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
760d6dfbf8fSBarry Smith         idx  = B->j + B->i[i] - 1;
761d6dfbf8fSBarry Smith         v    = B->a + B->i[i] - 1;
762d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
763d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
764d6dfbf8fSBarry Smith       }
765edd2f0e1SBarry Smith       ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN,
76678b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
767d6dfbf8fSBarry Smith     }
768d6dfbf8fSBarry Smith   }
769d6dfbf8fSBarry Smith   else if (flag & SOR_BACKWARD_SWEEP){
770da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
771da3a660dSBarry Smith       VecSet(&zero,mat->lvec);
77206be10caSBarry Smith       ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
77378b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
774da3a660dSBarry Smith       for ( i=m-1; i>-1; i-- ) {
775da3a660dSBarry Smith         n    = A->i[i+1] - diag[i] - 1;
776da3a660dSBarry Smith         idx  = A->j + diag[i];
777da3a660dSBarry Smith         v    = A->a + diag[i];
778da3a660dSBarry Smith         sum  = b[i];
779da3a660dSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
780da3a660dSBarry Smith         d    = shift + A->a[diag[i]-1];
781da3a660dSBarry Smith         n    = B->i[i+1] - B->i[i];
782da3a660dSBarry Smith         idx  = B->j + B->i[i] - 1;
783da3a660dSBarry Smith         v    = B->a + B->i[i] - 1;
784da3a660dSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
785da3a660dSBarry Smith         x[i] = omega*(sum/d);
786da3a660dSBarry Smith       }
787edd2f0e1SBarry Smith       ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
78878b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
789da3a660dSBarry Smith       its--;
790da3a660dSBarry Smith     }
791d6dfbf8fSBarry Smith     while (its--) {
79206be10caSBarry Smith       ierr = VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERDOWN,
79378b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
79406be10caSBarry Smith       ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERDOWN,
79578b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
79606be10caSBarry Smith       ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
79778b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
798d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
799d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
800d6dfbf8fSBarry Smith         idx  = A->j + A->i[i] - 1;
801d6dfbf8fSBarry Smith         v    = A->a + A->i[i] - 1;
802d6dfbf8fSBarry Smith         sum  = b[i];
803d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
804d6dfbf8fSBarry Smith         d    = shift + A->a[diag[i]-1];
805d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
806d6dfbf8fSBarry Smith         idx  = B->j + B->i[i] - 1;
807d6dfbf8fSBarry Smith         v    = B->a + B->i[i] - 1;
808d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
809d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
810d6dfbf8fSBarry Smith       }
811edd2f0e1SBarry Smith       ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
81278b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
813d6dfbf8fSBarry Smith     }
814d6dfbf8fSBarry Smith   }
815d6dfbf8fSBarry Smith   else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
816da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
817da3a660dSBarry Smith       return MatRelax(mat->A,bb,omega,flag,shift,its,xx);
818da3a660dSBarry Smith     }
81906be10caSBarry Smith     ierr=VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERALL,mat->Mvctx);
82078b31e54SBarry Smith     CHKERRQ(ierr);
82106be10caSBarry Smith     ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERALL,mat->Mvctx);
82278b31e54SBarry Smith     CHKERRQ(ierr);
823d6dfbf8fSBarry Smith     while (its--) {
824d6dfbf8fSBarry Smith       /* go down through the rows */
825d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
826d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
827d6dfbf8fSBarry Smith         idx  = A->j + A->i[i] - 1;
828d6dfbf8fSBarry Smith         v    = A->a + A->i[i] - 1;
829d6dfbf8fSBarry Smith         sum  = b[i];
830d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
831d6dfbf8fSBarry Smith         d    = shift + A->a[diag[i]-1];
832d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
833d6dfbf8fSBarry Smith         idx  = B->j + B->i[i] - 1;
834d6dfbf8fSBarry Smith         v    = B->a + B->i[i] - 1;
835d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
836d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
837d6dfbf8fSBarry Smith       }
838d6dfbf8fSBarry Smith       /* come up through the rows */
839d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
840d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
841d6dfbf8fSBarry Smith         idx  = A->j + A->i[i] - 1;
842d6dfbf8fSBarry Smith         v    = A->a + A->i[i] - 1;
843d6dfbf8fSBarry Smith         sum  = b[i];
844d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
845d6dfbf8fSBarry Smith         d    = shift + A->a[diag[i]-1];
846d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
847d6dfbf8fSBarry Smith         idx  = B->j + B->i[i] - 1;
848d6dfbf8fSBarry Smith         v    = B->a + B->i[i] - 1;
849d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
850d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
851d6dfbf8fSBarry Smith       }
852d6dfbf8fSBarry Smith     }
853d6dfbf8fSBarry Smith   }
854d6dfbf8fSBarry Smith   else if (flag & SOR_LOCAL_FORWARD_SWEEP){
855da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
856da3a660dSBarry Smith       return MatRelax(mat->A,bb,omega,flag,shift,its,xx);
857da3a660dSBarry Smith     }
85806be10caSBarry Smith     ierr=VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERALL,mat->Mvctx);
85978b31e54SBarry Smith     CHKERRQ(ierr);
86006be10caSBarry Smith     ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERALL,mat->Mvctx);
86178b31e54SBarry Smith     CHKERRQ(ierr);
862d6dfbf8fSBarry Smith     while (its--) {
863d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
864d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
865d6dfbf8fSBarry Smith         idx  = A->j + A->i[i] - 1;
866d6dfbf8fSBarry Smith         v    = A->a + A->i[i] - 1;
867d6dfbf8fSBarry Smith         sum  = b[i];
868d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
869d6dfbf8fSBarry Smith         d    = shift + A->a[diag[i]-1];
870d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
871d6dfbf8fSBarry Smith         idx  = B->j + B->i[i] - 1;
872d6dfbf8fSBarry Smith         v    = B->a + B->i[i] - 1;
873d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
874d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
875d6dfbf8fSBarry Smith       }
876d6dfbf8fSBarry Smith     }
877d6dfbf8fSBarry Smith   }
878d6dfbf8fSBarry Smith   else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
879da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
880da3a660dSBarry Smith       return MatRelax(mat->A,bb,omega,flag,shift,its,xx);
881da3a660dSBarry Smith     }
88206be10caSBarry Smith     ierr = VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERALL,
88378b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
88406be10caSBarry Smith     ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERALL,
88578b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
886d6dfbf8fSBarry Smith     while (its--) {
887d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
888d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
889d6dfbf8fSBarry Smith         idx  = A->j + A->i[i] - 1;
890d6dfbf8fSBarry Smith         v    = A->a + A->i[i] - 1;
891d6dfbf8fSBarry Smith         sum  = b[i];
892d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
893d6dfbf8fSBarry Smith         d    = shift + A->a[diag[i]-1];
894d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
895d6dfbf8fSBarry Smith         idx  = B->j + B->i[i] - 1;
896d6dfbf8fSBarry Smith         v    = B->a + B->i[i] - 1;
897d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
898d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
899d6dfbf8fSBarry Smith       }
900d6dfbf8fSBarry Smith     }
901d6dfbf8fSBarry Smith   }
9028a729477SBarry Smith   return 0;
9038a729477SBarry Smith }
904a66be287SLois Curfman McInnes 
905fc76ce87SLois Curfman McInnes static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,int *nz,
906fc76ce87SLois Curfman McInnes                              int *nzalloc,int *mem)
907a66be287SLois Curfman McInnes {
908a66be287SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
909a66be287SLois Curfman McInnes   Mat        A = mat->A, B = mat->B;
910a66be287SLois Curfman McInnes   int        ierr, isend[3], irecv[3], nzA, nzallocA, memA;
911a66be287SLois Curfman McInnes 
91278b31e54SBarry Smith   ierr = MatGetInfo(A,MAT_LOCAL,&nzA,&nzallocA,&memA); CHKERRQ(ierr);
91378b31e54SBarry Smith   ierr = MatGetInfo(B,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr);
914a66be287SLois Curfman McInnes   isend[0] += nzA; isend[1] += nzallocA; isend[2] += memA;
915a66be287SLois Curfman McInnes   if (flag == MAT_LOCAL) {
916a66be287SLois Curfman McInnes     *nz = isend[0]; *nzalloc = isend[1]; *mem = isend[2];
917a66be287SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_MAX) {
918a66be287SLois Curfman McInnes     MPI_Allreduce((void *) isend,(void *) irecv,3,MPI_INT,MPI_MAX,matin->comm);
919a66be287SLois Curfman McInnes     *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
920a66be287SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_SUM) {
921a66be287SLois Curfman McInnes     MPI_Allreduce((void *) isend,(void *) irecv,3,MPI_INT,MPI_SUM,matin->comm);
922a66be287SLois Curfman McInnes     *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
923a66be287SLois Curfman McInnes   }
924a66be287SLois Curfman McInnes   return 0;
925a66be287SLois Curfman McInnes }
926a66be287SLois Curfman McInnes 
927fc76ce87SLois Curfman McInnes static int MatSetOption_MPIAIJ(Mat aijin,MatOption op)
928c74985f6SBarry Smith {
92944a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data;
930c74985f6SBarry Smith 
931c74985f6SBarry Smith   if      (op == NO_NEW_NONZERO_LOCATIONS)  {
932c74985f6SBarry Smith     MatSetOption(aij->A,op);
933c74985f6SBarry Smith     MatSetOption(aij->B,op);
934c74985f6SBarry Smith   }
935c74985f6SBarry Smith   else if (op == YES_NEW_NONZERO_LOCATIONS) {
936c74985f6SBarry Smith     MatSetOption(aij->A,op);
937c74985f6SBarry Smith     MatSetOption(aij->B,op);
938c74985f6SBarry Smith   }
93978b31e54SBarry Smith   else if (op == COLUMN_ORIENTED) SETERRQ(1,"Column oriented not supported");
940c74985f6SBarry Smith   return 0;
941c74985f6SBarry Smith }
942c74985f6SBarry Smith 
943d1710a03SLois Curfman McInnes static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
944c74985f6SBarry Smith {
94544a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
946c74985f6SBarry Smith   *m = mat->M; *n = mat->N;
947c74985f6SBarry Smith   return 0;
948c74985f6SBarry Smith }
949c74985f6SBarry Smith 
950d1710a03SLois Curfman McInnes static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
951c74985f6SBarry Smith {
95244a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
953c74985f6SBarry Smith   *m = mat->m; *n = mat->n;
954c74985f6SBarry Smith   return 0;
955c74985f6SBarry Smith }
956c74985f6SBarry Smith 
957d1710a03SLois Curfman McInnes static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
958c74985f6SBarry Smith {
95944a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
960c74985f6SBarry Smith   *m = mat->rstart; *n = mat->rend;
961c74985f6SBarry Smith   return 0;
962c74985f6SBarry Smith }
963c74985f6SBarry Smith 
96439e00950SLois Curfman McInnes static int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
96539e00950SLois Curfman McInnes {
966154123eaSLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
967154123eaSLois Curfman McInnes   Scalar     *vworkA, *vworkB, **pvA, **pvB;
968154123eaSLois Curfman McInnes   int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
969154123eaSLois Curfman McInnes   int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
97039e00950SLois Curfman McInnes 
971154123eaSLois Curfman McInnes   if (!mat->assembled)
97278b31e54SBarry Smith     SETERRQ(1,"MatGetRow_MPIAIJ: Must assemble matrix first.");
97339e00950SLois Curfman McInnes   if (row < rstart || row >= rend)
97478b31e54SBarry Smith     SETERRQ(1,"MatGetRow_MPIAIJ: Currently you can get only local rows.")
975abc0e9e4SLois Curfman McInnes   lrow = row - rstart;
97639e00950SLois Curfman McInnes 
977154123eaSLois Curfman McInnes   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
978154123eaSLois Curfman McInnes   if (!v)   {pvA = 0; pvB = 0;}
979154123eaSLois Curfman McInnes   if (!idx) {pcA = 0; if (!v) pcB = 0;}
98078b31e54SBarry Smith   ierr = MatGetRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
98178b31e54SBarry Smith   ierr = MatGetRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
982154123eaSLois Curfman McInnes   nztot = nzA + nzB;
983154123eaSLois Curfman McInnes 
984154123eaSLois Curfman McInnes   if (v  || idx) {
985154123eaSLois Curfman McInnes     if (nztot) {
986154123eaSLois Curfman McInnes       /* Sort by increasing column numbers, assuming A and B already sorted */
987154123eaSLois Curfman McInnes       int imark, imark2;
988154123eaSLois Curfman McInnes       for (i=0; i<nzB; i++) cworkB[i] = mat->garray[cworkB[i]];
989154123eaSLois Curfman McInnes       if (v) {
99078b31e54SBarry Smith         *v = (Scalar *) PETSCMALLOC( (nztot)*sizeof(Scalar) ); CHKPTRQ(*v);
99139e00950SLois Curfman McInnes         for ( i=0; i<nzB; i++ ) {
992154123eaSLois Curfman McInnes           if (cworkB[i] < cstart)   (*v)[i] = vworkB[i];
993154123eaSLois Curfman McInnes           else break;
994154123eaSLois Curfman McInnes         }
995154123eaSLois Curfman McInnes         imark = i;
996154123eaSLois Curfman McInnes         for ( i=0; i<nzA; i++ )     (*v)[imark+i] = vworkA[i];
997154123eaSLois Curfman McInnes         imark2 = imark+nzA;
998154123eaSLois Curfman McInnes         for ( i=imark; i<nzB; i++ ) (*v)[imark2+i] = vworkB[i];
999154123eaSLois Curfman McInnes       }
1000154123eaSLois Curfman McInnes       if (idx) {
100178b31e54SBarry Smith         *idx = (int *) PETSCMALLOC( (nztot)*sizeof(int) ); CHKPTRQ(*idx);
1002154123eaSLois Curfman McInnes         for (i=0; i<nzA; i++) cworkA[i] += cstart;
1003154123eaSLois Curfman McInnes         for ( i=0; i<nzB; i++ ) {
1004154123eaSLois Curfman McInnes           if (cworkB[i] < cstart)   (*idx)[i] = cworkB[i];
1005154123eaSLois Curfman McInnes           else break;
1006154123eaSLois Curfman McInnes         }
1007154123eaSLois Curfman McInnes         imark = i;
1008154123eaSLois Curfman McInnes         for ( i=0; i<nzA; i++ )     (*idx)[imark+i] = cworkA[i];
1009154123eaSLois Curfman McInnes         imark2 = imark+nzA;
1010154123eaSLois Curfman McInnes         for ( i=imark; i<nzB; i++ ) (*idx)[imark2+i] = cworkB[i];
101139e00950SLois Curfman McInnes       }
101239e00950SLois Curfman McInnes     }
101339e00950SLois Curfman McInnes     else {*idx = 0; *v=0;}
1014154123eaSLois Curfman McInnes   }
101539e00950SLois Curfman McInnes   *nz = nztot;
101678b31e54SBarry Smith   ierr = MatRestoreRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
101778b31e54SBarry Smith   ierr = MatRestoreRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
101839e00950SLois Curfman McInnes   return 0;
101939e00950SLois Curfman McInnes }
102039e00950SLois Curfman McInnes 
102139e00950SLois Curfman McInnes static int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
102239e00950SLois Curfman McInnes {
102378b31e54SBarry Smith   if (idx) PETSCFREE(*idx);
102478b31e54SBarry Smith   if (v) PETSCFREE(*v);
102539e00950SLois Curfman McInnes   return 0;
102639e00950SLois Curfman McInnes }
102739e00950SLois Curfman McInnes 
1028fc76ce87SLois Curfman McInnes extern int MatConvert_MPIAIJ(Mat,MatType,Mat *);
1029ff7509f2SBarry Smith static int MatCopyPrivate_MPIAIJ(Mat,Mat *);
1030d6dfbf8fSBarry Smith 
10318a729477SBarry Smith /* -------------------------------------------------------------------*/
10322ee70a88SLois Curfman McInnes static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
103339e00950SLois Curfman McInnes        MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
103444a69424SLois Curfman McInnes        MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
103544a69424SLois Curfman McInnes        MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
10361eb62cbbSBarry Smith        0,0,0,0,
10378a729477SBarry Smith        0,0,
103844a69424SLois Curfman McInnes        MatRelax_MPIAIJ,
10398a729477SBarry Smith        0,
1040a66be287SLois Curfman McInnes        MatGetInfo_MPIAIJ,0,
1041d1710a03SLois Curfman McInnes        MatGetDiagonal_MPIAIJ,0,0,
1042ee50ffe9SBarry Smith        MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
10431eb62cbbSBarry Smith        0,
10442ee70a88SLois Curfman McInnes        MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,0,
1045c74985f6SBarry Smith        0,0,0,0,
1046d1710a03SLois Curfman McInnes        MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
104777915d1cSLois Curfman McInnes        0,0,
1048ff7509f2SBarry Smith        0,0,MatConvert_MPIAIJ,0,0,MatCopyPrivate_MPIAIJ};
10498a729477SBarry Smith 
10508a729477SBarry Smith /*@
1051ff756334SLois Curfman McInnes    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
1052ff756334SLois Curfman McInnes    (the default parallel PETSc format).
10538a729477SBarry Smith 
10548a729477SBarry Smith    Input Parameters:
10551eb62cbbSBarry Smith .  comm - MPI communicator
10567d3e4905SLois Curfman McInnes .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
10577d3e4905SLois Curfman McInnes .  n - number of local columns (or PETSC_DECIDE to have calculated
10587d3e4905SLois Curfman McInnes            if N is given)
10597d3e4905SLois Curfman McInnes .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
10607d3e4905SLois Curfman McInnes .  N - number of global columns (or PETSC_DECIDE to have calculated
10617d3e4905SLois Curfman McInnes            if n is given)
1062ff756334SLois Curfman McInnes .  d_nz - number of nonzeros per row in diagonal portion of matrix
1063ff756334SLois Curfman McInnes            (same for all local rows)
10641eb62cbbSBarry Smith .  d_nzz - number of nonzeros per row in diagonal portion of matrix or null
1065ff756334SLois Curfman McInnes            (possibly different for each row).  You must leave room for the
1066ff756334SLois Curfman McInnes            diagonal entry even if it is zero.
1067ff756334SLois Curfman McInnes .  o_nz - number of nonzeros per row in off-diagonal portion of matrix
1068ff756334SLois Curfman McInnes            (same for all local rows)
10691eb62cbbSBarry Smith .  o_nzz - number of nonzeros per row in off-diagonal portion of matrix
1070ff756334SLois Curfman McInnes            or null (possibly different for each row).
10718a729477SBarry Smith 
1072ff756334SLois Curfman McInnes    Output Parameter:
10738a729477SBarry Smith .  newmat - the matrix
10748a729477SBarry Smith 
1075ff756334SLois Curfman McInnes    Notes:
1076ff756334SLois Curfman McInnes    The AIJ format (also called the Yale sparse matrix format or
1077ff756334SLois Curfman McInnes    compressed row storage), is fully compatible with standard Fortran 77
1078ff756334SLois Curfman McInnes    storage.  That is, the stored row and column indices begin at
1079ff756334SLois Curfman McInnes    one, not zero.
1080ff756334SLois Curfman McInnes 
1081ff756334SLois Curfman McInnes    The user MUST specify either the local or global matrix dimensions
1082ff756334SLois Curfman McInnes    (possibly both).
1083ff756334SLois Curfman McInnes 
1084ff756334SLois Curfman McInnes    The user can set d_nz, d_nnz, o_nz, and o_nnz to zero for PETSc to
1085ff756334SLois Curfman McInnes    control dynamic memory allocation.
1086ff756334SLois Curfman McInnes 
1087dbd7a890SLois Curfman McInnes .keywords: matrix, aij, compressed row, sparse, parallel
1088ff756334SLois Curfman McInnes 
1089dbd7a890SLois Curfman McInnes .seealso: MatCreate(), MatCreateSequentialAIJ(), MatSetValues()
10908a729477SBarry Smith @*/
10911eb62cbbSBarry Smith int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
10921eb62cbbSBarry Smith                  int d_nz,int *d_nnz, int o_nz,int *o_nnz,Mat *newmat)
10938a729477SBarry Smith {
10948a729477SBarry Smith   Mat          mat;
109544a69424SLois Curfman McInnes   Mat_MPIAIJ   *aij;
10966abc6512SBarry Smith   int          ierr, i,sum[2],work[2];
10978a729477SBarry Smith   *newmat         = 0;
109844a69424SLois Curfman McInnes   PETSCHEADERCREATE(mat,_Mat,MAT_COOKIE,MATMPIAIJ,comm);
1099a5a9c739SBarry Smith   PLogObjectCreate(mat);
110078b31e54SBarry Smith   mat->data       = (void *) (aij = PETSCNEW(Mat_MPIAIJ)); CHKPTRQ(aij);
11018a729477SBarry Smith   mat->ops        = &MatOps;
110244a69424SLois Curfman McInnes   mat->destroy    = MatDestroy_MPIAIJ;
110344a69424SLois Curfman McInnes   mat->view       = MatView_MPIAIJ;
11048a729477SBarry Smith   mat->factor     = 0;
1105d6dfbf8fSBarry Smith 
110607a0e7adSLois Curfman McInnes   aij->insertmode = NOTSETVALUES;
11071eb62cbbSBarry Smith   MPI_Comm_rank(comm,&aij->mytid);
11081eb62cbbSBarry Smith   MPI_Comm_size(comm,&aij->numtids);
11091eb62cbbSBarry Smith 
1110dbd7a890SLois Curfman McInnes   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
11111eb62cbbSBarry Smith     work[0] = m; work[1] = n;
1112d6dfbf8fSBarry Smith     MPI_Allreduce((void *) work,(void *) sum,2,MPI_INT,MPI_SUM,comm );
1113dbd7a890SLois Curfman McInnes     if (M == PETSC_DECIDE) M = sum[0];
1114dbd7a890SLois Curfman McInnes     if (N == PETSC_DECIDE) N = sum[1];
11151eb62cbbSBarry Smith   }
1116dbd7a890SLois Curfman McInnes   if (m == PETSC_DECIDE)
1117dbd7a890SLois Curfman McInnes     {m = M/aij->numtids + ((M % aij->numtids) > aij->mytid);}
1118dbd7a890SLois Curfman McInnes   if (n == PETSC_DECIDE)
1119dbd7a890SLois Curfman McInnes     {n = N/aij->numtids + ((N % aij->numtids) > aij->mytid);}
11208a729477SBarry Smith   aij->m = m;
11218a729477SBarry Smith   aij->n = n;
11221eb62cbbSBarry Smith   aij->N = N;
11231eb62cbbSBarry Smith   aij->M = M;
11241eb62cbbSBarry Smith 
11251eb62cbbSBarry Smith   /* build local table of row and column ownerships */
112678b31e54SBarry Smith   aij->rowners = (int *) PETSCMALLOC(2*(aij->numtids+2)*sizeof(int));
112778b31e54SBarry Smith   CHKPTRQ(aij->rowners);
11281eb62cbbSBarry Smith   aij->cowners = aij->rowners + aij->numtids + 1;
11291eb62cbbSBarry Smith   MPI_Allgather(&m,1,MPI_INT,aij->rowners+1,1,MPI_INT,comm);
11301eb62cbbSBarry Smith   aij->rowners[0] = 0;
11311eb62cbbSBarry Smith   for ( i=2; i<=aij->numtids; i++ ) {
11321eb62cbbSBarry Smith     aij->rowners[i] += aij->rowners[i-1];
11338a729477SBarry Smith   }
11341eb62cbbSBarry Smith   aij->rstart = aij->rowners[aij->mytid];
11351eb62cbbSBarry Smith   aij->rend   = aij->rowners[aij->mytid+1];
11361eb62cbbSBarry Smith   MPI_Allgather(&n,1,MPI_INT,aij->cowners+1,1,MPI_INT,comm);
11371eb62cbbSBarry Smith   aij->cowners[0] = 0;
11381eb62cbbSBarry Smith   for ( i=2; i<=aij->numtids; i++ ) {
11391eb62cbbSBarry Smith     aij->cowners[i] += aij->cowners[i-1];
11408a729477SBarry Smith   }
11411eb62cbbSBarry Smith   aij->cstart = aij->cowners[aij->mytid];
11421eb62cbbSBarry Smith   aij->cend   = aij->cowners[aij->mytid+1];
11438a729477SBarry Smith 
11448a729477SBarry Smith 
11456b5873e3SBarry Smith   ierr = MatCreateSequentialAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&aij->A);
114678b31e54SBarry Smith   CHKERRQ(ierr);
1147a5a9c739SBarry Smith   PLogObjectParent(mat,aij->A);
11486b5873e3SBarry Smith   ierr = MatCreateSequentialAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&aij->B);
114978b31e54SBarry Smith   CHKERRQ(ierr);
1150a5a9c739SBarry Smith   PLogObjectParent(mat,aij->B);
11518a729477SBarry Smith 
11521eb62cbbSBarry Smith   /* build cache for off array entries formed */
115378b31e54SBarry Smith   ierr = StashBuild_Private(&aij->stash); CHKERRQ(ierr);
11549e25ed09SBarry Smith   aij->colmap    = 0;
11559e25ed09SBarry Smith   aij->garray    = 0;
11568a729477SBarry Smith 
11571eb62cbbSBarry Smith   /* stuff used for matrix vector multiply */
11581eb62cbbSBarry Smith   aij->lvec      = 0;
11591eb62cbbSBarry Smith   aij->Mvctx     = 0;
1160d6dfbf8fSBarry Smith   aij->assembled = 0;
11618a729477SBarry Smith 
1162d6dfbf8fSBarry Smith   *newmat = mat;
1163d6dfbf8fSBarry Smith   return 0;
1164d6dfbf8fSBarry Smith }
1165c74985f6SBarry Smith 
1166ff7509f2SBarry Smith static int MatCopyPrivate_MPIAIJ(Mat matin,Mat *newmat)
1167d6dfbf8fSBarry Smith {
1168d6dfbf8fSBarry Smith   Mat        mat;
116944a69424SLois Curfman McInnes   Mat_MPIAIJ *aij,*oldmat = (Mat_MPIAIJ *) matin->data;
11702ee70a88SLois Curfman McInnes   int        ierr, len;
1171d6dfbf8fSBarry Smith   *newmat      = 0;
1172d6dfbf8fSBarry Smith 
117378b31e54SBarry Smith   if (!oldmat->assembled) SETERRQ(1,"Cannot copy unassembled matrix");
117444a69424SLois Curfman McInnes   PETSCHEADERCREATE(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm);
1175a5a9c739SBarry Smith   PLogObjectCreate(mat);
117678b31e54SBarry Smith   mat->data       = (void *) (aij = PETSCNEW(Mat_MPIAIJ)); CHKPTRQ(aij);
1177d6dfbf8fSBarry Smith   mat->ops        = &MatOps;
117844a69424SLois Curfman McInnes   mat->destroy    = MatDestroy_MPIAIJ;
117944a69424SLois Curfman McInnes   mat->view       = MatView_MPIAIJ;
1180d6dfbf8fSBarry Smith   mat->factor     = matin->factor;
1181d6dfbf8fSBarry Smith 
1182d6dfbf8fSBarry Smith   aij->m          = oldmat->m;
1183d6dfbf8fSBarry Smith   aij->n          = oldmat->n;
1184d6dfbf8fSBarry Smith   aij->M          = oldmat->M;
1185d6dfbf8fSBarry Smith   aij->N          = oldmat->N;
1186d6dfbf8fSBarry Smith 
1187d6dfbf8fSBarry Smith   aij->assembled  = 1;
1188d6dfbf8fSBarry Smith   aij->rstart     = oldmat->rstart;
1189d6dfbf8fSBarry Smith   aij->rend       = oldmat->rend;
1190d6dfbf8fSBarry Smith   aij->cstart     = oldmat->cstart;
1191d6dfbf8fSBarry Smith   aij->cend       = oldmat->cend;
1192d6dfbf8fSBarry Smith   aij->numtids    = oldmat->numtids;
1193d6dfbf8fSBarry Smith   aij->mytid      = oldmat->mytid;
119407a0e7adSLois Curfman McInnes   aij->insertmode = NOTSETVALUES;
1195d6dfbf8fSBarry Smith 
119678b31e54SBarry Smith   aij->rowners        = (int *) PETSCMALLOC( (aij->numtids+1)*sizeof(int) );
119778b31e54SBarry Smith   CHKPTRQ(aij->rowners);
119878b31e54SBarry Smith   PETSCMEMCPY(aij->rowners,oldmat->rowners,(aij->numtids+1)*sizeof(int));
119978b31e54SBarry Smith   ierr = StashInitialize_Private(&aij->stash); CHKERRQ(ierr);
12002ee70a88SLois Curfman McInnes   if (oldmat->colmap) {
120178b31e54SBarry Smith     aij->colmap      = (int *) PETSCMALLOC( (aij->N)*sizeof(int) );
120278b31e54SBarry Smith     CHKPTRQ(aij->colmap);
120378b31e54SBarry Smith     PETSCMEMCPY(aij->colmap,oldmat->colmap,(aij->N)*sizeof(int));
12042ee70a88SLois Curfman McInnes   } else aij->colmap = 0;
12052ee70a88SLois Curfman McInnes   if (oldmat->garray && (len = ((Mat_AIJ *) (oldmat->B->data))->n)) {
120678b31e54SBarry Smith     aij->garray      = (int *) PETSCMALLOC(len*sizeof(int) ); CHKPTRQ(aij->garray);
120778b31e54SBarry Smith     PETSCMEMCPY(aij->garray,oldmat->garray,len*sizeof(int));
12082ee70a88SLois Curfman McInnes   } else aij->garray = 0;
1209d6dfbf8fSBarry Smith 
121078b31e54SBarry Smith   ierr =  VecDuplicate(oldmat->lvec,&aij->lvec); CHKERRQ(ierr);
1211a5a9c739SBarry Smith   PLogObjectParent(mat,aij->lvec);
121278b31e54SBarry Smith   ierr =  VecScatterCtxCopy(oldmat->Mvctx,&aij->Mvctx); CHKERRQ(ierr);
1213a5a9c739SBarry Smith   PLogObjectParent(mat,aij->Mvctx);
121478b31e54SBarry Smith   ierr =  MatConvert(oldmat->A,MATSAME,&aij->A); CHKERRQ(ierr);
1215a5a9c739SBarry Smith   PLogObjectParent(mat,aij->A);
121678b31e54SBarry Smith   ierr =  MatConvert(oldmat->B,MATSAME,&aij->B); CHKERRQ(ierr);
1217a5a9c739SBarry Smith   PLogObjectParent(mat,aij->B);
12188a729477SBarry Smith   *newmat = mat;
12198a729477SBarry Smith   return 0;
12208a729477SBarry Smith }
1221