xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision edd2f0e15628dd9b7f6ddd2a5df1609eab2c4ee0)
1cb512458SBarry Smith #ifndef lint
2*edd2f0e1SBarry Smith static char vcid[] = "$Id: mpiaij.c,v 1.42 1995/05/12 04:16:42 bsmith Exp bsmith $";
3cb512458SBarry Smith #endif
48a729477SBarry Smith 
51eb62cbbSBarry Smith #include "mpiaij.h"
68a729477SBarry Smith #include "vec/vecimpl.h"
7d6dfbf8fSBarry Smith #include "inline/spops.h"
88a729477SBarry Smith 
99e25ed09SBarry Smith /* local utility routine that creates a mapping from the global column
109e25ed09SBarry Smith number to the local number in the off-diagonal part of the local
119e25ed09SBarry Smith storage of the matrix.  This is done in a non scable way since the
129e25ed09SBarry Smith length of colmap equals the global matrix length.
139e25ed09SBarry Smith */
149e25ed09SBarry Smith static int CreateColmap(Mat mat)
159e25ed09SBarry Smith {
1644a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1744a69424SLois Curfman McInnes   Mat_AIJ    *B = (Mat_AIJ*) aij->B->data;
189e25ed09SBarry Smith   int        n = B->n,i;
199e25ed09SBarry Smith   aij->colmap = (int *) MALLOC( aij->N*sizeof(int) ); CHKPTR(aij->colmap);
209e25ed09SBarry Smith   MEMSET(aij->colmap,0,aij->N*sizeof(int));
21abc0e9e4SLois Curfman McInnes   for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
229e25ed09SBarry Smith   return 0;
239e25ed09SBarry Smith }
249e25ed09SBarry Smith 
252ee70a88SLois Curfman McInnes static int MatSetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,
261eb62cbbSBarry Smith                             int *idxn,Scalar *v,InsertMode addv)
278a729477SBarry Smith {
2844a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
291eb62cbbSBarry Smith   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
301eb62cbbSBarry Smith   int        cstart = aij->cstart, cend = aij->cend,row,col;
318a729477SBarry Smith 
3207a0e7adSLois Curfman McInnes   if (aij->insertmode != NOTSETVALUES && aij->insertmode != addv) {
331eb62cbbSBarry Smith     SETERR(1,"You cannot mix inserts and adds");
348a729477SBarry Smith   }
351eb62cbbSBarry Smith   aij->insertmode = addv;
368a729477SBarry Smith   for ( i=0; i<m; i++ ) {
37da3a660dSBarry Smith     if (idxm[i] < 0) SETERR(1,"Negative row index");
38da3a660dSBarry Smith     if (idxm[i] >= aij->M) SETERR(1,"Row index too large");
391eb62cbbSBarry Smith     if (idxm[i] >= rstart && idxm[i] < rend) {
401eb62cbbSBarry Smith       row = idxm[i] - rstart;
411eb62cbbSBarry Smith       for ( j=0; j<n; j++ ) {
42da3a660dSBarry Smith         if (idxn[j] < 0) SETERR(1,"Negative column index");
43da3a660dSBarry Smith         if (idxn[j] >= aij->N) SETERR(1,"Column index too large");
441eb62cbbSBarry Smith         if (idxn[j] >= cstart && idxn[j] < cend){
451eb62cbbSBarry Smith           col = idxn[j] - cstart;
461eb62cbbSBarry Smith           ierr = MatSetValues(aij->A,1,&row,1,&col,v+i*n+j,addv);CHKERR(ierr);
471eb62cbbSBarry Smith         }
481eb62cbbSBarry Smith         else {
49d6dfbf8fSBarry Smith           if (aij->assembled) {
509e25ed09SBarry Smith             if (!aij->colmap) {ierr = CreateColmap(mat); CHKERR(ierr);}
519e25ed09SBarry Smith             col = aij->colmap[idxn[j]] - 1;
529e25ed09SBarry Smith             if (col < 0) {
539e25ed09SBarry Smith               SETERR(1,"Cannot insert new off diagonal block nonzero in\
549e25ed09SBarry Smith                      already\
55d6dfbf8fSBarry Smith                      assembled matrix. Contact petsc-maint@mcs.anl.gov\
56d6dfbf8fSBarry Smith                      if your need this feature");
57d6dfbf8fSBarry Smith             }
589e25ed09SBarry Smith           }
599e25ed09SBarry Smith           else col = idxn[j];
601eb62cbbSBarry Smith           ierr = MatSetValues(aij->B,1,&row,1,&col,v+i*n+j,addv);CHKERR(ierr);
611eb62cbbSBarry Smith         }
621eb62cbbSBarry Smith       }
631eb62cbbSBarry Smith     }
641eb62cbbSBarry Smith     else {
65dbd7a890SLois Curfman McInnes       ierr = StashValues_Private(&aij->stash,idxm[i],n,idxn,v+i*n,addv);
66dbd7a890SLois Curfman McInnes       CHKERR(ierr);
671eb62cbbSBarry Smith     }
688a729477SBarry Smith   }
698a729477SBarry Smith   return 0;
708a729477SBarry Smith }
718a729477SBarry Smith 
728a729477SBarry Smith /*
731eb62cbbSBarry Smith     the assembly code is a lot like the code for vectors, we should
741eb62cbbSBarry Smith     sometime derive a single assembly code that can be used for
751eb62cbbSBarry Smith     either case.
768a729477SBarry Smith */
778a729477SBarry Smith 
78fc76ce87SLois Curfman McInnes static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
798a729477SBarry Smith {
8044a69424SLois Curfman McInnes   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
81d6dfbf8fSBarry Smith   MPI_Comm    comm = mat->comm;
826abc6512SBarry Smith   int         numtids = aij->numtids, *owners = aij->rowners;
831eb62cbbSBarry Smith   int         mytid = aij->mytid;
841eb62cbbSBarry Smith   MPI_Request *send_waits,*recv_waits;
856abc6512SBarry Smith   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
86dbd7a890SLois Curfman McInnes   int         tag = 50, *owner,*starts,count,ierr;
871eb62cbbSBarry Smith   InsertMode  addv;
881eb62cbbSBarry Smith   Scalar      *rvalues,*svalues;
891eb62cbbSBarry Smith 
901eb62cbbSBarry Smith   /* make sure all processors are either in INSERTMODE or ADDMODE */
9128988994SBarry Smith   MPI_Allreduce((void *) &aij->insertmode,(void *) &addv,1,MPI_INT,
921eb62cbbSBarry Smith                 MPI_BOR,comm);
939d00d63dSBarry Smith   if (addv == (ADDVALUES|INSERTVALUES)) {
941eb62cbbSBarry Smith     SETERR(1,"Some processors have inserted while others have added");
951eb62cbbSBarry Smith   }
961eb62cbbSBarry Smith   aij->insertmode = addv; /* in case this processor had no cache */
971eb62cbbSBarry Smith 
981eb62cbbSBarry Smith   /*  first count number of contributors to each processor */
991eb62cbbSBarry Smith   nprocs = (int *) MALLOC( 2*numtids*sizeof(int) ); CHKPTR(nprocs);
1001eb62cbbSBarry Smith   MEMSET(nprocs,0,2*numtids*sizeof(int)); procs = nprocs + numtids;
1011eb62cbbSBarry Smith   owner = (int *) MALLOC( (aij->stash.n+1)*sizeof(int) ); CHKPTR(owner);
1021eb62cbbSBarry Smith   for ( i=0; i<aij->stash.n; i++ ) {
1031eb62cbbSBarry Smith     idx = aij->stash.idx[i];
1041eb62cbbSBarry Smith     for ( j=0; j<numtids; j++ ) {
1051eb62cbbSBarry Smith       if (idx >= owners[j] && idx < owners[j+1]) {
1061eb62cbbSBarry Smith         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
1078a729477SBarry Smith       }
1088a729477SBarry Smith     }
1098a729477SBarry Smith   }
1101eb62cbbSBarry Smith   nsends = 0;  for ( i=0; i<numtids; i++ ) { nsends += procs[i];}
1111eb62cbbSBarry Smith 
1121eb62cbbSBarry Smith   /* inform other processors of number of messages and max length*/
1131eb62cbbSBarry Smith   work = (int *) MALLOC( numtids*sizeof(int) ); CHKPTR(work);
1141eb62cbbSBarry Smith   MPI_Allreduce((void *) procs,(void *) work,numtids,MPI_INT,MPI_SUM,comm);
1151eb62cbbSBarry Smith   nreceives = work[mytid];
1161eb62cbbSBarry Smith   MPI_Allreduce((void *) nprocs,(void *) work,numtids,MPI_INT,MPI_MAX,comm);
1171eb62cbbSBarry Smith   nmax = work[mytid];
1181eb62cbbSBarry Smith   FREE(work);
1191eb62cbbSBarry Smith 
1201eb62cbbSBarry Smith   /* post receives:
1211eb62cbbSBarry Smith        1) each message will consist of ordered pairs
1221eb62cbbSBarry Smith      (global index,value) we store the global index as a double
123d6dfbf8fSBarry Smith      to simplify the message passing.
1241eb62cbbSBarry Smith        2) since we don't know how long each individual message is we
1251eb62cbbSBarry Smith      allocate the largest needed buffer for each receive. Potentially
1261eb62cbbSBarry Smith      this is a lot of wasted space.
1271eb62cbbSBarry Smith 
1281eb62cbbSBarry Smith 
1291eb62cbbSBarry Smith        This could be done better.
1301eb62cbbSBarry Smith   */
13128988994SBarry Smith   rvalues = (Scalar *) MALLOC(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
1321eb62cbbSBarry Smith   CHKPTR(rvalues);
1331eb62cbbSBarry Smith   recv_waits = (MPI_Request *) MALLOC((nreceives+1)*sizeof(MPI_Request));
1341eb62cbbSBarry Smith   CHKPTR(recv_waits);
1351eb62cbbSBarry Smith   for ( i=0; i<nreceives; i++ ) {
1361eb62cbbSBarry Smith     MPI_Irecv((void *)(rvalues+3*nmax*i),3*nmax,MPI_SCALAR,MPI_ANY_SOURCE,tag,
1371eb62cbbSBarry Smith               comm,recv_waits+i);
1381eb62cbbSBarry Smith   }
1391eb62cbbSBarry Smith 
1401eb62cbbSBarry Smith   /* do sends:
1411eb62cbbSBarry Smith       1) starts[i] gives the starting index in svalues for stuff going to
1421eb62cbbSBarry Smith          the ith processor
1431eb62cbbSBarry Smith   */
1441eb62cbbSBarry Smith   svalues = (Scalar *) MALLOC( 3*(aij->stash.n+1)*sizeof(Scalar) );
1451eb62cbbSBarry Smith   CHKPTR(svalues);
1461eb62cbbSBarry Smith   send_waits = (MPI_Request *) MALLOC( (nsends+1)*sizeof(MPI_Request));
1471eb62cbbSBarry Smith   CHKPTR(send_waits);
1481eb62cbbSBarry Smith   starts = (int *) MALLOC( numtids*sizeof(int) ); CHKPTR(starts);
1491eb62cbbSBarry Smith   starts[0] = 0;
1501eb62cbbSBarry Smith   for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
1511eb62cbbSBarry Smith   for ( i=0; i<aij->stash.n; i++ ) {
1521eb62cbbSBarry Smith     svalues[3*starts[owner[i]]]       = (Scalar)  aij->stash.idx[i];
1531eb62cbbSBarry Smith     svalues[3*starts[owner[i]]+1]     = (Scalar)  aij->stash.idy[i];
1541eb62cbbSBarry Smith     svalues[3*(starts[owner[i]]++)+2] =  aij->stash.array[i];
1551eb62cbbSBarry Smith   }
1561eb62cbbSBarry Smith   FREE(owner);
1571eb62cbbSBarry Smith   starts[0] = 0;
1581eb62cbbSBarry Smith   for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
1591eb62cbbSBarry Smith   count = 0;
1601eb62cbbSBarry Smith   for ( i=0; i<numtids; i++ ) {
1611eb62cbbSBarry Smith     if (procs[i]) {
1621eb62cbbSBarry Smith       MPI_Isend((void*)(svalues+3*starts[i]),3*nprocs[i],MPI_SCALAR,i,tag,
1631eb62cbbSBarry Smith                 comm,send_waits+count++);
1641eb62cbbSBarry Smith     }
1651eb62cbbSBarry Smith   }
1661eb62cbbSBarry Smith   FREE(starts); FREE(nprocs);
1671eb62cbbSBarry Smith 
1681eb62cbbSBarry Smith   /* Free cache space */
169dbd7a890SLois Curfman McInnes   ierr = StashDestroy_Private(&aij->stash); CHKERR(ierr);
1701eb62cbbSBarry Smith 
1711eb62cbbSBarry Smith   aij->svalues    = svalues;    aij->rvalues = rvalues;
1721eb62cbbSBarry Smith   aij->nsends     = nsends;     aij->nrecvs = nreceives;
1731eb62cbbSBarry Smith   aij->send_waits = send_waits; aij->recv_waits = recv_waits;
1741eb62cbbSBarry Smith   aij->rmax       = nmax;
1751eb62cbbSBarry Smith 
1761eb62cbbSBarry Smith   return 0;
1771eb62cbbSBarry Smith }
17844a69424SLois Curfman McInnes extern int MatSetUpMultiply_MPIAIJ(Mat);
1791eb62cbbSBarry Smith 
180fc76ce87SLois Curfman McInnes static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
1811eb62cbbSBarry Smith {
1821eb62cbbSBarry Smith   int        ierr;
18344a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1841eb62cbbSBarry Smith 
1851eb62cbbSBarry Smith   MPI_Status  *send_status,recv_status;
1866abc6512SBarry Smith   int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n;
1871eb62cbbSBarry Smith   int         row,col;
1881eb62cbbSBarry Smith   Scalar      *values,val;
1891eb62cbbSBarry Smith   InsertMode  addv = aij->insertmode;
1901eb62cbbSBarry Smith 
1911eb62cbbSBarry Smith   /*  wait on receives */
1921eb62cbbSBarry Smith   while (count) {
193d6dfbf8fSBarry Smith     MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
1941eb62cbbSBarry Smith     /* unpack receives into our local space */
195d6dfbf8fSBarry Smith     values = aij->rvalues + 3*imdex*aij->rmax;
1961eb62cbbSBarry Smith     MPI_Get_count(&recv_status,MPI_SCALAR,&n);
1971eb62cbbSBarry Smith     n = n/3;
1981eb62cbbSBarry Smith     for ( i=0; i<n; i++ ) {
1991eb62cbbSBarry Smith       row = (int) PETSCREAL(values[3*i]) - aij->rstart;
2001eb62cbbSBarry Smith       col = (int) PETSCREAL(values[3*i+1]);
2011eb62cbbSBarry Smith       val = values[3*i+2];
2021eb62cbbSBarry Smith       if (col >= aij->cstart && col < aij->cend) {
2031eb62cbbSBarry Smith           col -= aij->cstart;
2041eb62cbbSBarry Smith         MatSetValues(aij->A,1,&row,1,&col,&val,addv);
2051eb62cbbSBarry Smith       }
2061eb62cbbSBarry Smith       else {
207d6dfbf8fSBarry Smith         if (aij->assembled) {
2089e25ed09SBarry Smith           if (!aij->colmap) {ierr = CreateColmap(mat); CHKERR(ierr);}
2099e25ed09SBarry Smith           col = aij->colmap[col] - 1;
2109e25ed09SBarry Smith           if (col < 0) {
2119e25ed09SBarry Smith             SETERR(1,"Cannot insert new off diagonal block nonzero in\
2129e25ed09SBarry Smith                      already\
213d6dfbf8fSBarry Smith                      assembled matrix. Contact petsc-maint@mcs.anl.gov\
214d6dfbf8fSBarry Smith                      if your need this feature");
215d6dfbf8fSBarry Smith           }
2169e25ed09SBarry Smith         }
2171eb62cbbSBarry Smith         MatSetValues(aij->B,1,&row,1,&col,&val,addv);
2181eb62cbbSBarry Smith       }
2191eb62cbbSBarry Smith     }
2201eb62cbbSBarry Smith     count--;
2211eb62cbbSBarry Smith   }
2221eb62cbbSBarry Smith   FREE(aij->recv_waits); FREE(aij->rvalues);
2231eb62cbbSBarry Smith 
2241eb62cbbSBarry Smith   /* wait on sends */
2251eb62cbbSBarry Smith   if (aij->nsends) {
2261eb62cbbSBarry Smith     send_status = (MPI_Status *) MALLOC( aij->nsends*sizeof(MPI_Status) );
2271eb62cbbSBarry Smith     CHKPTR(send_status);
2281eb62cbbSBarry Smith     MPI_Waitall(aij->nsends,aij->send_waits,send_status);
2291eb62cbbSBarry Smith     FREE(send_status);
2301eb62cbbSBarry Smith   }
2311eb62cbbSBarry Smith   FREE(aij->send_waits); FREE(aij->svalues);
2321eb62cbbSBarry Smith 
23307a0e7adSLois Curfman McInnes   aij->insertmode = NOTSETVALUES;
234ee50ffe9SBarry Smith   ierr = MatAssemblyBegin(aij->A,mode); CHKERR(ierr);
235ee50ffe9SBarry Smith   ierr = MatAssemblyEnd(aij->A,mode); CHKERR(ierr);
2361eb62cbbSBarry Smith 
2375e42470aSBarry Smith   if (!aij->assembled && mode == FINAL_ASSEMBLY) {
238d3c9fed9SLois Curfman McInnes     ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERR(ierr);
239d6dfbf8fSBarry Smith     aij->assembled = 1;
2405e42470aSBarry Smith   }
241ee50ffe9SBarry Smith   ierr = MatAssemblyBegin(aij->B,mode); CHKERR(ierr);
242ee50ffe9SBarry Smith   ierr = MatAssemblyEnd(aij->B,mode); CHKERR(ierr);
2435e42470aSBarry Smith 
2448a729477SBarry Smith   return 0;
2458a729477SBarry Smith }
2468a729477SBarry Smith 
2472ee70a88SLois Curfman McInnes static int MatZeroEntries_MPIAIJ(Mat A)
2481eb62cbbSBarry Smith {
24944a69424SLois Curfman McInnes   Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
250dbd7a890SLois Curfman McInnes   int ierr;
251dbd7a890SLois Curfman McInnes   ierr = MatZeroEntries(l->A); CHKERR(ierr);
252dbd7a890SLois Curfman McInnes   ierr = MatZeroEntries(l->B); CHKERR(ierr);
2531eb62cbbSBarry Smith   return 0;
2541eb62cbbSBarry Smith }
2551eb62cbbSBarry Smith 
2561eb62cbbSBarry Smith /* again this uses the same basic stratagy as in the assembly and
2571eb62cbbSBarry Smith    scatter create routines, we should try to do it systemamatically
2581eb62cbbSBarry Smith    if we can figure out the proper level of generality. */
2591eb62cbbSBarry Smith 
2601eb62cbbSBarry Smith /* the code does not do the diagonal entries correctly unless the
2611eb62cbbSBarry Smith    matrix is square and the column and row owerships are identical.
2621eb62cbbSBarry Smith    This is a BUG. The only way to fix it seems to be to access
2631eb62cbbSBarry Smith    aij->A and aij->B directly and not through the MatZeroRows()
2641eb62cbbSBarry Smith    routine.
2651eb62cbbSBarry Smith */
26644a69424SLois Curfman McInnes static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
2671eb62cbbSBarry Smith {
26844a69424SLois Curfman McInnes   Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
2691eb62cbbSBarry Smith   int            i,ierr,N, *rows,*owners = l->rowners,numtids = l->numtids;
2706abc6512SBarry Smith   int            *procs,*nprocs,j,found,idx,nsends,*work;
2711eb62cbbSBarry Smith   int            nmax,*svalues,*starts,*owner,nrecvs,mytid = l->mytid;
2726abc6512SBarry Smith   int            *rvalues,tag = 67,count,base,slen,n,*source;
273d6dfbf8fSBarry Smith   int            *lens,imdex,*lrows,*values;
274d6dfbf8fSBarry Smith   MPI_Comm       comm = A->comm;
2751eb62cbbSBarry Smith   MPI_Request    *send_waits,*recv_waits;
2761eb62cbbSBarry Smith   MPI_Status     recv_status,*send_status;
2771eb62cbbSBarry Smith   IS             istmp;
2781eb62cbbSBarry Smith 
27944a69424SLois Curfman McInnes   if (!l->assembled) SETERR(1,"MatZeroRows_MPIAIJ: must assemble matrix first");
2801eb62cbbSBarry Smith   ierr = ISGetLocalSize(is,&N); CHKERR(ierr);
2811eb62cbbSBarry Smith   ierr = ISGetIndices(is,&rows); CHKERR(ierr);
2821eb62cbbSBarry Smith 
2831eb62cbbSBarry Smith   /*  first count number of contributors to each processor */
2841eb62cbbSBarry Smith   nprocs = (int *) MALLOC( 2*numtids*sizeof(int) ); CHKPTR(nprocs);
2851eb62cbbSBarry Smith   MEMSET(nprocs,0,2*numtids*sizeof(int)); procs = nprocs + numtids;
2861eb62cbbSBarry Smith   owner = (int *) MALLOC((N+1)*sizeof(int)); CHKPTR(owner); /* see note*/
2871eb62cbbSBarry Smith   for ( i=0; i<N; i++ ) {
2881eb62cbbSBarry Smith     idx = rows[i];
2891eb62cbbSBarry Smith     found = 0;
2901eb62cbbSBarry Smith     for ( j=0; j<numtids; j++ ) {
2911eb62cbbSBarry Smith       if (idx >= owners[j] && idx < owners[j+1]) {
2921eb62cbbSBarry Smith         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
2931eb62cbbSBarry Smith       }
2941eb62cbbSBarry Smith     }
295d6dfbf8fSBarry Smith     if (!found) SETERR(1,"Imdex out of range");
2961eb62cbbSBarry Smith   }
2971eb62cbbSBarry Smith   nsends = 0;  for ( i=0; i<numtids; i++ ) { nsends += procs[i];}
2981eb62cbbSBarry Smith 
2991eb62cbbSBarry Smith   /* inform other processors of number of messages and max length*/
3001eb62cbbSBarry Smith   work = (int *) MALLOC( numtids*sizeof(int) ); CHKPTR(work);
3011eb62cbbSBarry Smith   MPI_Allreduce((void *) procs,(void *) work,numtids,MPI_INT,MPI_SUM,comm);
3021eb62cbbSBarry Smith   nrecvs = work[mytid];
3031eb62cbbSBarry Smith   MPI_Allreduce((void *) nprocs,(void *) work,numtids,MPI_INT,MPI_MAX,comm);
3041eb62cbbSBarry Smith   nmax = work[mytid];
3051eb62cbbSBarry Smith   FREE(work);
3061eb62cbbSBarry Smith 
3071eb62cbbSBarry Smith   /* post receives:   */
30828988994SBarry Smith   rvalues = (int *) MALLOC((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
3091eb62cbbSBarry Smith   CHKPTR(rvalues);
3101eb62cbbSBarry Smith   recv_waits = (MPI_Request *) MALLOC((nrecvs+1)*sizeof(MPI_Request));
3111eb62cbbSBarry Smith   CHKPTR(recv_waits);
3121eb62cbbSBarry Smith   for ( i=0; i<nrecvs; i++ ) {
3131eb62cbbSBarry Smith     MPI_Irecv((void *)(rvalues+nmax*i),nmax,MPI_INT,MPI_ANY_SOURCE,tag,
3141eb62cbbSBarry Smith               comm,recv_waits+i);
3151eb62cbbSBarry Smith   }
3161eb62cbbSBarry Smith 
3171eb62cbbSBarry Smith   /* do sends:
3181eb62cbbSBarry Smith       1) starts[i] gives the starting index in svalues for stuff going to
3191eb62cbbSBarry Smith          the ith processor
3201eb62cbbSBarry Smith   */
3211eb62cbbSBarry Smith   svalues = (int *) MALLOC( (N+1)*sizeof(int) ); CHKPTR(svalues);
3221eb62cbbSBarry Smith   send_waits = (MPI_Request *) MALLOC( (nsends+1)*sizeof(MPI_Request));
3231eb62cbbSBarry Smith   CHKPTR(send_waits);
3241eb62cbbSBarry Smith   starts = (int *) MALLOC( (numtids+1)*sizeof(int) ); CHKPTR(starts);
3251eb62cbbSBarry Smith   starts[0] = 0;
3261eb62cbbSBarry Smith   for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
3271eb62cbbSBarry Smith   for ( i=0; i<N; i++ ) {
3281eb62cbbSBarry Smith     svalues[starts[owner[i]]++] = rows[i];
3291eb62cbbSBarry Smith   }
3301eb62cbbSBarry Smith   ISRestoreIndices(is,&rows);
3311eb62cbbSBarry Smith 
3321eb62cbbSBarry Smith   starts[0] = 0;
3331eb62cbbSBarry Smith   for ( i=1; i<numtids+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
3341eb62cbbSBarry Smith   count = 0;
3351eb62cbbSBarry Smith   for ( i=0; i<numtids; i++ ) {
3361eb62cbbSBarry Smith     if (procs[i]) {
3371eb62cbbSBarry Smith       MPI_Isend((void*)(svalues+starts[i]),nprocs[i],MPI_INT,i,tag,
3381eb62cbbSBarry Smith                 comm,send_waits+count++);
3391eb62cbbSBarry Smith     }
3401eb62cbbSBarry Smith   }
3411eb62cbbSBarry Smith   FREE(starts);
3421eb62cbbSBarry Smith 
3431eb62cbbSBarry Smith   base = owners[mytid];
3441eb62cbbSBarry Smith 
3451eb62cbbSBarry Smith   /*  wait on receives */
3461eb62cbbSBarry Smith   lens = (int *) MALLOC( 2*(nrecvs+1)*sizeof(int) ); CHKPTR(lens);
3471eb62cbbSBarry Smith   source = lens + nrecvs;
3481eb62cbbSBarry Smith   count = nrecvs; slen = 0;
3491eb62cbbSBarry Smith   while (count) {
350d6dfbf8fSBarry Smith     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
3511eb62cbbSBarry Smith     /* unpack receives into our local space */
3521eb62cbbSBarry Smith     MPI_Get_count(&recv_status,MPI_INT,&n);
353d6dfbf8fSBarry Smith     source[imdex]  = recv_status.MPI_SOURCE;
354d6dfbf8fSBarry Smith     lens[imdex]  = n;
3551eb62cbbSBarry Smith     slen += n;
3561eb62cbbSBarry Smith     count--;
3571eb62cbbSBarry Smith   }
3581eb62cbbSBarry Smith   FREE(recv_waits);
3591eb62cbbSBarry Smith 
3601eb62cbbSBarry Smith   /* move the data into the send scatter */
3611eb62cbbSBarry Smith   lrows = (int *) MALLOC( slen*sizeof(int) ); CHKPTR(lrows);
3621eb62cbbSBarry Smith   count = 0;
3631eb62cbbSBarry Smith   for ( i=0; i<nrecvs; i++ ) {
3641eb62cbbSBarry Smith     values = rvalues + i*nmax;
3651eb62cbbSBarry Smith     for ( j=0; j<lens[i]; j++ ) {
3661eb62cbbSBarry Smith       lrows[count++] = values[j] - base;
3671eb62cbbSBarry Smith     }
3681eb62cbbSBarry Smith   }
3691eb62cbbSBarry Smith   FREE(rvalues); FREE(lens);
3701eb62cbbSBarry Smith   FREE(owner); FREE(nprocs);
3711eb62cbbSBarry Smith 
3721eb62cbbSBarry Smith   /* actually zap the local rows */
3736b5873e3SBarry Smith   ierr = ISCreateSequential(MPI_COMM_SELF,slen,lrows,&istmp);
3746b5873e3SBarry Smith   CHKERR(ierr);  FREE(lrows);
3751eb62cbbSBarry Smith   ierr = MatZeroRows(l->A,istmp,diag); CHKERR(ierr);
3761eb62cbbSBarry Smith   ierr = MatZeroRows(l->B,istmp,0); CHKERR(ierr);
3771eb62cbbSBarry Smith   ierr = ISDestroy(istmp); CHKERR(ierr);
3781eb62cbbSBarry Smith 
3791eb62cbbSBarry Smith   /* wait on sends */
3801eb62cbbSBarry Smith   if (nsends) {
3811eb62cbbSBarry Smith     send_status = (MPI_Status *) MALLOC( nsends*sizeof(MPI_Status) );
3821eb62cbbSBarry Smith     CHKPTR(send_status);
3831eb62cbbSBarry Smith     MPI_Waitall(nsends,send_waits,send_status);
3841eb62cbbSBarry Smith     FREE(send_status);
3851eb62cbbSBarry Smith   }
3861eb62cbbSBarry Smith   FREE(send_waits); FREE(svalues);
3871eb62cbbSBarry Smith 
3881eb62cbbSBarry Smith 
3891eb62cbbSBarry Smith   return 0;
3901eb62cbbSBarry Smith }
3911eb62cbbSBarry Smith 
39244a69424SLois Curfman McInnes static int MatMult_MPIAIJ(Mat aijin,Vec xx,Vec yy)
3931eb62cbbSBarry Smith {
39444a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data;
3951eb62cbbSBarry Smith   int        ierr;
39644a69424SLois Curfman McInnes   if (!aij->assembled) SETERR(1,"MatMult_MPIAIJ: must assemble matrix first");
39706be10caSBarry Smith   ierr = VecScatterBegin(xx,aij->lvec,INSERTVALUES,SCATTERALL,aij->Mvctx);
3981eb62cbbSBarry Smith   CHKERR(ierr);
3991eb62cbbSBarry Smith   ierr = MatMult(aij->A,xx,yy); CHKERR(ierr);
40006be10caSBarry Smith   ierr = VecScatterEnd(xx,aij->lvec,INSERTVALUES,SCATTERALL,aij->Mvctx);
401d6dfbf8fSBarry Smith   CHKERR(ierr);
4021eb62cbbSBarry Smith   ierr = MatMultAdd(aij->B,aij->lvec,yy,yy); CHKERR(ierr);
4031eb62cbbSBarry Smith   return 0;
4041eb62cbbSBarry Smith }
4051eb62cbbSBarry Smith 
40644a69424SLois Curfman McInnes static int MatMultAdd_MPIAIJ(Mat aijin,Vec xx,Vec yy,Vec zz)
407da3a660dSBarry Smith {
40844a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data;
409da3a660dSBarry Smith   int        ierr;
41044a69424SLois Curfman McInnes   if (!aij->assembled) SETERR(1,"MatMult_MPIAIJ: must assemble matrix first");
41106be10caSBarry Smith   ierr = VecScatterBegin(xx,aij->lvec,INSERTVALUES,SCATTERALL,aij->Mvctx);
412da3a660dSBarry Smith   CHKERR(ierr);
413da3a660dSBarry Smith   ierr = MatMultAdd(aij->A,xx,yy,zz); CHKERR(ierr);
41406be10caSBarry Smith   ierr = VecScatterEnd(xx,aij->lvec,INSERTVALUES,SCATTERALL,aij->Mvctx);
415da3a660dSBarry Smith   CHKERR(ierr);
416da3a660dSBarry Smith   ierr = MatMultAdd(aij->B,aij->lvec,zz,zz); CHKERR(ierr);
417da3a660dSBarry Smith   return 0;
418da3a660dSBarry Smith }
419da3a660dSBarry Smith 
42044a69424SLois Curfman McInnes static int MatMultTrans_MPIAIJ(Mat aijin,Vec xx,Vec yy)
421da3a660dSBarry Smith {
42244a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data;
423da3a660dSBarry Smith   int        ierr;
424da3a660dSBarry Smith 
42544a69424SLois Curfman McInnes   if (!aij->assembled)
42644a69424SLois Curfman McInnes     SETERR(1,"MatMulTrans_MPIAIJ: must assemble matrix first");
427da3a660dSBarry Smith   /* do nondiagonal part */
428da3a660dSBarry Smith   ierr = MatMultTrans(aij->B,xx,aij->lvec); CHKERR(ierr);
429da3a660dSBarry Smith   /* send it on its way */
43006be10caSBarry Smith   ierr = VecScatterBegin(aij->lvec,yy,ADDVALUES,
431a8de2c74SBarry Smith            (ScatterMode)(SCATTERALL|SCATTERREVERSE),aij->Mvctx); CHKERR(ierr);
432da3a660dSBarry Smith   /* do local part */
433da3a660dSBarry Smith   ierr = MatMultTrans(aij->A,xx,yy); CHKERR(ierr);
434da3a660dSBarry Smith   /* receive remote parts: note this assumes the values are not actually */
435da3a660dSBarry Smith   /* inserted in yy until the next line, which is true for my implementation*/
436da3a660dSBarry Smith   /* but is not perhaps always true. */
43706be10caSBarry Smith   ierr = VecScatterEnd(aij->lvec,yy,ADDVALUES,
438a8de2c74SBarry Smith          (ScatterMode)(SCATTERALL|SCATTERREVERSE),aij->Mvctx); CHKERR(ierr);
439da3a660dSBarry Smith   return 0;
440da3a660dSBarry Smith }
441da3a660dSBarry Smith 
44244a69424SLois Curfman McInnes static int MatMultTransAdd_MPIAIJ(Mat aijin,Vec xx,Vec yy,Vec zz)
443da3a660dSBarry Smith {
44444a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data;
445da3a660dSBarry Smith   int        ierr;
446da3a660dSBarry Smith 
44744a69424SLois Curfman McInnes   if (!aij->assembled)
44844a69424SLois Curfman McInnes     SETERR(1,"MatMulTransAdd_MPIAIJ: must assemble matrix first");
449da3a660dSBarry Smith   /* do nondiagonal part */
450da3a660dSBarry Smith   ierr = MatMultTrans(aij->B,xx,aij->lvec); CHKERR(ierr);
451da3a660dSBarry Smith   /* send it on its way */
45206be10caSBarry Smith   ierr = VecScatterBegin(aij->lvec,zz,ADDVALUES,
453a8de2c74SBarry Smith          (ScatterMode)(SCATTERALL|SCATTERREVERSE),aij->Mvctx); CHKERR(ierr);
454da3a660dSBarry Smith   /* do local part */
455da3a660dSBarry Smith   ierr = MatMultTransAdd(aij->A,xx,yy,zz); CHKERR(ierr);
456da3a660dSBarry Smith   /* receive remote parts: note this assumes the values are not actually */
457da3a660dSBarry Smith   /* inserted in yy until the next line, which is true for my implementation*/
458da3a660dSBarry Smith   /* but is not perhaps always true. */
45906be10caSBarry Smith   ierr = VecScatterEnd(aij->lvec,zz,ADDVALUES,
460a8de2c74SBarry Smith        (ScatterMode)(SCATTERALL|SCATTERREVERSE),aij->Mvctx); CHKERR(ierr);
461da3a660dSBarry Smith   return 0;
462da3a660dSBarry Smith }
463da3a660dSBarry Smith 
4641eb62cbbSBarry Smith /*
4651eb62cbbSBarry Smith   This only works correctly for square matrices where the subblock A->A is the
4661eb62cbbSBarry Smith    diagonal block
4671eb62cbbSBarry Smith */
468d1710a03SLois Curfman McInnes static int MatGetDiagonal_MPIAIJ(Mat Ain,Vec v)
4691eb62cbbSBarry Smith {
47044a69424SLois Curfman McInnes   Mat_MPIAIJ *A = (Mat_MPIAIJ *) Ain->data;
47144a69424SLois Curfman McInnes   if (!A->assembled) SETERR(1,"MatGetDiag_MPIAIJ: must assemble matrix first");
4721eb62cbbSBarry Smith   return MatGetDiagonal(A->A,v);
4731eb62cbbSBarry Smith }
4741eb62cbbSBarry Smith 
47544a69424SLois Curfman McInnes static int MatDestroy_MPIAIJ(PetscObject obj)
4761eb62cbbSBarry Smith {
4771eb62cbbSBarry Smith   Mat        mat = (Mat) obj;
47844a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
4791eb62cbbSBarry Smith   int        ierr;
480a5a9c739SBarry Smith #if defined(PETSC_LOG)
481a5a9c739SBarry Smith   PLogObjectState(obj,"Rows %d Cols %d",aij->M,aij->N);
482a5a9c739SBarry Smith #endif
4831eb62cbbSBarry Smith   FREE(aij->rowners);
4841eb62cbbSBarry Smith   ierr = MatDestroy(aij->A); CHKERR(ierr);
4851eb62cbbSBarry Smith   ierr = MatDestroy(aij->B); CHKERR(ierr);
4869e25ed09SBarry Smith   if (aij->colmap) FREE(aij->colmap);
4879e25ed09SBarry Smith   if (aij->garray) FREE(aij->garray);
4881eb62cbbSBarry Smith   if (aij->lvec) VecDestroy(aij->lvec);
4891eb62cbbSBarry Smith   if (aij->Mvctx) VecScatterCtxDestroy(aij->Mvctx);
490a5a9c739SBarry Smith   FREE(aij);
491a5a9c739SBarry Smith   PLogObjectDestroy(mat);
492a5a9c739SBarry Smith   PETSCHEADERDESTROY(mat);
4931eb62cbbSBarry Smith   return 0;
4941eb62cbbSBarry Smith }
4957857610eSBarry Smith #include "draw.h"
496ee50ffe9SBarry Smith #include "pviewer.h"
497ee50ffe9SBarry Smith 
49844a69424SLois Curfman McInnes static int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
4991eb62cbbSBarry Smith {
5001eb62cbbSBarry Smith   Mat        mat = (Mat) obj;
50144a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
5021eb62cbbSBarry Smith   int        ierr;
503d13ab20cSBarry Smith   PetscObject vobj = (PetscObject) viewer;
5041eb62cbbSBarry Smith 
505ab254492SBarry Smith   if (!aij->assembled) SETERR(1,"MatView_MPIAIJ: must assemble matrix first");
506154123eaSLois Curfman McInnes   if (!viewer) { /* so that viewers may be used from debuggers */
507154123eaSLois Curfman McInnes     viewer = STDOUT_VIEWER; vobj = (PetscObject) viewer;
508154123eaSLois Curfman McInnes   }
509ab254492SBarry Smith   if (vobj->cookie == DRAW_COOKIE && vobj->type == NULLWINDOW) return 0;
5107857610eSBarry Smith   if (vobj->cookie == VIEWER_COOKIE && vobj->type == FILE_VIEWER) {
5114a0ce102SLois Curfman McInnes     FILE *fd = ViewerFileGetPointer_Private(viewer);
512d6dfbf8fSBarry Smith     MPE_Seq_begin(mat->comm,1);
513d13ab20cSBarry Smith     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
5141eb62cbbSBarry Smith              aij->mytid,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
5151eb62cbbSBarry Smith              aij->cend);
516da69df5fSBarry Smith     ierr = MatView(aij->A,viewer); CHKERR(ierr);
517da69df5fSBarry Smith     ierr = MatView(aij->B,viewer); CHKERR(ierr);
518d13ab20cSBarry Smith     fflush(fd);
519d6dfbf8fSBarry Smith     MPE_Seq_end(mat->comm,1);
520d13ab20cSBarry Smith   }
5217857610eSBarry Smith   else if ((vobj->cookie == VIEWER_COOKIE && vobj->type == FILES_VIEWER) ||
5227857610eSBarry Smith             vobj->cookie == DRAW_COOKIE) {
52395373324SBarry Smith     int numtids = aij->numtids, mytid = aij->mytid;
52495373324SBarry Smith     if (numtids == 1) {
52595373324SBarry Smith       ierr = MatView(aij->A,viewer); CHKERR(ierr);
52695373324SBarry Smith     }
52795373324SBarry Smith     else {
52895373324SBarry Smith       /* assemble the entire matrix onto first processor. */
52995373324SBarry Smith       Mat     A;
5302ee70a88SLois Curfman McInnes       Mat_AIJ *Aloc;
5312eb8c8abSBarry Smith       int     M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
53295373324SBarry Smith       Scalar  *a;
5332ee70a88SLois Curfman McInnes 
53495373324SBarry Smith       if (!mytid) {
53595373324SBarry Smith         ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,0,0,0,&A);
53695373324SBarry Smith       }
53795373324SBarry Smith       else {
53895373324SBarry Smith         ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,0,0,0,&A);
53995373324SBarry Smith       }
54095373324SBarry Smith       CHKERR(ierr);
54195373324SBarry Smith 
54295373324SBarry Smith       /* copy over the A part */
5432ee70a88SLois Curfman McInnes       Aloc = (Mat_AIJ*) aij->A->data;
5442ee70a88SLois Curfman McInnes       m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
54595373324SBarry Smith       row = aij->rstart;
54695373324SBarry Smith       for ( i=0; i<ai[m]; i++ ) {aj[i] += aij->cstart - 1;}
54795373324SBarry Smith       for ( i=0; i<m; i++ ) {
54807a0e7adSLois Curfman McInnes         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERTVALUES);
54995373324SBarry Smith         CHKERR(ierr);
55095373324SBarry Smith         row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
55195373324SBarry Smith       }
5522ee70a88SLois Curfman McInnes       aj = Aloc->j;
55395373324SBarry Smith       for ( i=0; i<ai[m]; i++ ) {aj[i] -= aij->cstart - 1;}
55495373324SBarry Smith 
55595373324SBarry Smith       /* copy over the B part */
5562ee70a88SLois Curfman McInnes       Aloc = (Mat_AIJ*) aij->B->data;
5572ee70a88SLois Curfman McInnes       m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
55895373324SBarry Smith       row = aij->rstart;
55995373324SBarry Smith       ct = cols = (int *) MALLOC( (ai[m]+1)*sizeof(int) ); CHKPTR(cols);
56095373324SBarry Smith       for ( i=0; i<ai[m]; i++ ) {cols[i] = aij->garray[aj[i]-1];}
56195373324SBarry Smith       for ( i=0; i<m; i++ ) {
56207a0e7adSLois Curfman McInnes         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERTVALUES);
56395373324SBarry Smith         CHKERR(ierr);
56495373324SBarry Smith         row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
56595373324SBarry Smith       }
56695373324SBarry Smith       FREE(ct);
567ee50ffe9SBarry Smith       ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERR(ierr);
568ee50ffe9SBarry Smith       ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERR(ierr);
56995373324SBarry Smith       if (!mytid) {
57095373324SBarry Smith         ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERR(ierr);
57195373324SBarry Smith       }
57295373324SBarry Smith       ierr = MatDestroy(A); CHKERR(ierr);
57395373324SBarry Smith     }
57495373324SBarry Smith   }
5751eb62cbbSBarry Smith   return 0;
5761eb62cbbSBarry Smith }
5771eb62cbbSBarry Smith 
578d3c9fed9SLois Curfman McInnes extern int MatMarkDiag_AIJ(Mat_AIJ  *);
5791eb62cbbSBarry Smith /*
5801eb62cbbSBarry Smith     This has to provide several versions.
5811eb62cbbSBarry Smith 
5821eb62cbbSBarry Smith      1) per sequential
5831eb62cbbSBarry Smith      2) a) use only local smoothing updating outer values only once.
5841eb62cbbSBarry Smith         b) local smoothing updating outer values each inner iteration
585d6dfbf8fSBarry Smith      3) color updating out values betwen colors.
5861eb62cbbSBarry Smith */
587fc76ce87SLois Curfman McInnes static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
588fc76ce87SLois Curfman McInnes                            double shift,int its,Vec xx)
5898a729477SBarry Smith {
59044a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
591d6dfbf8fSBarry Smith   Mat        AA = mat->A, BB = mat->B;
59244a69424SLois Curfman McInnes   Mat_AIJ    *A = (Mat_AIJ *) AA->data, *B = (Mat_AIJ *)BB->data;
5936abc6512SBarry Smith   Scalar     zero = 0.0,*b,*x,*xs,*ls,d,*v,sum,scale,*t,*ts;
5946abc6512SBarry Smith   int        ierr,*idx, *diag;
5956abc6512SBarry Smith   int        n = mat->n, m = mat->m, i;
596da3a660dSBarry Smith   Vec        tt;
5978a729477SBarry Smith 
59844a69424SLois Curfman McInnes   if (!mat->assembled) SETERR(1,"MatRelax_MPIAIJ: must assemble matrix first");
5991eb62cbbSBarry Smith 
600d6dfbf8fSBarry Smith   VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
601d6dfbf8fSBarry Smith   xs = x -1; /* shift by one for index start of 1 */
602da3a660dSBarry Smith   ls--;
603d3c9fed9SLois Curfman McInnes   if (!A->diag) {if ((ierr = MatMarkDiag_AIJ(A))) return ierr;}
604d6dfbf8fSBarry Smith   diag = A->diag;
605acb40c82SBarry Smith   if (flag == SOR_APPLY_UPPER || flag == SOR_APPLY_LOWER) {
606acb40c82SBarry Smith     SETERR(1,"That option not yet support for parallel AIJ matrices");
607acb40c82SBarry Smith   }
608da3a660dSBarry Smith   if (flag & SOR_EISENSTAT) {
609da3a660dSBarry Smith     /* Let  A = L + U + D; where L is lower trianglar,
610da3a660dSBarry Smith     U is upper triangular, E is diagonal; This routine applies
611da3a660dSBarry Smith 
612da3a660dSBarry Smith             (L + E)^{-1} A (U + E)^{-1}
613da3a660dSBarry Smith 
614da3a660dSBarry Smith     to a vector efficiently using Eisenstat's trick. This is for
615da3a660dSBarry Smith     the case of SSOR preconditioner, so E is D/omega where omega
616da3a660dSBarry Smith     is the relaxation factor.
617da3a660dSBarry Smith     */
6186469c4f9SBarry Smith     ierr = VecDuplicate(xx,&tt); CHKERR(ierr);
619da3a660dSBarry Smith     VecGetArray(tt,&t);
620da3a660dSBarry Smith     scale = (2.0/omega) - 1.0;
621da3a660dSBarry Smith     /*  x = (E + U)^{-1} b */
622da3a660dSBarry Smith     VecSet(&zero,mat->lvec);
62306be10caSBarry Smith     ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
624da3a660dSBarry Smith                               mat->Mvctx); CHKERR(ierr);
625da3a660dSBarry Smith     for ( i=m-1; i>-1; i-- ) {
626da3a660dSBarry Smith       n    = A->i[i+1] - diag[i] - 1;
627da3a660dSBarry Smith       idx  = A->j + diag[i];
628da3a660dSBarry Smith       v    = A->a + diag[i];
629da3a660dSBarry Smith       sum  = b[i];
630da3a660dSBarry Smith       SPARSEDENSEMDOT(sum,xs,v,idx,n);
631da3a660dSBarry Smith       d    = shift + A->a[diag[i]-1];
632da3a660dSBarry Smith       n    = B->i[i+1] - B->i[i];
633da3a660dSBarry Smith       idx  = B->j + B->i[i] - 1;
634da3a660dSBarry Smith       v    = B->a + B->i[i] - 1;
635da3a660dSBarry Smith       SPARSEDENSEMDOT(sum,ls,v,idx,n);
636da3a660dSBarry Smith       x[i] = omega*(sum/d);
637da3a660dSBarry Smith     }
638*edd2f0e1SBarry Smith     ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
639da3a660dSBarry Smith                             mat->Mvctx); CHKERR(ierr);
640da3a660dSBarry Smith 
641da3a660dSBarry Smith     /*  t = b - (2*E - D)x */
642da3a660dSBarry Smith     v = A->a;
643da3a660dSBarry Smith     for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ - 1])*x[i]; }
644da3a660dSBarry Smith 
645da3a660dSBarry Smith     /*  t = (E + L)^{-1}t */
646da3a660dSBarry Smith     ts = t - 1; /* shifted by one for index start of a or mat->j*/
647da3a660dSBarry Smith     diag = A->diag;
648da3a660dSBarry Smith     VecSet(&zero,mat->lvec);
64906be10caSBarry Smith     ierr = VecPipelineBegin(tt,mat->lvec,INSERTVALUES,PIPELINEDOWN,
650da3a660dSBarry Smith                                                  mat->Mvctx); CHKERR(ierr);
651da3a660dSBarry Smith     for ( i=0; i<m; i++ ) {
652da3a660dSBarry Smith       n    = diag[i] - A->i[i];
653da3a660dSBarry Smith       idx  = A->j + A->i[i] - 1;
654da3a660dSBarry Smith       v    = A->a + A->i[i] - 1;
655da3a660dSBarry Smith       sum  = t[i];
656da3a660dSBarry Smith       SPARSEDENSEMDOT(sum,ts,v,idx,n);
657da3a660dSBarry Smith       d    = shift + A->a[diag[i]-1];
658da3a660dSBarry Smith       n    = B->i[i+1] - B->i[i];
659da3a660dSBarry Smith       idx  = B->j + B->i[i] - 1;
660da3a660dSBarry Smith       v    = B->a + B->i[i] - 1;
661da3a660dSBarry Smith       SPARSEDENSEMDOT(sum,ls,v,idx,n);
662da3a660dSBarry Smith       t[i] = omega*(sum/d);
663da3a660dSBarry Smith     }
664*edd2f0e1SBarry Smith     ierr = VecPipelineEnd(tt,mat->lvec,INSERTVALUES,PIPELINEDOWN,
665da3a660dSBarry Smith                                                     mat->Mvctx); CHKERR(ierr);
666da3a660dSBarry Smith     /*  x = x + t */
667da3a660dSBarry Smith     for ( i=0; i<m; i++ ) { x[i] += t[i]; }
668da3a660dSBarry Smith     VecDestroy(tt);
669da3a660dSBarry Smith     return 0;
670da3a660dSBarry Smith   }
671da3a660dSBarry Smith 
6721eb62cbbSBarry Smith 
673d6dfbf8fSBarry Smith   if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){
674da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
675da3a660dSBarry Smith       VecSet(&zero,mat->lvec); VecSet(&zero,xx);
676da3a660dSBarry Smith     }
677da3a660dSBarry Smith     else {
67806be10caSBarry Smith       ierr=VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERUP,mat->Mvctx);
679d6dfbf8fSBarry Smith       CHKERR(ierr);
68006be10caSBarry Smith       ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERUP,mat->Mvctx);
681d6dfbf8fSBarry Smith       CHKERR(ierr);
682da3a660dSBarry Smith     }
683d6dfbf8fSBarry Smith     while (its--) {
684d6dfbf8fSBarry Smith       /* go down through the rows */
68506be10caSBarry Smith       ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN,
686d6dfbf8fSBarry Smith                               mat->Mvctx); CHKERR(ierr);
687d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
688d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
689d6dfbf8fSBarry Smith         idx  = A->j + A->i[i] - 1;
690d6dfbf8fSBarry Smith         v    = A->a + A->i[i] - 1;
691d6dfbf8fSBarry Smith         sum  = b[i];
692d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
693d6dfbf8fSBarry Smith         d    = shift + A->a[diag[i]-1];
694d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
695d6dfbf8fSBarry Smith         idx  = B->j + B->i[i] - 1;
696d6dfbf8fSBarry Smith         v    = B->a + B->i[i] - 1;
697d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
698d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
699d6dfbf8fSBarry Smith       }
700*edd2f0e1SBarry Smith       ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN,
701d6dfbf8fSBarry Smith                             mat->Mvctx); CHKERR(ierr);
702d6dfbf8fSBarry Smith       /* come up through the rows */
70306be10caSBarry Smith       ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
704d6dfbf8fSBarry Smith                               mat->Mvctx); CHKERR(ierr);
705d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
706d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
707d6dfbf8fSBarry Smith         idx  = A->j + A->i[i] - 1;
708d6dfbf8fSBarry Smith         v    = A->a + A->i[i] - 1;
709d6dfbf8fSBarry Smith         sum  = b[i];
710d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
711d6dfbf8fSBarry Smith         d    = shift + A->a[diag[i]-1];
712d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
713d6dfbf8fSBarry Smith         idx  = B->j + B->i[i] - 1;
714d6dfbf8fSBarry Smith         v    = B->a + B->i[i] - 1;
715d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
716d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
717d6dfbf8fSBarry Smith       }
718*edd2f0e1SBarry Smith       ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
719d6dfbf8fSBarry Smith                             mat->Mvctx); CHKERR(ierr);
720d6dfbf8fSBarry Smith     }
721d6dfbf8fSBarry Smith   }
722d6dfbf8fSBarry Smith   else if (flag & SOR_FORWARD_SWEEP){
723da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
724da3a660dSBarry Smith       VecSet(&zero,mat->lvec);
72506be10caSBarry Smith       ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN,
726da3a660dSBarry Smith                               mat->Mvctx); CHKERR(ierr);
727da3a660dSBarry Smith       for ( i=0; i<m; i++ ) {
728da3a660dSBarry Smith         n    = diag[i] - A->i[i];
729da3a660dSBarry Smith         idx  = A->j + A->i[i] - 1;
730da3a660dSBarry Smith         v    = A->a + A->i[i] - 1;
731da3a660dSBarry Smith         sum  = b[i];
732da3a660dSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
733da3a660dSBarry Smith         d    = shift + A->a[diag[i]-1];
734da3a660dSBarry Smith         n    = B->i[i+1] - B->i[i];
735da3a660dSBarry Smith         idx  = B->j + B->i[i] - 1;
736da3a660dSBarry Smith         v    = B->a + B->i[i] - 1;
737da3a660dSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
738da3a660dSBarry Smith         x[i] = omega*(sum/d);
739da3a660dSBarry Smith       }
740*edd2f0e1SBarry Smith       ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN,
741da3a660dSBarry Smith                             mat->Mvctx); CHKERR(ierr);
742da3a660dSBarry Smith       its--;
743da3a660dSBarry Smith     }
744d6dfbf8fSBarry Smith     while (its--) {
74506be10caSBarry Smith       ierr=VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERUP,mat->Mvctx);
746d6dfbf8fSBarry Smith       CHKERR(ierr);
74706be10caSBarry Smith       ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERUP,mat->Mvctx);
748d6dfbf8fSBarry Smith       CHKERR(ierr);
74906be10caSBarry Smith       ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN,
750d6dfbf8fSBarry Smith                               mat->Mvctx); CHKERR(ierr);
751d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
752d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
753d6dfbf8fSBarry Smith         idx  = A->j + A->i[i] - 1;
754d6dfbf8fSBarry Smith         v    = A->a + A->i[i] - 1;
755d6dfbf8fSBarry Smith         sum  = b[i];
756d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
757d6dfbf8fSBarry Smith         d    = shift + A->a[diag[i]-1];
758d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
759d6dfbf8fSBarry Smith         idx  = B->j + B->i[i] - 1;
760d6dfbf8fSBarry Smith         v    = B->a + B->i[i] - 1;
761d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
762d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
763d6dfbf8fSBarry Smith       }
764*edd2f0e1SBarry Smith       ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN,
765d6dfbf8fSBarry Smith                             mat->Mvctx); CHKERR(ierr);
766d6dfbf8fSBarry Smith     }
767d6dfbf8fSBarry Smith   }
768d6dfbf8fSBarry Smith   else if (flag & SOR_BACKWARD_SWEEP){
769da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
770da3a660dSBarry Smith       VecSet(&zero,mat->lvec);
77106be10caSBarry Smith       ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
772da3a660dSBarry Smith                               mat->Mvctx); CHKERR(ierr);
773da3a660dSBarry Smith       for ( i=m-1; i>-1; i-- ) {
774da3a660dSBarry Smith         n    = A->i[i+1] - diag[i] - 1;
775da3a660dSBarry Smith         idx  = A->j + diag[i];
776da3a660dSBarry Smith         v    = A->a + diag[i];
777da3a660dSBarry Smith         sum  = b[i];
778da3a660dSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
779da3a660dSBarry Smith         d    = shift + A->a[diag[i]-1];
780da3a660dSBarry Smith         n    = B->i[i+1] - B->i[i];
781da3a660dSBarry Smith         idx  = B->j + B->i[i] - 1;
782da3a660dSBarry Smith         v    = B->a + B->i[i] - 1;
783da3a660dSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
784da3a660dSBarry Smith         x[i] = omega*(sum/d);
785da3a660dSBarry Smith       }
786*edd2f0e1SBarry Smith       ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
787da3a660dSBarry Smith                             mat->Mvctx); CHKERR(ierr);
788da3a660dSBarry Smith       its--;
789da3a660dSBarry Smith     }
790d6dfbf8fSBarry Smith     while (its--) {
79106be10caSBarry Smith       ierr = VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERDOWN,
792d6dfbf8fSBarry Smith                             mat->Mvctx); CHKERR(ierr);
79306be10caSBarry Smith       ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERDOWN,
794d6dfbf8fSBarry Smith                             mat->Mvctx); CHKERR(ierr);
79506be10caSBarry Smith       ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
796d6dfbf8fSBarry Smith                               mat->Mvctx); CHKERR(ierr);
797d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
798d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
799d6dfbf8fSBarry Smith         idx  = A->j + A->i[i] - 1;
800d6dfbf8fSBarry Smith         v    = A->a + A->i[i] - 1;
801d6dfbf8fSBarry Smith         sum  = b[i];
802d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
803d6dfbf8fSBarry Smith         d    = shift + A->a[diag[i]-1];
804d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
805d6dfbf8fSBarry Smith         idx  = B->j + B->i[i] - 1;
806d6dfbf8fSBarry Smith         v    = B->a + B->i[i] - 1;
807d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
808d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
809d6dfbf8fSBarry Smith       }
810*edd2f0e1SBarry Smith       ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
811d6dfbf8fSBarry Smith                             mat->Mvctx); CHKERR(ierr);
812d6dfbf8fSBarry Smith     }
813d6dfbf8fSBarry Smith   }
814d6dfbf8fSBarry Smith   else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
815da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
816da3a660dSBarry Smith       return MatRelax(mat->A,bb,omega,flag,shift,its,xx);
817da3a660dSBarry Smith     }
81806be10caSBarry Smith     ierr=VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERALL,mat->Mvctx);
819d6dfbf8fSBarry Smith     CHKERR(ierr);
82006be10caSBarry Smith     ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERALL,mat->Mvctx);
821d6dfbf8fSBarry Smith     CHKERR(ierr);
822d6dfbf8fSBarry Smith     while (its--) {
823d6dfbf8fSBarry Smith       /* go down through the rows */
824d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
825d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
826d6dfbf8fSBarry Smith         idx  = A->j + A->i[i] - 1;
827d6dfbf8fSBarry Smith         v    = A->a + A->i[i] - 1;
828d6dfbf8fSBarry Smith         sum  = b[i];
829d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
830d6dfbf8fSBarry Smith         d    = shift + A->a[diag[i]-1];
831d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
832d6dfbf8fSBarry Smith         idx  = B->j + B->i[i] - 1;
833d6dfbf8fSBarry Smith         v    = B->a + B->i[i] - 1;
834d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
835d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
836d6dfbf8fSBarry Smith       }
837d6dfbf8fSBarry Smith       /* come up through the rows */
838d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
839d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
840d6dfbf8fSBarry Smith         idx  = A->j + A->i[i] - 1;
841d6dfbf8fSBarry Smith         v    = A->a + A->i[i] - 1;
842d6dfbf8fSBarry Smith         sum  = b[i];
843d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
844d6dfbf8fSBarry Smith         d    = shift + A->a[diag[i]-1];
845d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
846d6dfbf8fSBarry Smith         idx  = B->j + B->i[i] - 1;
847d6dfbf8fSBarry Smith         v    = B->a + B->i[i] - 1;
848d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
849d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
850d6dfbf8fSBarry Smith       }
851d6dfbf8fSBarry Smith     }
852d6dfbf8fSBarry Smith   }
853d6dfbf8fSBarry Smith   else if (flag & SOR_LOCAL_FORWARD_SWEEP){
854da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
855da3a660dSBarry Smith       return MatRelax(mat->A,bb,omega,flag,shift,its,xx);
856da3a660dSBarry Smith     }
85706be10caSBarry Smith     ierr=VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERALL,mat->Mvctx);
858d6dfbf8fSBarry Smith     CHKERR(ierr);
85906be10caSBarry Smith     ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERALL,mat->Mvctx);
860d6dfbf8fSBarry Smith     CHKERR(ierr);
861d6dfbf8fSBarry Smith     while (its--) {
862d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
863d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
864d6dfbf8fSBarry Smith         idx  = A->j + A->i[i] - 1;
865d6dfbf8fSBarry Smith         v    = A->a + A->i[i] - 1;
866d6dfbf8fSBarry Smith         sum  = b[i];
867d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
868d6dfbf8fSBarry Smith         d    = shift + A->a[diag[i]-1];
869d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
870d6dfbf8fSBarry Smith         idx  = B->j + B->i[i] - 1;
871d6dfbf8fSBarry Smith         v    = B->a + B->i[i] - 1;
872d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
873d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
874d6dfbf8fSBarry Smith       }
875d6dfbf8fSBarry Smith     }
876d6dfbf8fSBarry Smith   }
877d6dfbf8fSBarry Smith   else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
878da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
879da3a660dSBarry Smith       return MatRelax(mat->A,bb,omega,flag,shift,its,xx);
880da3a660dSBarry Smith     }
88106be10caSBarry Smith     ierr = VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERALL,
882d6dfbf8fSBarry Smith                             mat->Mvctx); CHKERR(ierr);
88306be10caSBarry Smith     ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERALL,
884d6dfbf8fSBarry Smith                             mat->Mvctx); CHKERR(ierr);
885d6dfbf8fSBarry Smith     while (its--) {
886d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
887d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
888d6dfbf8fSBarry Smith         idx  = A->j + A->i[i] - 1;
889d6dfbf8fSBarry Smith         v    = A->a + A->i[i] - 1;
890d6dfbf8fSBarry Smith         sum  = b[i];
891d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
892d6dfbf8fSBarry Smith         d    = shift + A->a[diag[i]-1];
893d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
894d6dfbf8fSBarry Smith         idx  = B->j + B->i[i] - 1;
895d6dfbf8fSBarry Smith         v    = B->a + B->i[i] - 1;
896d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
897d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
898d6dfbf8fSBarry Smith       }
899d6dfbf8fSBarry Smith     }
900d6dfbf8fSBarry Smith   }
9018a729477SBarry Smith   return 0;
9028a729477SBarry Smith }
903a66be287SLois Curfman McInnes 
904fc76ce87SLois Curfman McInnes static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,int *nz,
905fc76ce87SLois Curfman McInnes                              int *nzalloc,int *mem)
906a66be287SLois Curfman McInnes {
907a66be287SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
908a66be287SLois Curfman McInnes   Mat        A = mat->A, B = mat->B;
909a66be287SLois Curfman McInnes   int        ierr, isend[3], irecv[3], nzA, nzallocA, memA;
910a66be287SLois Curfman McInnes 
911a66be287SLois Curfman McInnes   ierr = MatGetInfo(A,MAT_LOCAL,&nzA,&nzallocA,&memA); CHKERR(ierr);
912a66be287SLois Curfman McInnes   ierr = MatGetInfo(B,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERR(ierr);
913a66be287SLois Curfman McInnes   isend[0] += nzA; isend[1] += nzallocA; isend[2] += memA;
914a66be287SLois Curfman McInnes   if (flag == MAT_LOCAL) {
915a66be287SLois Curfman McInnes     *nz = isend[0]; *nzalloc = isend[1]; *mem = isend[2];
916a66be287SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_MAX) {
917a66be287SLois Curfman McInnes     MPI_Allreduce((void *) isend,(void *) irecv,3,MPI_INT,MPI_MAX,matin->comm);
918a66be287SLois Curfman McInnes     *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
919a66be287SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_SUM) {
920a66be287SLois Curfman McInnes     MPI_Allreduce((void *) isend,(void *) irecv,3,MPI_INT,MPI_SUM,matin->comm);
921a66be287SLois Curfman McInnes     *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
922a66be287SLois Curfman McInnes   }
923a66be287SLois Curfman McInnes   return 0;
924a66be287SLois Curfman McInnes }
925a66be287SLois Curfman McInnes 
926fc76ce87SLois Curfman McInnes static int MatSetOption_MPIAIJ(Mat aijin,MatOption op)
927c74985f6SBarry Smith {
92844a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data;
929c74985f6SBarry Smith 
930c74985f6SBarry Smith   if      (op == NO_NEW_NONZERO_LOCATIONS)  {
931c74985f6SBarry Smith     MatSetOption(aij->A,op);
932c74985f6SBarry Smith     MatSetOption(aij->B,op);
933c74985f6SBarry Smith   }
934c74985f6SBarry Smith   else if (op == YES_NEW_NONZERO_LOCATIONS) {
935c74985f6SBarry Smith     MatSetOption(aij->A,op);
936c74985f6SBarry Smith     MatSetOption(aij->B,op);
937c74985f6SBarry Smith   }
938c74985f6SBarry Smith   else if (op == COLUMN_ORIENTED) SETERR(1,"Column oriented not supported");
939c74985f6SBarry Smith   return 0;
940c74985f6SBarry Smith }
941c74985f6SBarry Smith 
942d1710a03SLois Curfman McInnes static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
943c74985f6SBarry Smith {
94444a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
945c74985f6SBarry Smith   *m = mat->M; *n = mat->N;
946c74985f6SBarry Smith   return 0;
947c74985f6SBarry Smith }
948c74985f6SBarry Smith 
949d1710a03SLois Curfman McInnes static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
950c74985f6SBarry Smith {
95144a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
952c74985f6SBarry Smith   *m = mat->m; *n = mat->n;
953c74985f6SBarry Smith   return 0;
954c74985f6SBarry Smith }
955c74985f6SBarry Smith 
956d1710a03SLois Curfman McInnes static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
957c74985f6SBarry Smith {
95844a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
959c74985f6SBarry Smith   *m = mat->rstart; *n = mat->rend;
960c74985f6SBarry Smith   return 0;
961c74985f6SBarry Smith }
962c74985f6SBarry Smith 
96339e00950SLois Curfman McInnes static int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
96439e00950SLois Curfman McInnes {
965154123eaSLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
966154123eaSLois Curfman McInnes   Scalar     *vworkA, *vworkB, **pvA, **pvB;
967154123eaSLois Curfman McInnes   int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
968154123eaSLois Curfman McInnes   int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
96939e00950SLois Curfman McInnes 
970154123eaSLois Curfman McInnes   if (!mat->assembled)
97139e00950SLois Curfman McInnes     SETERR(1,"MatGetRow_MPIAIJ: Must assemble matrix first.");
97239e00950SLois Curfman McInnes   if (row < rstart || row >= rend)
973abc0e9e4SLois Curfman McInnes     SETERR(1,"MatGetRow_MPIAIJ: Currently you can get only local rows.")
974abc0e9e4SLois Curfman McInnes   lrow = row - rstart;
97539e00950SLois Curfman McInnes 
976154123eaSLois Curfman McInnes   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
977154123eaSLois Curfman McInnes   if (!v)   {pvA = 0; pvB = 0;}
978154123eaSLois Curfman McInnes   if (!idx) {pcA = 0; if (!v) pcB = 0;}
979154123eaSLois Curfman McInnes   ierr = MatGetRow(mat->A,lrow,&nzA,pcA,pvA); CHKERR(ierr);
980154123eaSLois Curfman McInnes   ierr = MatGetRow(mat->B,lrow,&nzB,pcB,pvB); CHKERR(ierr);
981154123eaSLois Curfman McInnes   nztot = nzA + nzB;
982154123eaSLois Curfman McInnes 
983154123eaSLois Curfman McInnes   if (v  || idx) {
984154123eaSLois Curfman McInnes     if (nztot) {
985154123eaSLois Curfman McInnes       /* Sort by increasing column numbers, assuming A and B already sorted */
986154123eaSLois Curfman McInnes       int imark, imark2;
987154123eaSLois Curfman McInnes       for (i=0; i<nzB; i++) cworkB[i] = mat->garray[cworkB[i]];
988154123eaSLois Curfman McInnes       if (v) {
98939e00950SLois Curfman McInnes         *v = (Scalar *) MALLOC( (nztot)*sizeof(Scalar) ); CHKPTR(*v);
99039e00950SLois Curfman McInnes         for ( i=0; i<nzB; i++ ) {
991154123eaSLois Curfman McInnes           if (cworkB[i] < cstart)   (*v)[i] = vworkB[i];
992154123eaSLois Curfman McInnes           else break;
993154123eaSLois Curfman McInnes         }
994154123eaSLois Curfman McInnes         imark = i;
995154123eaSLois Curfman McInnes         for ( i=0; i<nzA; i++ )     (*v)[imark+i] = vworkA[i];
996154123eaSLois Curfman McInnes         imark2 = imark+nzA;
997154123eaSLois Curfman McInnes         for ( i=imark; i<nzB; i++ ) (*v)[imark2+i] = vworkB[i];
998154123eaSLois Curfman McInnes       }
999154123eaSLois Curfman McInnes       if (idx) {
1000154123eaSLois Curfman McInnes         *idx = (int *) MALLOC( (nztot)*sizeof(int) ); CHKPTR(*idx);
1001154123eaSLois Curfman McInnes         for (i=0; i<nzA; i++) cworkA[i] += cstart;
1002154123eaSLois Curfman McInnes         for ( i=0; i<nzB; i++ ) {
1003154123eaSLois Curfman McInnes           if (cworkB[i] < cstart)   (*idx)[i] = cworkB[i];
1004154123eaSLois Curfman McInnes           else break;
1005154123eaSLois Curfman McInnes         }
1006154123eaSLois Curfman McInnes         imark = i;
1007154123eaSLois Curfman McInnes         for ( i=0; i<nzA; i++ )     (*idx)[imark+i] = cworkA[i];
1008154123eaSLois Curfman McInnes         imark2 = imark+nzA;
1009154123eaSLois Curfman McInnes         for ( i=imark; i<nzB; i++ ) (*idx)[imark2+i] = cworkB[i];
101039e00950SLois Curfman McInnes       }
101139e00950SLois Curfman McInnes     }
101239e00950SLois Curfman McInnes     else {*idx = 0; *v=0;}
1013154123eaSLois Curfman McInnes   }
101439e00950SLois Curfman McInnes   *nz = nztot;
1015154123eaSLois Curfman McInnes   ierr = MatRestoreRow(mat->A,lrow,&nzA,pcA,pvA); CHKERR(ierr);
1016154123eaSLois Curfman McInnes   ierr = MatRestoreRow(mat->B,lrow,&nzB,pcB,pvB); CHKERR(ierr);
101739e00950SLois Curfman McInnes   return 0;
101839e00950SLois Curfman McInnes }
101939e00950SLois Curfman McInnes 
102039e00950SLois Curfman McInnes static int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
102139e00950SLois Curfman McInnes {
1022154123eaSLois Curfman McInnes   if (idx) FREE(*idx);
1023154123eaSLois Curfman McInnes   if (v) FREE(*v);
102439e00950SLois Curfman McInnes   return 0;
102539e00950SLois Curfman McInnes }
102639e00950SLois Curfman McInnes 
1027fc76ce87SLois Curfman McInnes extern int MatConvert_MPIAIJ(Mat,MatType,Mat *);
102807a0e7adSLois Curfman McInnes static int MatCopy_MPIAIJ_Private(Mat,Mat *);
1029d6dfbf8fSBarry Smith 
10308a729477SBarry Smith /* -------------------------------------------------------------------*/
10312ee70a88SLois Curfman McInnes static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
103239e00950SLois Curfman McInnes        MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
103344a69424SLois Curfman McInnes        MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
103444a69424SLois Curfman McInnes        MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
10351eb62cbbSBarry Smith        0,0,0,0,
10368a729477SBarry Smith        0,0,
103744a69424SLois Curfman McInnes        MatRelax_MPIAIJ,
10388a729477SBarry Smith        0,
1039a66be287SLois Curfman McInnes        MatGetInfo_MPIAIJ,0,
1040d1710a03SLois Curfman McInnes        MatGetDiagonal_MPIAIJ,0,0,
1041ee50ffe9SBarry Smith        MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
10421eb62cbbSBarry Smith        0,
10432ee70a88SLois Curfman McInnes        MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,0,
1044c74985f6SBarry Smith        0,0,0,0,
1045d1710a03SLois Curfman McInnes        MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
104677915d1cSLois Curfman McInnes        0,0,
104707a0e7adSLois Curfman McInnes        0,0,MatConvert_MPIAIJ,0,0,MatCopy_MPIAIJ_Private};
10488a729477SBarry Smith 
10498a729477SBarry Smith /*@
1050ff756334SLois Curfman McInnes    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
1051ff756334SLois Curfman McInnes    (the default parallel PETSc format).
10528a729477SBarry Smith 
10538a729477SBarry Smith    Input Parameters:
10541eb62cbbSBarry Smith .  comm - MPI communicator
10557d3e4905SLois Curfman McInnes .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
10567d3e4905SLois Curfman McInnes .  n - number of local columns (or PETSC_DECIDE to have calculated
10577d3e4905SLois Curfman McInnes            if N is given)
10587d3e4905SLois Curfman McInnes .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
10597d3e4905SLois Curfman McInnes .  N - number of global columns (or PETSC_DECIDE to have calculated
10607d3e4905SLois Curfman McInnes            if n is given)
1061ff756334SLois Curfman McInnes .  d_nz - number of nonzeros per row in diagonal portion of matrix
1062ff756334SLois Curfman McInnes            (same for all local rows)
10631eb62cbbSBarry Smith .  d_nzz - number of nonzeros per row in diagonal portion of matrix or null
1064ff756334SLois Curfman McInnes            (possibly different for each row).  You must leave room for the
1065ff756334SLois Curfman McInnes            diagonal entry even if it is zero.
1066ff756334SLois Curfman McInnes .  o_nz - number of nonzeros per row in off-diagonal portion of matrix
1067ff756334SLois Curfman McInnes            (same for all local rows)
10681eb62cbbSBarry Smith .  o_nzz - number of nonzeros per row in off-diagonal portion of matrix
1069ff756334SLois Curfman McInnes            or null (possibly different for each row).
10708a729477SBarry Smith 
1071ff756334SLois Curfman McInnes    Output Parameter:
10728a729477SBarry Smith .  newmat - the matrix
10738a729477SBarry Smith 
1074ff756334SLois Curfman McInnes    Notes:
1075ff756334SLois Curfman McInnes    The AIJ format (also called the Yale sparse matrix format or
1076ff756334SLois Curfman McInnes    compressed row storage), is fully compatible with standard Fortran 77
1077ff756334SLois Curfman McInnes    storage.  That is, the stored row and column indices begin at
1078ff756334SLois Curfman McInnes    one, not zero.
1079ff756334SLois Curfman McInnes 
1080ff756334SLois Curfman McInnes    The user MUST specify either the local or global matrix dimensions
1081ff756334SLois Curfman McInnes    (possibly both).
1082ff756334SLois Curfman McInnes 
1083ff756334SLois Curfman McInnes    The user can set d_nz, d_nnz, o_nz, and o_nnz to zero for PETSc to
1084ff756334SLois Curfman McInnes    control dynamic memory allocation.
1085ff756334SLois Curfman McInnes 
1086dbd7a890SLois Curfman McInnes .keywords: matrix, aij, compressed row, sparse, parallel
1087ff756334SLois Curfman McInnes 
1088dbd7a890SLois Curfman McInnes .seealso: MatCreate(), MatCreateSequentialAIJ(), MatSetValues()
10898a729477SBarry Smith @*/
10901eb62cbbSBarry Smith int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
10911eb62cbbSBarry Smith                  int d_nz,int *d_nnz, int o_nz,int *o_nnz,Mat *newmat)
10928a729477SBarry Smith {
10938a729477SBarry Smith   Mat          mat;
109444a69424SLois Curfman McInnes   Mat_MPIAIJ   *aij;
10956abc6512SBarry Smith   int          ierr, i,sum[2],work[2];
10968a729477SBarry Smith   *newmat         = 0;
109744a69424SLois Curfman McInnes   PETSCHEADERCREATE(mat,_Mat,MAT_COOKIE,MATMPIAIJ,comm);
1098a5a9c739SBarry Smith   PLogObjectCreate(mat);
109944a69424SLois Curfman McInnes   mat->data       = (void *) (aij = NEW(Mat_MPIAIJ)); CHKPTR(aij);
11008a729477SBarry Smith   mat->ops        = &MatOps;
110144a69424SLois Curfman McInnes   mat->destroy    = MatDestroy_MPIAIJ;
110244a69424SLois Curfman McInnes   mat->view       = MatView_MPIAIJ;
11038a729477SBarry Smith   mat->factor     = 0;
1104d6dfbf8fSBarry Smith 
1105d6dfbf8fSBarry Smith   mat->comm       = comm;
110607a0e7adSLois Curfman McInnes   aij->insertmode = NOTSETVALUES;
11071eb62cbbSBarry Smith   MPI_Comm_rank(comm,&aij->mytid);
11081eb62cbbSBarry Smith   MPI_Comm_size(comm,&aij->numtids);
11091eb62cbbSBarry Smith 
1110dbd7a890SLois Curfman McInnes   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
11111eb62cbbSBarry Smith     work[0] = m; work[1] = n;
1112d6dfbf8fSBarry Smith     MPI_Allreduce((void *) work,(void *) sum,2,MPI_INT,MPI_SUM,comm );
1113dbd7a890SLois Curfman McInnes     if (M == PETSC_DECIDE) M = sum[0];
1114dbd7a890SLois Curfman McInnes     if (N == PETSC_DECIDE) N = sum[1];
11151eb62cbbSBarry Smith   }
1116dbd7a890SLois Curfman McInnes   if (m == PETSC_DECIDE)
1117dbd7a890SLois Curfman McInnes     {m = M/aij->numtids + ((M % aij->numtids) > aij->mytid);}
1118dbd7a890SLois Curfman McInnes   if (n == PETSC_DECIDE)
1119dbd7a890SLois Curfman McInnes     {n = N/aij->numtids + ((N % aij->numtids) > aij->mytid);}
11208a729477SBarry Smith   aij->m = m;
11218a729477SBarry Smith   aij->n = n;
11221eb62cbbSBarry Smith   aij->N = N;
11231eb62cbbSBarry Smith   aij->M = M;
11241eb62cbbSBarry Smith 
11251eb62cbbSBarry Smith   /* build local table of row and column ownerships */
11261eb62cbbSBarry Smith   aij->rowners = (int *) MALLOC(2*(aij->numtids+2)*sizeof(int));
11271eb62cbbSBarry Smith   CHKPTR(aij->rowners);
11281eb62cbbSBarry Smith   aij->cowners = aij->rowners + aij->numtids + 1;
11291eb62cbbSBarry Smith   MPI_Allgather(&m,1,MPI_INT,aij->rowners+1,1,MPI_INT,comm);
11301eb62cbbSBarry Smith   aij->rowners[0] = 0;
11311eb62cbbSBarry Smith   for ( i=2; i<=aij->numtids; i++ ) {
11321eb62cbbSBarry Smith     aij->rowners[i] += aij->rowners[i-1];
11338a729477SBarry Smith   }
11341eb62cbbSBarry Smith   aij->rstart = aij->rowners[aij->mytid];
11351eb62cbbSBarry Smith   aij->rend   = aij->rowners[aij->mytid+1];
11361eb62cbbSBarry Smith   MPI_Allgather(&n,1,MPI_INT,aij->cowners+1,1,MPI_INT,comm);
11371eb62cbbSBarry Smith   aij->cowners[0] = 0;
11381eb62cbbSBarry Smith   for ( i=2; i<=aij->numtids; i++ ) {
11391eb62cbbSBarry Smith     aij->cowners[i] += aij->cowners[i-1];
11408a729477SBarry Smith   }
11411eb62cbbSBarry Smith   aij->cstart = aij->cowners[aij->mytid];
11421eb62cbbSBarry Smith   aij->cend   = aij->cowners[aij->mytid+1];
11438a729477SBarry Smith 
11448a729477SBarry Smith 
11456b5873e3SBarry Smith   ierr = MatCreateSequentialAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&aij->A);
11466b5873e3SBarry Smith   CHKERR(ierr);
1147a5a9c739SBarry Smith   PLogObjectParent(mat,aij->A);
11486b5873e3SBarry Smith   ierr = MatCreateSequentialAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&aij->B);
11496b5873e3SBarry Smith   CHKERR(ierr);
1150a5a9c739SBarry Smith   PLogObjectParent(mat,aij->B);
11518a729477SBarry Smith 
11521eb62cbbSBarry Smith   /* build cache for off array entries formed */
1153dbd7a890SLois Curfman McInnes   ierr = StashBuild_Private(&aij->stash); CHKERR(ierr);
11549e25ed09SBarry Smith   aij->colmap    = 0;
11559e25ed09SBarry Smith   aij->garray    = 0;
11568a729477SBarry Smith 
11571eb62cbbSBarry Smith   /* stuff used for matrix vector multiply */
11581eb62cbbSBarry Smith   aij->lvec      = 0;
11591eb62cbbSBarry Smith   aij->Mvctx     = 0;
1160d6dfbf8fSBarry Smith   aij->assembled = 0;
11618a729477SBarry Smith 
1162d6dfbf8fSBarry Smith   *newmat = mat;
1163d6dfbf8fSBarry Smith   return 0;
1164d6dfbf8fSBarry Smith }
1165c74985f6SBarry Smith 
116607a0e7adSLois Curfman McInnes static int MatCopy_MPIAIJ_Private(Mat matin,Mat *newmat)
1167d6dfbf8fSBarry Smith {
1168d6dfbf8fSBarry Smith   Mat        mat;
116944a69424SLois Curfman McInnes   Mat_MPIAIJ *aij,*oldmat = (Mat_MPIAIJ *) matin->data;
11702ee70a88SLois Curfman McInnes   int        ierr, len;
1171d6dfbf8fSBarry Smith   *newmat      = 0;
1172d6dfbf8fSBarry Smith 
1173d6dfbf8fSBarry Smith   if (!oldmat->assembled) SETERR(1,"Cannot copy unassembled matrix");
117444a69424SLois Curfman McInnes   PETSCHEADERCREATE(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm);
1175a5a9c739SBarry Smith   PLogObjectCreate(mat);
117644a69424SLois Curfman McInnes   mat->data       = (void *) (aij = NEW(Mat_MPIAIJ)); CHKPTR(aij);
1177d6dfbf8fSBarry Smith   mat->ops        = &MatOps;
117844a69424SLois Curfman McInnes   mat->destroy    = MatDestroy_MPIAIJ;
117944a69424SLois Curfman McInnes   mat->view       = MatView_MPIAIJ;
1180d6dfbf8fSBarry Smith   mat->factor     = matin->factor;
1181d6dfbf8fSBarry Smith 
1182d6dfbf8fSBarry Smith   aij->m          = oldmat->m;
1183d6dfbf8fSBarry Smith   aij->n          = oldmat->n;
1184d6dfbf8fSBarry Smith   aij->M          = oldmat->M;
1185d6dfbf8fSBarry Smith   aij->N          = oldmat->N;
1186d6dfbf8fSBarry Smith 
1187d6dfbf8fSBarry Smith   aij->assembled  = 1;
1188d6dfbf8fSBarry Smith   aij->rstart     = oldmat->rstart;
1189d6dfbf8fSBarry Smith   aij->rend       = oldmat->rend;
1190d6dfbf8fSBarry Smith   aij->cstart     = oldmat->cstart;
1191d6dfbf8fSBarry Smith   aij->cend       = oldmat->cend;
1192d6dfbf8fSBarry Smith   aij->numtids    = oldmat->numtids;
1193d6dfbf8fSBarry Smith   aij->mytid      = oldmat->mytid;
119407a0e7adSLois Curfman McInnes   aij->insertmode = NOTSETVALUES;
1195d6dfbf8fSBarry Smith 
1196d6dfbf8fSBarry Smith   aij->rowners        = (int *) MALLOC( (aij->numtids+1)*sizeof(int) );
1197d6dfbf8fSBarry Smith   CHKPTR(aij->rowners);
1198d6dfbf8fSBarry Smith   MEMCPY(aij->rowners,oldmat->rowners,(aij->numtids+1)*sizeof(int));
1199dbd7a890SLois Curfman McInnes   ierr = StashInitialize_Private(&aij->stash); CHKERR(ierr);
12002ee70a88SLois Curfman McInnes   if (oldmat->colmap) {
12012ee70a88SLois Curfman McInnes     aij->colmap      = (int *) MALLOC( (aij->N)*sizeof(int) );
12022ee70a88SLois Curfman McInnes     CHKPTR(aij->colmap);
12032ee70a88SLois Curfman McInnes     MEMCPY(aij->colmap,oldmat->colmap,(aij->N)*sizeof(int));
12042ee70a88SLois Curfman McInnes   } else aij->colmap = 0;
12052ee70a88SLois Curfman McInnes   if (oldmat->garray && (len = ((Mat_AIJ *) (oldmat->B->data))->n)) {
12062ee70a88SLois Curfman McInnes     aij->garray      = (int *) MALLOC(len*sizeof(int) ); CHKPTR(aij->garray);
12072ee70a88SLois Curfman McInnes     MEMCPY(aij->garray,oldmat->garray,len*sizeof(int));
12082ee70a88SLois Curfman McInnes   } else aij->garray = 0;
1209d6dfbf8fSBarry Smith   mat->comm           = matin->comm;
1210d6dfbf8fSBarry Smith 
12116469c4f9SBarry Smith   ierr =  VecDuplicate(oldmat->lvec,&aij->lvec); CHKERR(ierr);
1212a5a9c739SBarry Smith   PLogObjectParent(mat,aij->lvec);
1213d6dfbf8fSBarry Smith   ierr =  VecScatterCtxCopy(oldmat->Mvctx,&aij->Mvctx); CHKERR(ierr);
1214a5a9c739SBarry Smith   PLogObjectParent(mat,aij->Mvctx);
121507a0e7adSLois Curfman McInnes   ierr =  MatConvert(oldmat->A,MATSAME,&aij->A); CHKERR(ierr);
1216a5a9c739SBarry Smith   PLogObjectParent(mat,aij->A);
121707a0e7adSLois Curfman McInnes   ierr =  MatConvert(oldmat->B,MATSAME,&aij->B); CHKERR(ierr);
1218a5a9c739SBarry Smith   PLogObjectParent(mat,aij->B);
12198a729477SBarry Smith   *newmat = mat;
12208a729477SBarry Smith   return 0;
12218a729477SBarry Smith }
1222