xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 51c98ccd7682f74d7786ded28296941f8494dcc4)
1cb512458SBarry Smith #ifndef lint
2*51c98ccdSLois Curfman McInnes static char vcid[] = "$Id: mpiaij.c,v 1.57 1995/07/12 03:04:20 curfman Exp curfman $";
3cb512458SBarry Smith #endif
48a729477SBarry Smith 
51eb62cbbSBarry Smith #include "mpiaij.h"
68a729477SBarry Smith #include "vec/vecimpl.h"
7d6dfbf8fSBarry Smith #include "inline/spops.h"
88a729477SBarry Smith 
99e25ed09SBarry Smith /* local utility routine that creates a mapping from the global column
109e25ed09SBarry Smith number to the local number in the off-diagonal part of the local
119e25ed09SBarry Smith storage of the matrix.  This is done in a non scable way since the
129e25ed09SBarry Smith length of colmap equals the global matrix length.
139e25ed09SBarry Smith */
14b7c46309SBarry Smith static int CreateColmap_Private(Mat mat)
159e25ed09SBarry Smith {
1644a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1744a69424SLois Curfman McInnes   Mat_AIJ    *B = (Mat_AIJ*) aij->B->data;
189e25ed09SBarry Smith   int        n = B->n,i;
1978b31e54SBarry Smith   aij->colmap = (int *) PETSCMALLOC(aij->N*sizeof(int));CHKPTRQ(aij->colmap);
2078b31e54SBarry Smith   PETSCMEMSET(aij->colmap,0,aij->N*sizeof(int));
21abc0e9e4SLois Curfman McInnes   for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
229e25ed09SBarry Smith   return 0;
239e25ed09SBarry Smith }
249e25ed09SBarry Smith 
252493cbb0SBarry Smith extern int DisAssemble_MPIAIJ(Mat);
262493cbb0SBarry Smith 
27*51c98ccdSLois Curfman McInnes static int MatGetReordering_MPIAIJ(Mat mat,MatOrdering type,IS *rperm,IS *cperm)
28299609e3SLois Curfman McInnes {
29299609e3SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
30299609e3SLois Curfman McInnes   int ierr;
31299609e3SLois Curfman McInnes   if (aij->numtids == 1) {
32*51c98ccdSLois Curfman McInnes     ierr = MatGetReordering(aij->A,type,rperm,cperm); CHKERRQ(ierr);
33299609e3SLois Curfman McInnes   } else
34299609e3SLois Curfman McInnes     SETERRQ(1,"MatGetReordering_MPIAIJ:  not yet supported in parallel.");
35299609e3SLois Curfman McInnes   return 0;
36299609e3SLois Curfman McInnes }
37299609e3SLois Curfman McInnes 
382ee70a88SLois Curfman McInnes static int MatSetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,
391eb62cbbSBarry Smith                             int *idxn,Scalar *v,InsertMode addv)
408a729477SBarry Smith {
4144a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
421eb62cbbSBarry Smith   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
431eb62cbbSBarry Smith   int        cstart = aij->cstart, cend = aij->cend,row,col;
448a729477SBarry Smith 
4507a0e7adSLois Curfman McInnes   if (aij->insertmode != NOTSETVALUES && aij->insertmode != addv) {
4678b31e54SBarry Smith     SETERRQ(1,"You cannot mix inserts and adds");
478a729477SBarry Smith   }
481eb62cbbSBarry Smith   aij->insertmode = addv;
498a729477SBarry Smith   for ( i=0; i<m; i++ ) {
5078b31e54SBarry Smith     if (idxm[i] < 0) SETERRQ(1,"Negative row index");
5178b31e54SBarry Smith     if (idxm[i] >= aij->M) SETERRQ(1,"Row index too large");
521eb62cbbSBarry Smith     if (idxm[i] >= rstart && idxm[i] < rend) {
531eb62cbbSBarry Smith       row = idxm[i] - rstart;
541eb62cbbSBarry Smith       for ( j=0; j<n; j++ ) {
5578b31e54SBarry Smith         if (idxn[j] < 0) SETERRQ(1,"Negative column index");
5678b31e54SBarry Smith         if (idxn[j] >= aij->N) SETERRQ(1,"Column index too large");
571eb62cbbSBarry Smith         if (idxn[j] >= cstart && idxn[j] < cend){
581eb62cbbSBarry Smith           col = idxn[j] - cstart;
5978b31e54SBarry Smith           ierr = MatSetValues(aij->A,1,&row,1,&col,v+i*n+j,addv);CHKERRQ(ierr);
601eb62cbbSBarry Smith         }
611eb62cbbSBarry Smith         else {
62d6dfbf8fSBarry Smith           if (aij->assembled) {
63b7c46309SBarry Smith             if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
649e25ed09SBarry Smith             col = aij->colmap[idxn[j]] - 1;
65b7c46309SBarry Smith             if (col < 0 && !((Mat_AIJ*)(aij->A->data))->nonew) {
662493cbb0SBarry Smith               ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
672493cbb0SBarry Smith               col =  idxn[j];
68d6dfbf8fSBarry Smith             }
699e25ed09SBarry Smith           }
709e25ed09SBarry Smith           else col = idxn[j];
7178b31e54SBarry Smith           ierr = MatSetValues(aij->B,1,&row,1,&col,v+i*n+j,addv);CHKERRQ(ierr);
721eb62cbbSBarry Smith         }
731eb62cbbSBarry Smith       }
741eb62cbbSBarry Smith     }
751eb62cbbSBarry Smith     else {
76dbd7a890SLois Curfman McInnes       ierr = StashValues_Private(&aij->stash,idxm[i],n,idxn,v+i*n,addv);
7778b31e54SBarry Smith       CHKERRQ(ierr);
781eb62cbbSBarry Smith     }
798a729477SBarry Smith   }
808a729477SBarry Smith   return 0;
818a729477SBarry Smith }
828a729477SBarry Smith 
838a729477SBarry Smith /*
841eb62cbbSBarry Smith     the assembly code is a lot like the code for vectors, we should
851eb62cbbSBarry Smith     sometime derive a single assembly code that can be used for
861eb62cbbSBarry Smith     either case.
878a729477SBarry Smith */
888a729477SBarry Smith 
89fc76ce87SLois Curfman McInnes static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
908a729477SBarry Smith {
9144a69424SLois Curfman McInnes   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
92d6dfbf8fSBarry Smith   MPI_Comm    comm = mat->comm;
936abc6512SBarry Smith   int         numtids = aij->numtids, *owners = aij->rowners;
941eb62cbbSBarry Smith   int         mytid = aij->mytid;
951eb62cbbSBarry Smith   MPI_Request *send_waits,*recv_waits;
966abc6512SBarry Smith   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
975392566eSBarry Smith   int         tag = mat->tag, *owner,*starts,count,ierr;
981eb62cbbSBarry Smith   InsertMode  addv;
991eb62cbbSBarry Smith   Scalar      *rvalues,*svalues;
1001eb62cbbSBarry Smith 
1011eb62cbbSBarry Smith   /* make sure all processors are either in INSERTMODE or ADDMODE */
10228988994SBarry Smith   MPI_Allreduce((void *) &aij->insertmode,(void *) &addv,1,MPI_INT,
1031eb62cbbSBarry Smith                 MPI_BOR,comm);
1049d00d63dSBarry Smith   if (addv == (ADDVALUES|INSERTVALUES)) {
10578b31e54SBarry Smith     SETERRQ(1,"Some processors have inserted while others have added");
1061eb62cbbSBarry Smith   }
1071eb62cbbSBarry Smith   aij->insertmode = addv; /* in case this processor had no cache */
1081eb62cbbSBarry Smith 
1091eb62cbbSBarry Smith   /*  first count number of contributors to each processor */
11078b31e54SBarry Smith   nprocs = (int *) PETSCMALLOC( 2*numtids*sizeof(int) ); CHKPTRQ(nprocs);
11178b31e54SBarry Smith   PETSCMEMSET(nprocs,0,2*numtids*sizeof(int)); procs = nprocs + numtids;
11278b31e54SBarry Smith   owner = (int *) PETSCMALLOC( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
1131eb62cbbSBarry Smith   for ( i=0; i<aij->stash.n; i++ ) {
1141eb62cbbSBarry Smith     idx = aij->stash.idx[i];
1151eb62cbbSBarry Smith     for ( j=0; j<numtids; j++ ) {
1161eb62cbbSBarry Smith       if (idx >= owners[j] && idx < owners[j+1]) {
1171eb62cbbSBarry Smith         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
1188a729477SBarry Smith       }
1198a729477SBarry Smith     }
1208a729477SBarry Smith   }
1211eb62cbbSBarry Smith   nsends = 0;  for ( i=0; i<numtids; i++ ) { nsends += procs[i];}
1221eb62cbbSBarry Smith 
1231eb62cbbSBarry Smith   /* inform other processors of number of messages and max length*/
12478b31e54SBarry Smith   work = (int *) PETSCMALLOC( numtids*sizeof(int) ); CHKPTRQ(work);
1251eb62cbbSBarry Smith   MPI_Allreduce((void *) procs,(void *) work,numtids,MPI_INT,MPI_SUM,comm);
1261eb62cbbSBarry Smith   nreceives = work[mytid];
1271eb62cbbSBarry Smith   MPI_Allreduce((void *) nprocs,(void *) work,numtids,MPI_INT,MPI_MAX,comm);
1281eb62cbbSBarry Smith   nmax = work[mytid];
12978b31e54SBarry Smith   PETSCFREE(work);
1301eb62cbbSBarry Smith 
1311eb62cbbSBarry Smith   /* post receives:
1321eb62cbbSBarry Smith        1) each message will consist of ordered pairs
1331eb62cbbSBarry Smith      (global index,value) we store the global index as a double
134d6dfbf8fSBarry Smith      to simplify the message passing.
1351eb62cbbSBarry Smith        2) since we don't know how long each individual message is we
1361eb62cbbSBarry Smith      allocate the largest needed buffer for each receive. Potentially
1371eb62cbbSBarry Smith      this is a lot of wasted space.
1381eb62cbbSBarry Smith 
1391eb62cbbSBarry Smith 
1401eb62cbbSBarry Smith        This could be done better.
1411eb62cbbSBarry Smith   */
14278b31e54SBarry Smith   rvalues = (Scalar *) PETSCMALLOC(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
14378b31e54SBarry Smith   CHKPTRQ(rvalues);
14478b31e54SBarry Smith   recv_waits = (MPI_Request *) PETSCMALLOC((nreceives+1)*sizeof(MPI_Request));
14578b31e54SBarry Smith   CHKPTRQ(recv_waits);
1461eb62cbbSBarry Smith   for ( i=0; i<nreceives; i++ ) {
147c60448a5SBarry Smith     MPI_Irecv((void *)(rvalues+3*nmax*i),3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
1481eb62cbbSBarry Smith               comm,recv_waits+i);
1491eb62cbbSBarry Smith   }
1501eb62cbbSBarry Smith 
1511eb62cbbSBarry Smith   /* do sends:
1521eb62cbbSBarry Smith       1) starts[i] gives the starting index in svalues for stuff going to
1531eb62cbbSBarry Smith          the ith processor
1541eb62cbbSBarry Smith   */
15578b31e54SBarry Smith   svalues = (Scalar *) PETSCMALLOC( 3*(aij->stash.n+1)*sizeof(Scalar) );
15678b31e54SBarry Smith   CHKPTRQ(svalues);
15778b31e54SBarry Smith   send_waits = (MPI_Request *) PETSCMALLOC( (nsends+1)*sizeof(MPI_Request));
15878b31e54SBarry Smith   CHKPTRQ(send_waits);
15978b31e54SBarry Smith   starts = (int *) PETSCMALLOC( numtids*sizeof(int) ); CHKPTRQ(starts);
1601eb62cbbSBarry Smith   starts[0] = 0;
1611eb62cbbSBarry Smith   for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
1621eb62cbbSBarry Smith   for ( i=0; i<aij->stash.n; i++ ) {
1631eb62cbbSBarry Smith     svalues[3*starts[owner[i]]]       = (Scalar)  aij->stash.idx[i];
1641eb62cbbSBarry Smith     svalues[3*starts[owner[i]]+1]     = (Scalar)  aij->stash.idy[i];
1651eb62cbbSBarry Smith     svalues[3*(starts[owner[i]]++)+2] =  aij->stash.array[i];
1661eb62cbbSBarry Smith   }
16778b31e54SBarry Smith   PETSCFREE(owner);
1681eb62cbbSBarry Smith   starts[0] = 0;
1691eb62cbbSBarry Smith   for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
1701eb62cbbSBarry Smith   count = 0;
1711eb62cbbSBarry Smith   for ( i=0; i<numtids; i++ ) {
1721eb62cbbSBarry Smith     if (procs[i]) {
173c60448a5SBarry Smith       MPI_Isend((void*)(svalues+3*starts[i]),3*nprocs[i],MPIU_SCALAR,i,tag,
1741eb62cbbSBarry Smith                 comm,send_waits+count++);
1751eb62cbbSBarry Smith     }
1761eb62cbbSBarry Smith   }
17778b31e54SBarry Smith   PETSCFREE(starts); PETSCFREE(nprocs);
1781eb62cbbSBarry Smith 
1791eb62cbbSBarry Smith   /* Free cache space */
18078b31e54SBarry Smith   ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
1811eb62cbbSBarry Smith 
1821eb62cbbSBarry Smith   aij->svalues    = svalues;    aij->rvalues = rvalues;
1831eb62cbbSBarry Smith   aij->nsends     = nsends;     aij->nrecvs = nreceives;
1841eb62cbbSBarry Smith   aij->send_waits = send_waits; aij->recv_waits = recv_waits;
1851eb62cbbSBarry Smith   aij->rmax       = nmax;
1861eb62cbbSBarry Smith 
1871eb62cbbSBarry Smith   return 0;
1881eb62cbbSBarry Smith }
18944a69424SLois Curfman McInnes extern int MatSetUpMultiply_MPIAIJ(Mat);
1901eb62cbbSBarry Smith 
191fc76ce87SLois Curfman McInnes static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
1921eb62cbbSBarry Smith {
1931eb62cbbSBarry Smith   int        ierr;
19444a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1951eb62cbbSBarry Smith   MPI_Status  *send_status,recv_status;
1966abc6512SBarry Smith   int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n;
1972493cbb0SBarry Smith   int         row,col,other_disassembled;
1981eb62cbbSBarry Smith   Scalar      *values,val;
1991eb62cbbSBarry Smith   InsertMode  addv = aij->insertmode;
2001eb62cbbSBarry Smith 
2011eb62cbbSBarry Smith   /*  wait on receives */
2021eb62cbbSBarry Smith   while (count) {
203d6dfbf8fSBarry Smith     MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
2041eb62cbbSBarry Smith     /* unpack receives into our local space */
205d6dfbf8fSBarry Smith     values = aij->rvalues + 3*imdex*aij->rmax;
206c60448a5SBarry Smith     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
2071eb62cbbSBarry Smith     n = n/3;
2081eb62cbbSBarry Smith     for ( i=0; i<n; i++ ) {
2091eb62cbbSBarry Smith       row = (int) PETSCREAL(values[3*i]) - aij->rstart;
2101eb62cbbSBarry Smith       col = (int) PETSCREAL(values[3*i+1]);
2111eb62cbbSBarry Smith       val = values[3*i+2];
2121eb62cbbSBarry Smith       if (col >= aij->cstart && col < aij->cend) {
2131eb62cbbSBarry Smith           col -= aij->cstart;
2141eb62cbbSBarry Smith         MatSetValues(aij->A,1,&row,1,&col,&val,addv);
2151eb62cbbSBarry Smith       }
2161eb62cbbSBarry Smith       else {
217d6dfbf8fSBarry Smith         if (aij->assembled) {
218b7c46309SBarry Smith           if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
2199e25ed09SBarry Smith           col = aij->colmap[col] - 1;
220b7c46309SBarry Smith           if (col < 0  && !((Mat_AIJ*)(aij->A->data))->nonew) {
2212493cbb0SBarry Smith             ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
2222493cbb0SBarry Smith             col = (int) PETSCREAL(values[3*i+1]);
223d6dfbf8fSBarry Smith           }
2249e25ed09SBarry Smith         }
2251eb62cbbSBarry Smith         MatSetValues(aij->B,1,&row,1,&col,&val,addv);
2261eb62cbbSBarry Smith       }
2271eb62cbbSBarry Smith     }
2281eb62cbbSBarry Smith     count--;
2291eb62cbbSBarry Smith   }
23078b31e54SBarry Smith   PETSCFREE(aij->recv_waits); PETSCFREE(aij->rvalues);
2311eb62cbbSBarry Smith 
2321eb62cbbSBarry Smith   /* wait on sends */
2331eb62cbbSBarry Smith   if (aij->nsends) {
23478b31e54SBarry Smith     send_status = (MPI_Status *) PETSCMALLOC( aij->nsends*sizeof(MPI_Status) );
23578b31e54SBarry Smith     CHKPTRQ(send_status);
2361eb62cbbSBarry Smith     MPI_Waitall(aij->nsends,aij->send_waits,send_status);
23778b31e54SBarry Smith     PETSCFREE(send_status);
2381eb62cbbSBarry Smith   }
23978b31e54SBarry Smith   PETSCFREE(aij->send_waits); PETSCFREE(aij->svalues);
2401eb62cbbSBarry Smith 
24107a0e7adSLois Curfman McInnes   aij->insertmode = NOTSETVALUES;
24278b31e54SBarry Smith   ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
24378b31e54SBarry Smith   ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);
2441eb62cbbSBarry Smith 
2452493cbb0SBarry Smith   /* determine if any processor has disassembled, if so we must
2462493cbb0SBarry Smith      also disassemble ourselfs, in order that we may reassemble. */
2472493cbb0SBarry Smith   MPI_Allreduce((void *) &aij->assembled,(void *) &other_disassembled,1,
2482493cbb0SBarry Smith                  MPI_INT,MPI_PROD,mat->comm);
2492493cbb0SBarry Smith   if (aij->assembled && !other_disassembled) {
2502493cbb0SBarry Smith     ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
2512493cbb0SBarry Smith   }
2522493cbb0SBarry Smith 
2535e42470aSBarry Smith   if (!aij->assembled && mode == FINAL_ASSEMBLY) {
25478b31e54SBarry Smith     ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
255d6dfbf8fSBarry Smith     aij->assembled = 1;
2565e42470aSBarry Smith   }
25778b31e54SBarry Smith   ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
25878b31e54SBarry Smith   ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);
2595e42470aSBarry Smith 
2608a729477SBarry Smith   return 0;
2618a729477SBarry Smith }
2628a729477SBarry Smith 
2632ee70a88SLois Curfman McInnes static int MatZeroEntries_MPIAIJ(Mat A)
2641eb62cbbSBarry Smith {
26544a69424SLois Curfman McInnes   Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
266dbd7a890SLois Curfman McInnes   int ierr;
26778b31e54SBarry Smith   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
26878b31e54SBarry Smith   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
2691eb62cbbSBarry Smith   return 0;
2701eb62cbbSBarry Smith }
2711eb62cbbSBarry Smith 
2721eb62cbbSBarry Smith /* again this uses the same basic stratagy as in the assembly and
2731eb62cbbSBarry Smith    scatter create routines, we should try to do it systemamatically
2741eb62cbbSBarry Smith    if we can figure out the proper level of generality. */
2751eb62cbbSBarry Smith 
2761eb62cbbSBarry Smith /* the code does not do the diagonal entries correctly unless the
2771eb62cbbSBarry Smith    matrix is square and the column and row owerships are identical.
2781eb62cbbSBarry Smith    This is a BUG. The only way to fix it seems to be to access
2791eb62cbbSBarry Smith    aij->A and aij->B directly and not through the MatZeroRows()
2801eb62cbbSBarry Smith    routine.
2811eb62cbbSBarry Smith */
28244a69424SLois Curfman McInnes static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
2831eb62cbbSBarry Smith {
28444a69424SLois Curfman McInnes   Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
2851eb62cbbSBarry Smith   int            i,ierr,N, *rows,*owners = l->rowners,numtids = l->numtids;
2866abc6512SBarry Smith   int            *procs,*nprocs,j,found,idx,nsends,*work;
2871eb62cbbSBarry Smith   int            nmax,*svalues,*starts,*owner,nrecvs,mytid = l->mytid;
2885392566eSBarry Smith   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
289d6dfbf8fSBarry Smith   int            *lens,imdex,*lrows,*values;
290d6dfbf8fSBarry Smith   MPI_Comm       comm = A->comm;
2911eb62cbbSBarry Smith   MPI_Request    *send_waits,*recv_waits;
2921eb62cbbSBarry Smith   MPI_Status     recv_status,*send_status;
2931eb62cbbSBarry Smith   IS             istmp;
2941eb62cbbSBarry Smith 
295c8f67822SLois Curfman McInnes   if (!l->assembled)
29678b31e54SBarry Smith     SETERRQ(1,"MatZeroRows_MPIAIJ: Must assemble matrix first");
29778b31e54SBarry Smith   ierr = ISGetLocalSize(is,&N); CHKERRQ(ierr);
29878b31e54SBarry Smith   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
2991eb62cbbSBarry Smith 
3001eb62cbbSBarry Smith   /*  first count number of contributors to each processor */
30178b31e54SBarry Smith   nprocs = (int *) PETSCMALLOC( 2*numtids*sizeof(int) ); CHKPTRQ(nprocs);
30278b31e54SBarry Smith   PETSCMEMSET(nprocs,0,2*numtids*sizeof(int)); procs = nprocs + numtids;
30378b31e54SBarry Smith   owner = (int *) PETSCMALLOC((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
3041eb62cbbSBarry Smith   for ( i=0; i<N; i++ ) {
3051eb62cbbSBarry Smith     idx = rows[i];
3061eb62cbbSBarry Smith     found = 0;
3071eb62cbbSBarry Smith     for ( j=0; j<numtids; j++ ) {
3081eb62cbbSBarry Smith       if (idx >= owners[j] && idx < owners[j+1]) {
3091eb62cbbSBarry Smith         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
3101eb62cbbSBarry Smith       }
3111eb62cbbSBarry Smith     }
31278b31e54SBarry Smith     if (!found) SETERRQ(1,"Index out of range.");
3131eb62cbbSBarry Smith   }
3141eb62cbbSBarry Smith   nsends = 0;  for ( i=0; i<numtids; i++ ) { nsends += procs[i];}
3151eb62cbbSBarry Smith 
3161eb62cbbSBarry Smith   /* inform other processors of number of messages and max length*/
31778b31e54SBarry Smith   work = (int *) PETSCMALLOC( numtids*sizeof(int) ); CHKPTRQ(work);
3181eb62cbbSBarry Smith   MPI_Allreduce((void *) procs,(void *) work,numtids,MPI_INT,MPI_SUM,comm);
3191eb62cbbSBarry Smith   nrecvs = work[mytid];
3201eb62cbbSBarry Smith   MPI_Allreduce((void *) nprocs,(void *) work,numtids,MPI_INT,MPI_MAX,comm);
3211eb62cbbSBarry Smith   nmax = work[mytid];
32278b31e54SBarry Smith   PETSCFREE(work);
3231eb62cbbSBarry Smith 
3241eb62cbbSBarry Smith   /* post receives:   */
32578b31e54SBarry Smith   rvalues = (int *) PETSCMALLOC((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
32678b31e54SBarry Smith   CHKPTRQ(rvalues);
32778b31e54SBarry Smith   recv_waits = (MPI_Request *) PETSCMALLOC((nrecvs+1)*sizeof(MPI_Request));
32878b31e54SBarry Smith   CHKPTRQ(recv_waits);
3291eb62cbbSBarry Smith   for ( i=0; i<nrecvs; i++ ) {
3301eb62cbbSBarry Smith     MPI_Irecv((void *)(rvalues+nmax*i),nmax,MPI_INT,MPI_ANY_SOURCE,tag,
3311eb62cbbSBarry Smith               comm,recv_waits+i);
3321eb62cbbSBarry Smith   }
3331eb62cbbSBarry Smith 
3341eb62cbbSBarry Smith   /* do sends:
3351eb62cbbSBarry Smith       1) starts[i] gives the starting index in svalues for stuff going to
3361eb62cbbSBarry Smith          the ith processor
3371eb62cbbSBarry Smith   */
33878b31e54SBarry Smith   svalues = (int *) PETSCMALLOC( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
33978b31e54SBarry Smith   send_waits = (MPI_Request *) PETSCMALLOC( (nsends+1)*sizeof(MPI_Request));
34078b31e54SBarry Smith   CHKPTRQ(send_waits);
34178b31e54SBarry Smith   starts = (int *) PETSCMALLOC( (numtids+1)*sizeof(int) ); CHKPTRQ(starts);
3421eb62cbbSBarry Smith   starts[0] = 0;
3431eb62cbbSBarry Smith   for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
3441eb62cbbSBarry Smith   for ( i=0; i<N; i++ ) {
3451eb62cbbSBarry Smith     svalues[starts[owner[i]]++] = rows[i];
3461eb62cbbSBarry Smith   }
3471eb62cbbSBarry Smith   ISRestoreIndices(is,&rows);
3481eb62cbbSBarry Smith 
3491eb62cbbSBarry Smith   starts[0] = 0;
3501eb62cbbSBarry Smith   for ( i=1; i<numtids+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
3511eb62cbbSBarry Smith   count = 0;
3521eb62cbbSBarry Smith   for ( i=0; i<numtids; i++ ) {
3531eb62cbbSBarry Smith     if (procs[i]) {
3541eb62cbbSBarry Smith       MPI_Isend((void*)(svalues+starts[i]),nprocs[i],MPI_INT,i,tag,
3551eb62cbbSBarry Smith                 comm,send_waits+count++);
3561eb62cbbSBarry Smith     }
3571eb62cbbSBarry Smith   }
35878b31e54SBarry Smith   PETSCFREE(starts);
3591eb62cbbSBarry Smith 
3601eb62cbbSBarry Smith   base = owners[mytid];
3611eb62cbbSBarry Smith 
3621eb62cbbSBarry Smith   /*  wait on receives */
36378b31e54SBarry Smith   lens = (int *) PETSCMALLOC( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
3641eb62cbbSBarry Smith   source = lens + nrecvs;
3651eb62cbbSBarry Smith   count = nrecvs; slen = 0;
3661eb62cbbSBarry Smith   while (count) {
367d6dfbf8fSBarry Smith     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
3681eb62cbbSBarry Smith     /* unpack receives into our local space */
3691eb62cbbSBarry Smith     MPI_Get_count(&recv_status,MPI_INT,&n);
370d6dfbf8fSBarry Smith     source[imdex]  = recv_status.MPI_SOURCE;
371d6dfbf8fSBarry Smith     lens[imdex]  = n;
3721eb62cbbSBarry Smith     slen += n;
3731eb62cbbSBarry Smith     count--;
3741eb62cbbSBarry Smith   }
37578b31e54SBarry Smith   PETSCFREE(recv_waits);
3761eb62cbbSBarry Smith 
3771eb62cbbSBarry Smith   /* move the data into the send scatter */
37878b31e54SBarry Smith   lrows = (int *) PETSCMALLOC( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
3791eb62cbbSBarry Smith   count = 0;
3801eb62cbbSBarry Smith   for ( i=0; i<nrecvs; i++ ) {
3811eb62cbbSBarry Smith     values = rvalues + i*nmax;
3821eb62cbbSBarry Smith     for ( j=0; j<lens[i]; j++ ) {
3831eb62cbbSBarry Smith       lrows[count++] = values[j] - base;
3841eb62cbbSBarry Smith     }
3851eb62cbbSBarry Smith   }
38678b31e54SBarry Smith   PETSCFREE(rvalues); PETSCFREE(lens);
38778b31e54SBarry Smith   PETSCFREE(owner); PETSCFREE(nprocs);
3881eb62cbbSBarry Smith 
3891eb62cbbSBarry Smith   /* actually zap the local rows */
3906b5873e3SBarry Smith   ierr = ISCreateSequential(MPI_COMM_SELF,slen,lrows,&istmp);
39178b31e54SBarry Smith   CHKERRQ(ierr);  PETSCFREE(lrows);
39278b31e54SBarry Smith   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
39378b31e54SBarry Smith   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
39478b31e54SBarry Smith   ierr = ISDestroy(istmp); CHKERRQ(ierr);
3951eb62cbbSBarry Smith 
3961eb62cbbSBarry Smith   /* wait on sends */
3971eb62cbbSBarry Smith   if (nsends) {
39878b31e54SBarry Smith     send_status = (MPI_Status *) PETSCMALLOC( nsends*sizeof(MPI_Status) );
39978b31e54SBarry Smith     CHKPTRQ(send_status);
4001eb62cbbSBarry Smith     MPI_Waitall(nsends,send_waits,send_status);
40178b31e54SBarry Smith     PETSCFREE(send_status);
4021eb62cbbSBarry Smith   }
40378b31e54SBarry Smith   PETSCFREE(send_waits); PETSCFREE(svalues);
4041eb62cbbSBarry Smith 
4051eb62cbbSBarry Smith 
4061eb62cbbSBarry Smith   return 0;
4071eb62cbbSBarry Smith }
4081eb62cbbSBarry Smith 
40944a69424SLois Curfman McInnes static int MatMult_MPIAIJ(Mat aijin,Vec xx,Vec yy)
4101eb62cbbSBarry Smith {
41144a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data;
4121eb62cbbSBarry Smith   int        ierr;
41378b31e54SBarry Smith   if (!aij->assembled) SETERRQ(1,"MatMult_MPIAIJ: must assemble matrix first");
41406be10caSBarry Smith   ierr = VecScatterBegin(xx,aij->lvec,INSERTVALUES,SCATTERALL,aij->Mvctx);
41578b31e54SBarry Smith   CHKERRQ(ierr);
41678b31e54SBarry Smith   ierr = MatMult(aij->A,xx,yy); CHKERRQ(ierr);
41706be10caSBarry Smith   ierr = VecScatterEnd(xx,aij->lvec,INSERTVALUES,SCATTERALL,aij->Mvctx);
41878b31e54SBarry Smith   CHKERRQ(ierr);
41978b31e54SBarry Smith   ierr = MatMultAdd(aij->B,aij->lvec,yy,yy); CHKERRQ(ierr);
4201eb62cbbSBarry Smith   return 0;
4211eb62cbbSBarry Smith }
4221eb62cbbSBarry Smith 
42344a69424SLois Curfman McInnes static int MatMultAdd_MPIAIJ(Mat aijin,Vec xx,Vec yy,Vec zz)
424da3a660dSBarry Smith {
42544a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data;
426da3a660dSBarry Smith   int        ierr;
42778b31e54SBarry Smith   if (!aij->assembled) SETERRQ(1,"MatMult_MPIAIJ: must assemble matrix first");
42806be10caSBarry Smith   ierr = VecScatterBegin(xx,aij->lvec,INSERTVALUES,SCATTERALL,aij->Mvctx);
42978b31e54SBarry Smith   CHKERRQ(ierr);
43078b31e54SBarry Smith   ierr = MatMultAdd(aij->A,xx,yy,zz); CHKERRQ(ierr);
43106be10caSBarry Smith   ierr = VecScatterEnd(xx,aij->lvec,INSERTVALUES,SCATTERALL,aij->Mvctx);
43278b31e54SBarry Smith   CHKERRQ(ierr);
43378b31e54SBarry Smith   ierr = MatMultAdd(aij->B,aij->lvec,zz,zz); CHKERRQ(ierr);
434da3a660dSBarry Smith   return 0;
435da3a660dSBarry Smith }
436da3a660dSBarry Smith 
43744a69424SLois Curfman McInnes static int MatMultTrans_MPIAIJ(Mat aijin,Vec xx,Vec yy)
438da3a660dSBarry Smith {
43944a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data;
440da3a660dSBarry Smith   int        ierr;
441da3a660dSBarry Smith 
44244a69424SLois Curfman McInnes   if (!aij->assembled)
44378b31e54SBarry Smith     SETERRQ(1,"MatMulTrans_MPIAIJ: must assemble matrix first");
444da3a660dSBarry Smith   /* do nondiagonal part */
44578b31e54SBarry Smith   ierr = MatMultTrans(aij->B,xx,aij->lvec); CHKERRQ(ierr);
446da3a660dSBarry Smith   /* send it on its way */
44706be10caSBarry Smith   ierr = VecScatterBegin(aij->lvec,yy,ADDVALUES,
44878b31e54SBarry Smith            (ScatterMode)(SCATTERALL|SCATTERREVERSE),aij->Mvctx); CHKERRQ(ierr);
449da3a660dSBarry Smith   /* do local part */
45078b31e54SBarry Smith   ierr = MatMultTrans(aij->A,xx,yy); CHKERRQ(ierr);
451da3a660dSBarry Smith   /* receive remote parts: note this assumes the values are not actually */
452da3a660dSBarry Smith   /* inserted in yy until the next line, which is true for my implementation*/
453da3a660dSBarry Smith   /* but is not perhaps always true. */
45406be10caSBarry Smith   ierr = VecScatterEnd(aij->lvec,yy,ADDVALUES,
45578b31e54SBarry Smith          (ScatterMode)(SCATTERALL|SCATTERREVERSE),aij->Mvctx); CHKERRQ(ierr);
456da3a660dSBarry Smith   return 0;
457da3a660dSBarry Smith }
458da3a660dSBarry Smith 
45944a69424SLois Curfman McInnes static int MatMultTransAdd_MPIAIJ(Mat aijin,Vec xx,Vec yy,Vec zz)
460da3a660dSBarry Smith {
46144a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data;
462da3a660dSBarry Smith   int        ierr;
463da3a660dSBarry Smith 
46444a69424SLois Curfman McInnes   if (!aij->assembled)
46578b31e54SBarry Smith     SETERRQ(1,"MatMulTransAdd_MPIAIJ: must assemble matrix first");
466da3a660dSBarry Smith   /* do nondiagonal part */
46778b31e54SBarry Smith   ierr = MatMultTrans(aij->B,xx,aij->lvec); CHKERRQ(ierr);
468da3a660dSBarry Smith   /* send it on its way */
46906be10caSBarry Smith   ierr = VecScatterBegin(aij->lvec,zz,ADDVALUES,
47078b31e54SBarry Smith          (ScatterMode)(SCATTERALL|SCATTERREVERSE),aij->Mvctx); CHKERRQ(ierr);
471da3a660dSBarry Smith   /* do local part */
47278b31e54SBarry Smith   ierr = MatMultTransAdd(aij->A,xx,yy,zz); CHKERRQ(ierr);
473da3a660dSBarry Smith   /* receive remote parts: note this assumes the values are not actually */
474da3a660dSBarry Smith   /* inserted in yy until the next line, which is true for my implementation*/
475da3a660dSBarry Smith   /* but is not perhaps always true. */
47606be10caSBarry Smith   ierr = VecScatterEnd(aij->lvec,zz,ADDVALUES,
47778b31e54SBarry Smith        (ScatterMode)(SCATTERALL|SCATTERREVERSE),aij->Mvctx); CHKERRQ(ierr);
478da3a660dSBarry Smith   return 0;
479da3a660dSBarry Smith }
480da3a660dSBarry Smith 
4811eb62cbbSBarry Smith /*
4821eb62cbbSBarry Smith   This only works correctly for square matrices where the subblock A->A is the
4831eb62cbbSBarry Smith    diagonal block
4841eb62cbbSBarry Smith */
485d1710a03SLois Curfman McInnes static int MatGetDiagonal_MPIAIJ(Mat Ain,Vec v)
4861eb62cbbSBarry Smith {
48744a69424SLois Curfman McInnes   Mat_MPIAIJ *A = (Mat_MPIAIJ *) Ain->data;
48878b31e54SBarry Smith   if (!A->assembled) SETERRQ(1,"MatGetDiag_MPIAIJ: must assemble matrix first");
4891eb62cbbSBarry Smith   return MatGetDiagonal(A->A,v);
4901eb62cbbSBarry Smith }
4911eb62cbbSBarry Smith 
49244a69424SLois Curfman McInnes static int MatDestroy_MPIAIJ(PetscObject obj)
4931eb62cbbSBarry Smith {
4941eb62cbbSBarry Smith   Mat        mat = (Mat) obj;
49544a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
4961eb62cbbSBarry Smith   int        ierr;
497a5a9c739SBarry Smith #if defined(PETSC_LOG)
498a5a9c739SBarry Smith   PLogObjectState(obj,"Rows %d Cols %d",aij->M,aij->N);
499a5a9c739SBarry Smith #endif
50078b31e54SBarry Smith   PETSCFREE(aij->rowners);
50178b31e54SBarry Smith   ierr = MatDestroy(aij->A); CHKERRQ(ierr);
50278b31e54SBarry Smith   ierr = MatDestroy(aij->B); CHKERRQ(ierr);
50378b31e54SBarry Smith   if (aij->colmap) PETSCFREE(aij->colmap);
50478b31e54SBarry Smith   if (aij->garray) PETSCFREE(aij->garray);
5051eb62cbbSBarry Smith   if (aij->lvec) VecDestroy(aij->lvec);
5061eb62cbbSBarry Smith   if (aij->Mvctx) VecScatterCtxDestroy(aij->Mvctx);
50778b31e54SBarry Smith   PETSCFREE(aij);
508a5a9c739SBarry Smith   PLogObjectDestroy(mat);
509a5a9c739SBarry Smith   PETSCHEADERDESTROY(mat);
5101eb62cbbSBarry Smith   return 0;
5111eb62cbbSBarry Smith }
5127857610eSBarry Smith #include "draw.h"
513ee50ffe9SBarry Smith #include "pviewer.h"
514ee50ffe9SBarry Smith 
51544a69424SLois Curfman McInnes static int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
5161eb62cbbSBarry Smith {
5171eb62cbbSBarry Smith   Mat        mat = (Mat) obj;
51844a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
5191eb62cbbSBarry Smith   int        ierr;
520d13ab20cSBarry Smith   PetscObject vobj = (PetscObject) viewer;
5211eb62cbbSBarry Smith 
52278b31e54SBarry Smith   if (!aij->assembled) SETERRQ(1,"MatView_MPIAIJ: must assemble matrix first");
523154123eaSLois Curfman McInnes   if (!viewer) { /* so that viewers may be used from debuggers */
524154123eaSLois Curfman McInnes     viewer = STDOUT_VIEWER; vobj = (PetscObject) viewer;
525154123eaSLois Curfman McInnes   }
526ab254492SBarry Smith   if (vobj->cookie == DRAW_COOKIE && vobj->type == NULLWINDOW) return 0;
5277857610eSBarry Smith   if (vobj->cookie == VIEWER_COOKIE && vobj->type == FILE_VIEWER) {
5284a0ce102SLois Curfman McInnes     FILE *fd = ViewerFileGetPointer_Private(viewer);
5297f813858SBarry Smith     MPIU_Seq_begin(mat->comm,1);
530d13ab20cSBarry Smith     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
5311eb62cbbSBarry Smith              aij->mytid,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
5321eb62cbbSBarry Smith              aij->cend);
53378b31e54SBarry Smith     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
53478b31e54SBarry Smith     ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
535d13ab20cSBarry Smith     fflush(fd);
5367f813858SBarry Smith     MPIU_Seq_end(mat->comm,1);
537d13ab20cSBarry Smith   }
5387857610eSBarry Smith   else if ((vobj->cookie == VIEWER_COOKIE && vobj->type == FILES_VIEWER) ||
5397857610eSBarry Smith             vobj->cookie == DRAW_COOKIE) {
54095373324SBarry Smith     int numtids = aij->numtids, mytid = aij->mytid;
54195373324SBarry Smith     if (numtids == 1) {
54278b31e54SBarry Smith       ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
54395373324SBarry Smith     }
54495373324SBarry Smith     else {
54595373324SBarry Smith       /* assemble the entire matrix onto first processor. */
54695373324SBarry Smith       Mat     A;
5472ee70a88SLois Curfman McInnes       Mat_AIJ *Aloc;
5482eb8c8abSBarry Smith       int     M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
54995373324SBarry Smith       Scalar  *a;
5502ee70a88SLois Curfman McInnes 
55195373324SBarry Smith       if (!mytid) {
55295373324SBarry Smith         ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,0,0,0,&A);
55395373324SBarry Smith       }
55495373324SBarry Smith       else {
55595373324SBarry Smith         ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,0,0,0,&A);
55695373324SBarry Smith       }
55778b31e54SBarry Smith       CHKERRQ(ierr);
55895373324SBarry Smith 
55995373324SBarry Smith       /* copy over the A part */
5602ee70a88SLois Curfman McInnes       Aloc = (Mat_AIJ*) aij->A->data;
5612ee70a88SLois Curfman McInnes       m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
56295373324SBarry Smith       row = aij->rstart;
563b7c46309SBarry Smith       for ( i=0; i<ai[m]-1; i++ ) {aj[i] += aij->cstart - 1;}
56495373324SBarry Smith       for ( i=0; i<m; i++ ) {
56507a0e7adSLois Curfman McInnes         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERTVALUES);
56678b31e54SBarry Smith         CHKERRQ(ierr);
56795373324SBarry Smith         row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
56895373324SBarry Smith       }
5692ee70a88SLois Curfman McInnes       aj = Aloc->j;
570b7c46309SBarry Smith       for ( i=0; i<ai[m]-1; i++ ) {aj[i] -= aij->cstart - 1;}
57195373324SBarry Smith 
57295373324SBarry Smith       /* copy over the B part */
5732ee70a88SLois Curfman McInnes       Aloc = (Mat_AIJ*) aij->B->data;
5742ee70a88SLois Curfman McInnes       m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
57595373324SBarry Smith       row = aij->rstart;
57678b31e54SBarry Smith       ct = cols = (int *) PETSCMALLOC( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
577b7c46309SBarry Smith       for ( i=0; i<ai[m]-1; i++ ) {cols[i] = aij->garray[aj[i]-1];}
57895373324SBarry Smith       for ( i=0; i<m; i++ ) {
57907a0e7adSLois Curfman McInnes         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERTVALUES);
58078b31e54SBarry Smith         CHKERRQ(ierr);
58195373324SBarry Smith         row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
58295373324SBarry Smith       }
58378b31e54SBarry Smith       PETSCFREE(ct);
58478b31e54SBarry Smith       ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
58578b31e54SBarry Smith       ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
58695373324SBarry Smith       if (!mytid) {
58778b31e54SBarry Smith         ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
58895373324SBarry Smith       }
58978b31e54SBarry Smith       ierr = MatDestroy(A); CHKERRQ(ierr);
59095373324SBarry Smith     }
59195373324SBarry Smith   }
5921eb62cbbSBarry Smith   return 0;
5931eb62cbbSBarry Smith }
5941eb62cbbSBarry Smith 
595d3c9fed9SLois Curfman McInnes extern int MatMarkDiag_AIJ(Mat_AIJ  *);
5961eb62cbbSBarry Smith /*
5971eb62cbbSBarry Smith     This has to provide several versions.
5981eb62cbbSBarry Smith 
5991eb62cbbSBarry Smith      1) per sequential
6001eb62cbbSBarry Smith      2) a) use only local smoothing updating outer values only once.
6011eb62cbbSBarry Smith         b) local smoothing updating outer values each inner iteration
602d6dfbf8fSBarry Smith      3) color updating out values betwen colors.
6031eb62cbbSBarry Smith */
604fc76ce87SLois Curfman McInnes static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
605fc76ce87SLois Curfman McInnes                            double shift,int its,Vec xx)
6068a729477SBarry Smith {
60744a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
608d6dfbf8fSBarry Smith   Mat        AA = mat->A, BB = mat->B;
60944a69424SLois Curfman McInnes   Mat_AIJ    *A = (Mat_AIJ *) AA->data, *B = (Mat_AIJ *)BB->data;
6106abc6512SBarry Smith   Scalar     zero = 0.0,*b,*x,*xs,*ls,d,*v,sum,scale,*t,*ts;
6116abc6512SBarry Smith   int        ierr,*idx, *diag;
6126abc6512SBarry Smith   int        n = mat->n, m = mat->m, i;
613da3a660dSBarry Smith   Vec        tt;
6148a729477SBarry Smith 
61578b31e54SBarry Smith   if (!mat->assembled) SETERRQ(1,"MatRelax_MPIAIJ: must assemble matrix first");
6161eb62cbbSBarry Smith 
617d6dfbf8fSBarry Smith   VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
618d6dfbf8fSBarry Smith   xs = x -1; /* shift by one for index start of 1 */
619da3a660dSBarry Smith   ls--;
620d3c9fed9SLois Curfman McInnes   if (!A->diag) {if ((ierr = MatMarkDiag_AIJ(A))) return ierr;}
621d6dfbf8fSBarry Smith   diag = A->diag;
622acb40c82SBarry Smith   if (flag == SOR_APPLY_UPPER || flag == SOR_APPLY_LOWER) {
62378b31e54SBarry Smith     SETERRQ(1,"That option not yet support for parallel AIJ matrices");
624acb40c82SBarry Smith   }
625da3a660dSBarry Smith   if (flag & SOR_EISENSTAT) {
626da3a660dSBarry Smith     /* Let  A = L + U + D; where L is lower trianglar,
627da3a660dSBarry Smith     U is upper triangular, E is diagonal; This routine applies
628da3a660dSBarry Smith 
629da3a660dSBarry Smith             (L + E)^{-1} A (U + E)^{-1}
630da3a660dSBarry Smith 
631da3a660dSBarry Smith     to a vector efficiently using Eisenstat's trick. This is for
632da3a660dSBarry Smith     the case of SSOR preconditioner, so E is D/omega where omega
633da3a660dSBarry Smith     is the relaxation factor.
634da3a660dSBarry Smith     */
63578b31e54SBarry Smith     ierr = VecDuplicate(xx,&tt); CHKERRQ(ierr);
636da3a660dSBarry Smith     VecGetArray(tt,&t);
637da3a660dSBarry Smith     scale = (2.0/omega) - 1.0;
638da3a660dSBarry Smith     /*  x = (E + U)^{-1} b */
639da3a660dSBarry Smith     VecSet(&zero,mat->lvec);
64006be10caSBarry Smith     ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
64178b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
642da3a660dSBarry Smith     for ( i=m-1; i>-1; i-- ) {
643da3a660dSBarry Smith       n    = A->i[i+1] - diag[i] - 1;
644da3a660dSBarry Smith       idx  = A->j + diag[i];
645da3a660dSBarry Smith       v    = A->a + diag[i];
646da3a660dSBarry Smith       sum  = b[i];
647da3a660dSBarry Smith       SPARSEDENSEMDOT(sum,xs,v,idx,n);
648da3a660dSBarry Smith       d    = shift + A->a[diag[i]-1];
649da3a660dSBarry Smith       n    = B->i[i+1] - B->i[i];
650da3a660dSBarry Smith       idx  = B->j + B->i[i] - 1;
651da3a660dSBarry Smith       v    = B->a + B->i[i] - 1;
652da3a660dSBarry Smith       SPARSEDENSEMDOT(sum,ls,v,idx,n);
653da3a660dSBarry Smith       x[i] = omega*(sum/d);
654da3a660dSBarry Smith     }
655edd2f0e1SBarry Smith     ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
65678b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
657da3a660dSBarry Smith 
658da3a660dSBarry Smith     /*  t = b - (2*E - D)x */
659da3a660dSBarry Smith     v = A->a;
660da3a660dSBarry Smith     for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ - 1])*x[i]; }
661da3a660dSBarry Smith 
662da3a660dSBarry Smith     /*  t = (E + L)^{-1}t */
663da3a660dSBarry Smith     ts = t - 1; /* shifted by one for index start of a or mat->j*/
664da3a660dSBarry Smith     diag = A->diag;
665da3a660dSBarry Smith     VecSet(&zero,mat->lvec);
66606be10caSBarry Smith     ierr = VecPipelineBegin(tt,mat->lvec,INSERTVALUES,PIPELINEDOWN,
66778b31e54SBarry Smith                                                  mat->Mvctx); CHKERRQ(ierr);
668da3a660dSBarry Smith     for ( i=0; i<m; i++ ) {
669da3a660dSBarry Smith       n    = diag[i] - A->i[i];
670da3a660dSBarry Smith       idx  = A->j + A->i[i] - 1;
671da3a660dSBarry Smith       v    = A->a + A->i[i] - 1;
672da3a660dSBarry Smith       sum  = t[i];
673da3a660dSBarry Smith       SPARSEDENSEMDOT(sum,ts,v,idx,n);
674da3a660dSBarry Smith       d    = shift + A->a[diag[i]-1];
675da3a660dSBarry Smith       n    = B->i[i+1] - B->i[i];
676da3a660dSBarry Smith       idx  = B->j + B->i[i] - 1;
677da3a660dSBarry Smith       v    = B->a + B->i[i] - 1;
678da3a660dSBarry Smith       SPARSEDENSEMDOT(sum,ls,v,idx,n);
679da3a660dSBarry Smith       t[i] = omega*(sum/d);
680da3a660dSBarry Smith     }
681edd2f0e1SBarry Smith     ierr = VecPipelineEnd(tt,mat->lvec,INSERTVALUES,PIPELINEDOWN,
68278b31e54SBarry Smith                                                     mat->Mvctx); CHKERRQ(ierr);
683da3a660dSBarry Smith     /*  x = x + t */
684da3a660dSBarry Smith     for ( i=0; i<m; i++ ) { x[i] += t[i]; }
685da3a660dSBarry Smith     VecDestroy(tt);
686da3a660dSBarry Smith     return 0;
687da3a660dSBarry Smith   }
688da3a660dSBarry Smith 
6891eb62cbbSBarry Smith 
690d6dfbf8fSBarry Smith   if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){
691da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
692da3a660dSBarry Smith       VecSet(&zero,mat->lvec); VecSet(&zero,xx);
693da3a660dSBarry Smith     }
694da3a660dSBarry Smith     else {
69506be10caSBarry Smith       ierr=VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERUP,mat->Mvctx);
69678b31e54SBarry Smith       CHKERRQ(ierr);
69706be10caSBarry Smith       ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERUP,mat->Mvctx);
69878b31e54SBarry Smith       CHKERRQ(ierr);
699da3a660dSBarry Smith     }
700d6dfbf8fSBarry Smith     while (its--) {
701d6dfbf8fSBarry Smith       /* go down through the rows */
70206be10caSBarry Smith       ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN,
70378b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
704d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
705d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
706d6dfbf8fSBarry Smith         idx  = A->j + A->i[i] - 1;
707d6dfbf8fSBarry Smith         v    = A->a + A->i[i] - 1;
708d6dfbf8fSBarry Smith         sum  = b[i];
709d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
710d6dfbf8fSBarry Smith         d    = shift + A->a[diag[i]-1];
711d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
712d6dfbf8fSBarry Smith         idx  = B->j + B->i[i] - 1;
713d6dfbf8fSBarry Smith         v    = B->a + B->i[i] - 1;
714d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
715d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
716d6dfbf8fSBarry Smith       }
717edd2f0e1SBarry Smith       ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN,
71878b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
719d6dfbf8fSBarry Smith       /* come up through the rows */
72006be10caSBarry Smith       ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
72178b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
722d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
723d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
724d6dfbf8fSBarry Smith         idx  = A->j + A->i[i] - 1;
725d6dfbf8fSBarry Smith         v    = A->a + A->i[i] - 1;
726d6dfbf8fSBarry Smith         sum  = b[i];
727d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
728d6dfbf8fSBarry Smith         d    = shift + A->a[diag[i]-1];
729d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
730d6dfbf8fSBarry Smith         idx  = B->j + B->i[i] - 1;
731d6dfbf8fSBarry Smith         v    = B->a + B->i[i] - 1;
732d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
733d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
734d6dfbf8fSBarry Smith       }
735edd2f0e1SBarry Smith       ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
73678b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
737d6dfbf8fSBarry Smith     }
738d6dfbf8fSBarry Smith   }
739d6dfbf8fSBarry Smith   else if (flag & SOR_FORWARD_SWEEP){
740da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
741da3a660dSBarry Smith       VecSet(&zero,mat->lvec);
74206be10caSBarry Smith       ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN,
74378b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
744da3a660dSBarry Smith       for ( i=0; i<m; i++ ) {
745da3a660dSBarry Smith         n    = diag[i] - A->i[i];
746da3a660dSBarry Smith         idx  = A->j + A->i[i] - 1;
747da3a660dSBarry Smith         v    = A->a + A->i[i] - 1;
748da3a660dSBarry Smith         sum  = b[i];
749da3a660dSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
750da3a660dSBarry Smith         d    = shift + A->a[diag[i]-1];
751da3a660dSBarry Smith         n    = B->i[i+1] - B->i[i];
752da3a660dSBarry Smith         idx  = B->j + B->i[i] - 1;
753da3a660dSBarry Smith         v    = B->a + B->i[i] - 1;
754da3a660dSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
755da3a660dSBarry Smith         x[i] = omega*(sum/d);
756da3a660dSBarry Smith       }
757edd2f0e1SBarry Smith       ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN,
75878b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
759da3a660dSBarry Smith       its--;
760da3a660dSBarry Smith     }
761d6dfbf8fSBarry Smith     while (its--) {
76206be10caSBarry Smith       ierr=VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERUP,mat->Mvctx);
76378b31e54SBarry Smith       CHKERRQ(ierr);
76406be10caSBarry Smith       ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERUP,mat->Mvctx);
76578b31e54SBarry Smith       CHKERRQ(ierr);
76606be10caSBarry Smith       ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN,
76778b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
768d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
769d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
770d6dfbf8fSBarry Smith         idx  = A->j + A->i[i] - 1;
771d6dfbf8fSBarry Smith         v    = A->a + A->i[i] - 1;
772d6dfbf8fSBarry Smith         sum  = b[i];
773d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
774d6dfbf8fSBarry Smith         d    = shift + A->a[diag[i]-1];
775d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
776d6dfbf8fSBarry Smith         idx  = B->j + B->i[i] - 1;
777d6dfbf8fSBarry Smith         v    = B->a + B->i[i] - 1;
778d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
779d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
780d6dfbf8fSBarry Smith       }
781edd2f0e1SBarry Smith       ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN,
78278b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
783d6dfbf8fSBarry Smith     }
784d6dfbf8fSBarry Smith   }
785d6dfbf8fSBarry Smith   else if (flag & SOR_BACKWARD_SWEEP){
786da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
787da3a660dSBarry Smith       VecSet(&zero,mat->lvec);
78806be10caSBarry Smith       ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
78978b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
790da3a660dSBarry Smith       for ( i=m-1; i>-1; i-- ) {
791da3a660dSBarry Smith         n    = A->i[i+1] - diag[i] - 1;
792da3a660dSBarry Smith         idx  = A->j + diag[i];
793da3a660dSBarry Smith         v    = A->a + diag[i];
794da3a660dSBarry Smith         sum  = b[i];
795da3a660dSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
796da3a660dSBarry Smith         d    = shift + A->a[diag[i]-1];
797da3a660dSBarry Smith         n    = B->i[i+1] - B->i[i];
798da3a660dSBarry Smith         idx  = B->j + B->i[i] - 1;
799da3a660dSBarry Smith         v    = B->a + B->i[i] - 1;
800da3a660dSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
801da3a660dSBarry Smith         x[i] = omega*(sum/d);
802da3a660dSBarry Smith       }
803edd2f0e1SBarry Smith       ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
80478b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
805da3a660dSBarry Smith       its--;
806da3a660dSBarry Smith     }
807d6dfbf8fSBarry Smith     while (its--) {
80806be10caSBarry Smith       ierr = VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERDOWN,
80978b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
81006be10caSBarry Smith       ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERDOWN,
81178b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
81206be10caSBarry Smith       ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
81378b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
814d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
815d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
816d6dfbf8fSBarry Smith         idx  = A->j + A->i[i] - 1;
817d6dfbf8fSBarry Smith         v    = A->a + A->i[i] - 1;
818d6dfbf8fSBarry Smith         sum  = b[i];
819d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
820d6dfbf8fSBarry Smith         d    = shift + A->a[diag[i]-1];
821d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
822d6dfbf8fSBarry Smith         idx  = B->j + B->i[i] - 1;
823d6dfbf8fSBarry Smith         v    = B->a + B->i[i] - 1;
824d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
825d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
826d6dfbf8fSBarry Smith       }
827edd2f0e1SBarry Smith       ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
82878b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
829d6dfbf8fSBarry Smith     }
830d6dfbf8fSBarry Smith   }
831d6dfbf8fSBarry Smith   else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
832da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
833da3a660dSBarry Smith       return MatRelax(mat->A,bb,omega,flag,shift,its,xx);
834da3a660dSBarry Smith     }
83506be10caSBarry Smith     ierr=VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERALL,mat->Mvctx);
83678b31e54SBarry Smith     CHKERRQ(ierr);
83706be10caSBarry Smith     ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERALL,mat->Mvctx);
83878b31e54SBarry Smith     CHKERRQ(ierr);
839d6dfbf8fSBarry Smith     while (its--) {
840d6dfbf8fSBarry Smith       /* go down through the rows */
841d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
842d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
843d6dfbf8fSBarry Smith         idx  = A->j + A->i[i] - 1;
844d6dfbf8fSBarry Smith         v    = A->a + A->i[i] - 1;
845d6dfbf8fSBarry Smith         sum  = b[i];
846d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
847d6dfbf8fSBarry Smith         d    = shift + A->a[diag[i]-1];
848d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
849d6dfbf8fSBarry Smith         idx  = B->j + B->i[i] - 1;
850d6dfbf8fSBarry Smith         v    = B->a + B->i[i] - 1;
851d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
852d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
853d6dfbf8fSBarry Smith       }
854d6dfbf8fSBarry Smith       /* come up through the rows */
855d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
856d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
857d6dfbf8fSBarry Smith         idx  = A->j + A->i[i] - 1;
858d6dfbf8fSBarry Smith         v    = A->a + A->i[i] - 1;
859d6dfbf8fSBarry Smith         sum  = b[i];
860d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
861d6dfbf8fSBarry Smith         d    = shift + A->a[diag[i]-1];
862d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
863d6dfbf8fSBarry Smith         idx  = B->j + B->i[i] - 1;
864d6dfbf8fSBarry Smith         v    = B->a + B->i[i] - 1;
865d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
866d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
867d6dfbf8fSBarry Smith       }
868d6dfbf8fSBarry Smith     }
869d6dfbf8fSBarry Smith   }
870d6dfbf8fSBarry Smith   else if (flag & SOR_LOCAL_FORWARD_SWEEP){
871da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
872da3a660dSBarry Smith       return MatRelax(mat->A,bb,omega,flag,shift,its,xx);
873da3a660dSBarry Smith     }
87406be10caSBarry Smith     ierr=VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERALL,mat->Mvctx);
87578b31e54SBarry Smith     CHKERRQ(ierr);
87606be10caSBarry Smith     ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERALL,mat->Mvctx);
87778b31e54SBarry Smith     CHKERRQ(ierr);
878d6dfbf8fSBarry Smith     while (its--) {
879d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
880d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
881d6dfbf8fSBarry Smith         idx  = A->j + A->i[i] - 1;
882d6dfbf8fSBarry Smith         v    = A->a + A->i[i] - 1;
883d6dfbf8fSBarry Smith         sum  = b[i];
884d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
885d6dfbf8fSBarry Smith         d    = shift + A->a[diag[i]-1];
886d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
887d6dfbf8fSBarry Smith         idx  = B->j + B->i[i] - 1;
888d6dfbf8fSBarry Smith         v    = B->a + B->i[i] - 1;
889d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
890d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
891d6dfbf8fSBarry Smith       }
892d6dfbf8fSBarry Smith     }
893d6dfbf8fSBarry Smith   }
894d6dfbf8fSBarry Smith   else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
895da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
896da3a660dSBarry Smith       return MatRelax(mat->A,bb,omega,flag,shift,its,xx);
897da3a660dSBarry Smith     }
89806be10caSBarry Smith     ierr = VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERALL,
89978b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
90006be10caSBarry Smith     ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERALL,
90178b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
902d6dfbf8fSBarry Smith     while (its--) {
903d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
904d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
905d6dfbf8fSBarry Smith         idx  = A->j + A->i[i] - 1;
906d6dfbf8fSBarry Smith         v    = A->a + A->i[i] - 1;
907d6dfbf8fSBarry Smith         sum  = b[i];
908d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
909d6dfbf8fSBarry Smith         d    = shift + A->a[diag[i]-1];
910d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
911d6dfbf8fSBarry Smith         idx  = B->j + B->i[i] - 1;
912d6dfbf8fSBarry Smith         v    = B->a + B->i[i] - 1;
913d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
914d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
915d6dfbf8fSBarry Smith       }
916d6dfbf8fSBarry Smith     }
917d6dfbf8fSBarry Smith   }
9188a729477SBarry Smith   return 0;
9198a729477SBarry Smith }
920a66be287SLois Curfman McInnes 
921fc76ce87SLois Curfman McInnes static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,int *nz,
922fc76ce87SLois Curfman McInnes                              int *nzalloc,int *mem)
923a66be287SLois Curfman McInnes {
924a66be287SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
925a66be287SLois Curfman McInnes   Mat        A = mat->A, B = mat->B;
926a66be287SLois Curfman McInnes   int        ierr, isend[3], irecv[3], nzA, nzallocA, memA;
927a66be287SLois Curfman McInnes 
92878b31e54SBarry Smith   ierr = MatGetInfo(A,MAT_LOCAL,&nzA,&nzallocA,&memA); CHKERRQ(ierr);
92978b31e54SBarry Smith   ierr = MatGetInfo(B,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr);
930a66be287SLois Curfman McInnes   isend[0] += nzA; isend[1] += nzallocA; isend[2] += memA;
931a66be287SLois Curfman McInnes   if (flag == MAT_LOCAL) {
932a66be287SLois Curfman McInnes     *nz = isend[0]; *nzalloc = isend[1]; *mem = isend[2];
933a66be287SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_MAX) {
934a66be287SLois Curfman McInnes     MPI_Allreduce((void *) isend,(void *) irecv,3,MPI_INT,MPI_MAX,matin->comm);
935a66be287SLois Curfman McInnes     *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
936a66be287SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_SUM) {
937a66be287SLois Curfman McInnes     MPI_Allreduce((void *) isend,(void *) irecv,3,MPI_INT,MPI_SUM,matin->comm);
938a66be287SLois Curfman McInnes     *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
939a66be287SLois Curfman McInnes   }
940a66be287SLois Curfman McInnes   return 0;
941a66be287SLois Curfman McInnes }
942a66be287SLois Curfman McInnes 
943299609e3SLois Curfman McInnes extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*);
944299609e3SLois Curfman McInnes extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*);
945299609e3SLois Curfman McInnes extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double);
946299609e3SLois Curfman McInnes extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *);
947299609e3SLois Curfman McInnes extern int MatSolve_MPIAIJ(Mat,Vec,Vec);
948299609e3SLois Curfman McInnes extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec);
949299609e3SLois Curfman McInnes extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec);
950299609e3SLois Curfman McInnes extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec);
951299609e3SLois Curfman McInnes 
952fc76ce87SLois Curfman McInnes static int MatSetOption_MPIAIJ(Mat aijin,MatOption op)
953c74985f6SBarry Smith {
95444a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data;
955c74985f6SBarry Smith 
956c74985f6SBarry Smith   if      (op == NO_NEW_NONZERO_LOCATIONS)  {
957c74985f6SBarry Smith     MatSetOption(aij->A,op);
958c74985f6SBarry Smith     MatSetOption(aij->B,op);
959c74985f6SBarry Smith   }
960c74985f6SBarry Smith   else if (op == YES_NEW_NONZERO_LOCATIONS) {
961c74985f6SBarry Smith     MatSetOption(aij->A,op);
962c74985f6SBarry Smith     MatSetOption(aij->B,op);
963c74985f6SBarry Smith   }
96478b31e54SBarry Smith   else if (op == COLUMN_ORIENTED) SETERRQ(1,"Column oriented not supported");
965c74985f6SBarry Smith   return 0;
966c74985f6SBarry Smith }
967c74985f6SBarry Smith 
968d1710a03SLois Curfman McInnes static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
969c74985f6SBarry Smith {
97044a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
971c74985f6SBarry Smith   *m = mat->M; *n = mat->N;
972c74985f6SBarry Smith   return 0;
973c74985f6SBarry Smith }
974c74985f6SBarry Smith 
975d1710a03SLois Curfman McInnes static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
976c74985f6SBarry Smith {
97744a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
978b7c46309SBarry Smith   *m = mat->m; *n = mat->N;
979c74985f6SBarry Smith   return 0;
980c74985f6SBarry Smith }
981c74985f6SBarry Smith 
982d1710a03SLois Curfman McInnes static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
983c74985f6SBarry Smith {
98444a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
985c74985f6SBarry Smith   *m = mat->rstart; *n = mat->rend;
986c74985f6SBarry Smith   return 0;
987c74985f6SBarry Smith }
988c74985f6SBarry Smith 
98939e00950SLois Curfman McInnes static int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
99039e00950SLois Curfman McInnes {
991154123eaSLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
992154123eaSLois Curfman McInnes   Scalar     *vworkA, *vworkB, **pvA, **pvB;
993154123eaSLois Curfman McInnes   int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
994154123eaSLois Curfman McInnes   int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
99539e00950SLois Curfman McInnes 
99639e00950SLois Curfman McInnes   if (row < rstart || row >= rend)
99778b31e54SBarry Smith     SETERRQ(1,"MatGetRow_MPIAIJ: Currently you can get only local rows.")
998abc0e9e4SLois Curfman McInnes   lrow = row - rstart;
99939e00950SLois Curfman McInnes 
1000154123eaSLois Curfman McInnes   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1001154123eaSLois Curfman McInnes   if (!v)   {pvA = 0; pvB = 0;}
1002154123eaSLois Curfman McInnes   if (!idx) {pcA = 0; if (!v) pcB = 0;}
100378b31e54SBarry Smith   ierr = MatGetRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
100478b31e54SBarry Smith   ierr = MatGetRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1005154123eaSLois Curfman McInnes   nztot = nzA + nzB;
1006154123eaSLois Curfman McInnes 
1007154123eaSLois Curfman McInnes   if (v  || idx) {
1008154123eaSLois Curfman McInnes     if (nztot) {
1009154123eaSLois Curfman McInnes       /* Sort by increasing column numbers, assuming A and B already sorted */
1010299609e3SLois Curfman McInnes       int imark;
101148b35521SBarry Smith       if (mat->assembled) {
1012154123eaSLois Curfman McInnes         for (i=0; i<nzB; i++) cworkB[i] = mat->garray[cworkB[i]];
101348b35521SBarry Smith       }
1014154123eaSLois Curfman McInnes       if (v) {
101578b31e54SBarry Smith         *v = (Scalar *) PETSCMALLOC( (nztot)*sizeof(Scalar) ); CHKPTRQ(*v);
101639e00950SLois Curfman McInnes         for ( i=0; i<nzB; i++ ) {
1017154123eaSLois Curfman McInnes           if (cworkB[i] < cstart)   (*v)[i] = vworkB[i];
1018154123eaSLois Curfman McInnes           else break;
1019154123eaSLois Curfman McInnes         }
1020154123eaSLois Curfman McInnes         imark = i;
1021154123eaSLois Curfman McInnes         for ( i=0; i<nzA; i++ )     (*v)[imark+i] = vworkA[i];
1022299609e3SLois Curfman McInnes         for ( i=imark; i<nzB; i++ ) (*v)[nzA+i] = vworkB[i];
1023154123eaSLois Curfman McInnes       }
1024154123eaSLois Curfman McInnes       if (idx) {
102578b31e54SBarry Smith         *idx = (int *) PETSCMALLOC( (nztot)*sizeof(int) ); CHKPTRQ(*idx);
1026154123eaSLois Curfman McInnes         for (i=0; i<nzA; i++) cworkA[i] += cstart;
1027154123eaSLois Curfman McInnes         for ( i=0; i<nzB; i++ ) {
1028154123eaSLois Curfman McInnes           if (cworkB[i] < cstart)   (*idx)[i] = cworkB[i];
1029154123eaSLois Curfman McInnes           else break;
1030154123eaSLois Curfman McInnes         }
1031154123eaSLois Curfman McInnes         imark = i;
1032154123eaSLois Curfman McInnes         for ( i=0; i<nzA; i++ )     (*idx)[imark+i] = cworkA[i];
1033299609e3SLois Curfman McInnes         for ( i=imark; i<nzB; i++ ) (*idx)[nzA+i] = cworkB[i];
103439e00950SLois Curfman McInnes       }
103539e00950SLois Curfman McInnes     }
103639e00950SLois Curfman McInnes     else {*idx = 0; *v=0;}
1037154123eaSLois Curfman McInnes   }
103839e00950SLois Curfman McInnes   *nz = nztot;
103978b31e54SBarry Smith   ierr = MatRestoreRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
104078b31e54SBarry Smith   ierr = MatRestoreRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
104139e00950SLois Curfman McInnes   return 0;
104239e00950SLois Curfman McInnes }
104339e00950SLois Curfman McInnes 
104439e00950SLois Curfman McInnes static int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
104539e00950SLois Curfman McInnes {
104678b31e54SBarry Smith   if (idx) PETSCFREE(*idx);
104778b31e54SBarry Smith   if (v) PETSCFREE(*v);
104839e00950SLois Curfman McInnes   return 0;
104939e00950SLois Curfman McInnes }
105039e00950SLois Curfman McInnes 
1051b7c46309SBarry Smith static int MatTranspose_MPIAIJ(Mat A,Mat *Bin)
1052b7c46309SBarry Smith {
1053b7c46309SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1054b7c46309SBarry Smith   int        ierr;
1055b7c46309SBarry Smith   Mat        B;
1056b7c46309SBarry Smith   Mat_AIJ    *Aloc;
1057b7c46309SBarry Smith   int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
1058b7c46309SBarry Smith   Scalar     *array;
1059b7c46309SBarry Smith 
1060b7c46309SBarry Smith   ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,0,0,0,&B);
1061b7c46309SBarry Smith   CHKERRQ(ierr);
1062b7c46309SBarry Smith 
1063b7c46309SBarry Smith   /* copy over the A part */
1064b7c46309SBarry Smith   Aloc = (Mat_AIJ*) a->A->data;
1065b7c46309SBarry Smith   m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1066b7c46309SBarry Smith   row = a->rstart;
1067b7c46309SBarry Smith   for ( i=0; i<ai[m]-1; i++ ) {aj[i] += a->cstart - 1;}
1068b7c46309SBarry Smith   for ( i=0; i<m; i++ ) {
1069b7c46309SBarry Smith       ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERTVALUES);
1070b7c46309SBarry Smith       CHKERRQ(ierr);
1071b7c46309SBarry Smith       row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1072b7c46309SBarry Smith   }
1073b7c46309SBarry Smith   aj = Aloc->j;
1074b7c46309SBarry Smith   for ( i=0; i<ai[m]-1; i++ ) {aj[i] -= a->cstart - 1;}
1075b7c46309SBarry Smith 
1076b7c46309SBarry Smith   /* copy over the B part */
1077b7c46309SBarry Smith   Aloc = (Mat_AIJ*) a->B->data;
1078b7c46309SBarry Smith   m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1079b7c46309SBarry Smith   row = a->rstart;
1080b7c46309SBarry Smith   ct = cols = (int *) PETSCMALLOC( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
1081b7c46309SBarry Smith   for ( i=0; i<ai[m]-1; i++ ) {cols[i] = a->garray[aj[i]-1];}
1082b7c46309SBarry Smith   for ( i=0; i<m; i++ ) {
1083b7c46309SBarry Smith     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERTVALUES);
1084b7c46309SBarry Smith     CHKERRQ(ierr);
1085b7c46309SBarry Smith     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1086b7c46309SBarry Smith   }
1087b7c46309SBarry Smith   PETSCFREE(ct);
1088b7c46309SBarry Smith   ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
1089b7c46309SBarry Smith   ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
1090b7c46309SBarry Smith   *Bin = B;
1091b7c46309SBarry Smith   return 0;
1092b7c46309SBarry Smith }
1093b7c46309SBarry Smith 
1094fc76ce87SLois Curfman McInnes extern int MatConvert_MPIAIJ(Mat,MatType,Mat *);
1095ff7509f2SBarry Smith static int MatCopyPrivate_MPIAIJ(Mat,Mat *);
1096d6dfbf8fSBarry Smith 
10978a729477SBarry Smith /* -------------------------------------------------------------------*/
10982ee70a88SLois Curfman McInnes static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
109939e00950SLois Curfman McInnes        MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
110044a69424SLois Curfman McInnes        MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
110144a69424SLois Curfman McInnes        MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
1102299609e3SLois Curfman McInnes        MatSolve_MPIAIJ,MatSolveAdd_MPIAIJ,
1103299609e3SLois Curfman McInnes        MatSolveTrans_MPIAIJ,MatSolveTransAdd_MPIAIJ,
1104299609e3SLois Curfman McInnes        MatLUFactor_MPIAIJ,0,
110544a69424SLois Curfman McInnes        MatRelax_MPIAIJ,
1106b7c46309SBarry Smith        MatTranspose_MPIAIJ,
1107a66be287SLois Curfman McInnes        MatGetInfo_MPIAIJ,0,
1108d1710a03SLois Curfman McInnes        MatGetDiagonal_MPIAIJ,0,0,
1109ee50ffe9SBarry Smith        MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
11101eb62cbbSBarry Smith        0,
1111299609e3SLois Curfman McInnes        MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
1112299609e3SLois Curfman McInnes        MatGetReordering_MPIAIJ,
1113299609e3SLois Curfman McInnes        MatLUFactorSymbolic_MPIAIJ,MatLUFactorNumeric_MPIAIJ,0,0,
1114d1710a03SLois Curfman McInnes        MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
1115299609e3SLois Curfman McInnes        MatILUFactorSymbolic_MPIAIJ,0,
1116ff7509f2SBarry Smith        0,0,MatConvert_MPIAIJ,0,0,MatCopyPrivate_MPIAIJ};
11178a729477SBarry Smith 
11188a729477SBarry Smith /*@
1119ff756334SLois Curfman McInnes    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
1120ff756334SLois Curfman McInnes    (the default parallel PETSc format).
11218a729477SBarry Smith 
11228a729477SBarry Smith    Input Parameters:
11231eb62cbbSBarry Smith .  comm - MPI communicator
11247d3e4905SLois Curfman McInnes .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
11257d3e4905SLois Curfman McInnes .  n - number of local columns (or PETSC_DECIDE to have calculated
11267d3e4905SLois Curfman McInnes            if N is given)
11277d3e4905SLois Curfman McInnes .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
11287d3e4905SLois Curfman McInnes .  N - number of global columns (or PETSC_DECIDE to have calculated
11297d3e4905SLois Curfman McInnes            if n is given)
1130ab693e5aSLois Curfman McInnes .  d_nz - number of nonzeros per row in diagonal portion of local submatrix
1131ff756334SLois Curfman McInnes            (same for all local rows)
1132ab693e5aSLois Curfman McInnes .  d_nzz - number of nonzeros per row in diagonal portion of local submatrix
1133ab693e5aSLois Curfman McInnes            or null (possibly different for each row).  You must leave room
1134ab693e5aSLois Curfman McInnes            for the diagonal entry even if it is zero.
1135ab693e5aSLois Curfman McInnes .  o_nz - number of nonzeros per row in off-diagonal portion of local
1136ab693e5aSLois Curfman McInnes            submatrix (same for all local rows).
1137ab693e5aSLois Curfman McInnes .  o_nzz - number of nonzeros per row in off-diagonal portion of local
1138ab693e5aSLois Curfman McInnes            submatrix or null (possibly different for each row).
11398a729477SBarry Smith 
1140ff756334SLois Curfman McInnes    Output Parameter:
11418a729477SBarry Smith .  newmat - the matrix
11428a729477SBarry Smith 
1143ff756334SLois Curfman McInnes    Notes:
1144ff756334SLois Curfman McInnes    The AIJ format (also called the Yale sparse matrix format or
1145ff756334SLois Curfman McInnes    compressed row storage), is fully compatible with standard Fortran 77
1146ff756334SLois Curfman McInnes    storage.  That is, the stored row and column indices begin at
1147ab693e5aSLois Curfman McInnes    one, not zero.  See the users manual for further details.
1148ff756334SLois Curfman McInnes 
1149ff756334SLois Curfman McInnes    The user MUST specify either the local or global matrix dimensions
1150ff756334SLois Curfman McInnes    (possibly both).
1151ff756334SLois Curfman McInnes 
1152e0245417SLois Curfman McInnes    Storage Information:
1153e0245417SLois Curfman McInnes    For a square global matrix we define each processor's diagonal portion
1154e0245417SLois Curfman McInnes    to be its local rows and the corresponding columns (a square submatrix);
1155e0245417SLois Curfman McInnes    each processor's off-diagonal portion encompasses the remainder of the
1156e0245417SLois Curfman McInnes    local matrix (a rectangular submatrix).
1157e0245417SLois Curfman McInnes 
1158e0245417SLois Curfman McInnes    The user can specify preallocated storage for the diagonal part of
1159e0245417SLois Curfman McInnes    the local submatrix with either d_nz or d_nnz (not both). Set both
1160e0245417SLois Curfman McInnes    d_nz and d_nnz to zero for PETSc to control dynamic memory allocation.
1161e0245417SLois Curfman McInnes    Likewise, specify preallocated storage for the off-diagonal part of
1162e0245417SLois Curfman McInnes    the local submatrix with o_nz or o_nnz (not both).
1163ff756334SLois Curfman McInnes 
1164dbd7a890SLois Curfman McInnes .keywords: matrix, aij, compressed row, sparse, parallel
1165ff756334SLois Curfman McInnes 
1166dbd7a890SLois Curfman McInnes .seealso: MatCreate(), MatCreateSequentialAIJ(), MatSetValues()
11678a729477SBarry Smith @*/
11681eb62cbbSBarry Smith int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
11691eb62cbbSBarry Smith                  int d_nz,int *d_nnz, int o_nz,int *o_nnz,Mat *newmat)
11708a729477SBarry Smith {
11718a729477SBarry Smith   Mat          mat;
117244a69424SLois Curfman McInnes   Mat_MPIAIJ   *aij;
11736abc6512SBarry Smith   int          ierr, i,sum[2],work[2];
11748a729477SBarry Smith   *newmat         = 0;
117544a69424SLois Curfman McInnes   PETSCHEADERCREATE(mat,_Mat,MAT_COOKIE,MATMPIAIJ,comm);
1176a5a9c739SBarry Smith   PLogObjectCreate(mat);
117778b31e54SBarry Smith   mat->data       = (void *) (aij = PETSCNEW(Mat_MPIAIJ)); CHKPTRQ(aij);
11788a729477SBarry Smith   mat->ops        = &MatOps;
117944a69424SLois Curfman McInnes   mat->destroy    = MatDestroy_MPIAIJ;
118044a69424SLois Curfman McInnes   mat->view       = MatView_MPIAIJ;
11818a729477SBarry Smith   mat->factor     = 0;
1182d6dfbf8fSBarry Smith 
118307a0e7adSLois Curfman McInnes   aij->insertmode = NOTSETVALUES;
11841eb62cbbSBarry Smith   MPI_Comm_rank(comm,&aij->mytid);
11851eb62cbbSBarry Smith   MPI_Comm_size(comm,&aij->numtids);
11861eb62cbbSBarry Smith 
1187dbd7a890SLois Curfman McInnes   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
11881eb62cbbSBarry Smith     work[0] = m; work[1] = n;
1189d6dfbf8fSBarry Smith     MPI_Allreduce((void *) work,(void *) sum,2,MPI_INT,MPI_SUM,comm );
1190dbd7a890SLois Curfman McInnes     if (M == PETSC_DECIDE) M = sum[0];
1191dbd7a890SLois Curfman McInnes     if (N == PETSC_DECIDE) N = sum[1];
11921eb62cbbSBarry Smith   }
1193dbd7a890SLois Curfman McInnes   if (m == PETSC_DECIDE)
1194dbd7a890SLois Curfman McInnes     {m = M/aij->numtids + ((M % aij->numtids) > aij->mytid);}
1195dbd7a890SLois Curfman McInnes   if (n == PETSC_DECIDE)
1196dbd7a890SLois Curfman McInnes     {n = N/aij->numtids + ((N % aij->numtids) > aij->mytid);}
11978a729477SBarry Smith   aij->m = m;
11988a729477SBarry Smith   aij->n = n;
11991eb62cbbSBarry Smith   aij->N = N;
12001eb62cbbSBarry Smith   aij->M = M;
12011eb62cbbSBarry Smith 
12021eb62cbbSBarry Smith   /* build local table of row and column ownerships */
120378b31e54SBarry Smith   aij->rowners = (int *) PETSCMALLOC(2*(aij->numtids+2)*sizeof(int));
120478b31e54SBarry Smith   CHKPTRQ(aij->rowners);
12051eb62cbbSBarry Smith   aij->cowners = aij->rowners + aij->numtids + 1;
12061eb62cbbSBarry Smith   MPI_Allgather(&m,1,MPI_INT,aij->rowners+1,1,MPI_INT,comm);
12071eb62cbbSBarry Smith   aij->rowners[0] = 0;
12081eb62cbbSBarry Smith   for ( i=2; i<=aij->numtids; i++ ) {
12091eb62cbbSBarry Smith     aij->rowners[i] += aij->rowners[i-1];
12108a729477SBarry Smith   }
12111eb62cbbSBarry Smith   aij->rstart = aij->rowners[aij->mytid];
12121eb62cbbSBarry Smith   aij->rend   = aij->rowners[aij->mytid+1];
12131eb62cbbSBarry Smith   MPI_Allgather(&n,1,MPI_INT,aij->cowners+1,1,MPI_INT,comm);
12141eb62cbbSBarry Smith   aij->cowners[0] = 0;
12151eb62cbbSBarry Smith   for ( i=2; i<=aij->numtids; i++ ) {
12161eb62cbbSBarry Smith     aij->cowners[i] += aij->cowners[i-1];
12178a729477SBarry Smith   }
12181eb62cbbSBarry Smith   aij->cstart = aij->cowners[aij->mytid];
12191eb62cbbSBarry Smith   aij->cend   = aij->cowners[aij->mytid+1];
12208a729477SBarry Smith 
12218a729477SBarry Smith 
12226b5873e3SBarry Smith   ierr = MatCreateSequentialAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&aij->A);
122378b31e54SBarry Smith   CHKERRQ(ierr);
1224a5a9c739SBarry Smith   PLogObjectParent(mat,aij->A);
12256b5873e3SBarry Smith   ierr = MatCreateSequentialAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&aij->B);
122678b31e54SBarry Smith   CHKERRQ(ierr);
1227a5a9c739SBarry Smith   PLogObjectParent(mat,aij->B);
12288a729477SBarry Smith 
12291eb62cbbSBarry Smith   /* build cache for off array entries formed */
123078b31e54SBarry Smith   ierr = StashBuild_Private(&aij->stash); CHKERRQ(ierr);
12319e25ed09SBarry Smith   aij->colmap    = 0;
12329e25ed09SBarry Smith   aij->garray    = 0;
12338a729477SBarry Smith 
12341eb62cbbSBarry Smith   /* stuff used for matrix vector multiply */
12351eb62cbbSBarry Smith   aij->lvec      = 0;
12361eb62cbbSBarry Smith   aij->Mvctx     = 0;
1237d6dfbf8fSBarry Smith   aij->assembled = 0;
12388a729477SBarry Smith 
1239d6dfbf8fSBarry Smith   *newmat = mat;
1240d6dfbf8fSBarry Smith   return 0;
1241d6dfbf8fSBarry Smith }
1242c74985f6SBarry Smith 
1243ff7509f2SBarry Smith static int MatCopyPrivate_MPIAIJ(Mat matin,Mat *newmat)
1244d6dfbf8fSBarry Smith {
1245d6dfbf8fSBarry Smith   Mat        mat;
124644a69424SLois Curfman McInnes   Mat_MPIAIJ *aij,*oldmat = (Mat_MPIAIJ *) matin->data;
12472ee70a88SLois Curfman McInnes   int        ierr, len;
1248d6dfbf8fSBarry Smith   *newmat      = 0;
1249d6dfbf8fSBarry Smith 
125078b31e54SBarry Smith   if (!oldmat->assembled) SETERRQ(1,"Cannot copy unassembled matrix");
125144a69424SLois Curfman McInnes   PETSCHEADERCREATE(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm);
1252a5a9c739SBarry Smith   PLogObjectCreate(mat);
125378b31e54SBarry Smith   mat->data       = (void *) (aij = PETSCNEW(Mat_MPIAIJ)); CHKPTRQ(aij);
1254d6dfbf8fSBarry Smith   mat->ops        = &MatOps;
125544a69424SLois Curfman McInnes   mat->destroy    = MatDestroy_MPIAIJ;
125644a69424SLois Curfman McInnes   mat->view       = MatView_MPIAIJ;
1257d6dfbf8fSBarry Smith   mat->factor     = matin->factor;
1258d6dfbf8fSBarry Smith 
1259d6dfbf8fSBarry Smith   aij->m          = oldmat->m;
1260d6dfbf8fSBarry Smith   aij->n          = oldmat->n;
1261d6dfbf8fSBarry Smith   aij->M          = oldmat->M;
1262d6dfbf8fSBarry Smith   aij->N          = oldmat->N;
1263d6dfbf8fSBarry Smith 
1264d6dfbf8fSBarry Smith   aij->assembled  = 1;
1265d6dfbf8fSBarry Smith   aij->rstart     = oldmat->rstart;
1266d6dfbf8fSBarry Smith   aij->rend       = oldmat->rend;
1267d6dfbf8fSBarry Smith   aij->cstart     = oldmat->cstart;
1268d6dfbf8fSBarry Smith   aij->cend       = oldmat->cend;
1269d6dfbf8fSBarry Smith   aij->numtids    = oldmat->numtids;
1270d6dfbf8fSBarry Smith   aij->mytid      = oldmat->mytid;
127107a0e7adSLois Curfman McInnes   aij->insertmode = NOTSETVALUES;
1272d6dfbf8fSBarry Smith 
127378b31e54SBarry Smith   aij->rowners        = (int *) PETSCMALLOC( (aij->numtids+1)*sizeof(int) );
127478b31e54SBarry Smith   CHKPTRQ(aij->rowners);
127578b31e54SBarry Smith   PETSCMEMCPY(aij->rowners,oldmat->rowners,(aij->numtids+1)*sizeof(int));
127678b31e54SBarry Smith   ierr = StashInitialize_Private(&aij->stash); CHKERRQ(ierr);
12772ee70a88SLois Curfman McInnes   if (oldmat->colmap) {
127878b31e54SBarry Smith     aij->colmap      = (int *) PETSCMALLOC( (aij->N)*sizeof(int) );
127978b31e54SBarry Smith     CHKPTRQ(aij->colmap);
128078b31e54SBarry Smith     PETSCMEMCPY(aij->colmap,oldmat->colmap,(aij->N)*sizeof(int));
12812ee70a88SLois Curfman McInnes   } else aij->colmap = 0;
12822ee70a88SLois Curfman McInnes   if (oldmat->garray && (len = ((Mat_AIJ *) (oldmat->B->data))->n)) {
128378b31e54SBarry Smith     aij->garray      = (int *) PETSCMALLOC(len*sizeof(int) ); CHKPTRQ(aij->garray);
128478b31e54SBarry Smith     PETSCMEMCPY(aij->garray,oldmat->garray,len*sizeof(int));
12852ee70a88SLois Curfman McInnes   } else aij->garray = 0;
1286d6dfbf8fSBarry Smith 
128778b31e54SBarry Smith   ierr =  VecDuplicate(oldmat->lvec,&aij->lvec); CHKERRQ(ierr);
1288a5a9c739SBarry Smith   PLogObjectParent(mat,aij->lvec);
128978b31e54SBarry Smith   ierr =  VecScatterCtxCopy(oldmat->Mvctx,&aij->Mvctx); CHKERRQ(ierr);
1290a5a9c739SBarry Smith   PLogObjectParent(mat,aij->Mvctx);
129178b31e54SBarry Smith   ierr =  MatConvert(oldmat->A,MATSAME,&aij->A); CHKERRQ(ierr);
1292a5a9c739SBarry Smith   PLogObjectParent(mat,aij->A);
129378b31e54SBarry Smith   ierr =  MatConvert(oldmat->B,MATSAME,&aij->B); CHKERRQ(ierr);
1294a5a9c739SBarry Smith   PLogObjectParent(mat,aij->B);
12958a729477SBarry Smith   *newmat = mat;
12968a729477SBarry Smith   return 0;
12978a729477SBarry Smith }
1298