xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision e024541727b18c9b7edf186dfe5afffdd48d6645)
1cb512458SBarry Smith #ifndef lint
2*e0245417SLois Curfman McInnes static char vcid[] = "$Id: mpiaij.c,v 1.55 1995/07/06 17:47:13 curfman Exp curfman $";
3cb512458SBarry Smith #endif
48a729477SBarry Smith 
51eb62cbbSBarry Smith #include "mpiaij.h"
68a729477SBarry Smith #include "vec/vecimpl.h"
7d6dfbf8fSBarry Smith #include "inline/spops.h"
88a729477SBarry Smith 
99e25ed09SBarry Smith /* local utility routine that creates a mapping from the global column
109e25ed09SBarry Smith number to the local number in the off-diagonal part of the local
119e25ed09SBarry Smith storage of the matrix.  This is done in a non scable way since the
129e25ed09SBarry Smith length of colmap equals the global matrix length.
139e25ed09SBarry Smith */
14b7c46309SBarry Smith static int CreateColmap_Private(Mat mat)
159e25ed09SBarry Smith {
1644a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1744a69424SLois Curfman McInnes   Mat_AIJ    *B = (Mat_AIJ*) aij->B->data;
189e25ed09SBarry Smith   int        n = B->n,i;
1978b31e54SBarry Smith   aij->colmap = (int *) PETSCMALLOC(aij->N*sizeof(int));CHKPTRQ(aij->colmap);
2078b31e54SBarry Smith   PETSCMEMSET(aij->colmap,0,aij->N*sizeof(int));
21abc0e9e4SLois Curfman McInnes   for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
229e25ed09SBarry Smith   return 0;
239e25ed09SBarry Smith }
249e25ed09SBarry Smith 
252493cbb0SBarry Smith extern int DisAssemble_MPIAIJ(Mat);
262493cbb0SBarry Smith 
272ee70a88SLois Curfman McInnes static int MatSetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,
281eb62cbbSBarry Smith                             int *idxn,Scalar *v,InsertMode addv)
298a729477SBarry Smith {
3044a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
311eb62cbbSBarry Smith   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
321eb62cbbSBarry Smith   int        cstart = aij->cstart, cend = aij->cend,row,col;
338a729477SBarry Smith 
3407a0e7adSLois Curfman McInnes   if (aij->insertmode != NOTSETVALUES && aij->insertmode != addv) {
3578b31e54SBarry Smith     SETERRQ(1,"You cannot mix inserts and adds");
368a729477SBarry Smith   }
371eb62cbbSBarry Smith   aij->insertmode = addv;
388a729477SBarry Smith   for ( i=0; i<m; i++ ) {
3978b31e54SBarry Smith     if (idxm[i] < 0) SETERRQ(1,"Negative row index");
4078b31e54SBarry Smith     if (idxm[i] >= aij->M) SETERRQ(1,"Row index too large");
411eb62cbbSBarry Smith     if (idxm[i] >= rstart && idxm[i] < rend) {
421eb62cbbSBarry Smith       row = idxm[i] - rstart;
431eb62cbbSBarry Smith       for ( j=0; j<n; j++ ) {
4478b31e54SBarry Smith         if (idxn[j] < 0) SETERRQ(1,"Negative column index");
4578b31e54SBarry Smith         if (idxn[j] >= aij->N) SETERRQ(1,"Column index too large");
461eb62cbbSBarry Smith         if (idxn[j] >= cstart && idxn[j] < cend){
471eb62cbbSBarry Smith           col = idxn[j] - cstart;
4878b31e54SBarry Smith           ierr = MatSetValues(aij->A,1,&row,1,&col,v+i*n+j,addv);CHKERRQ(ierr);
491eb62cbbSBarry Smith         }
501eb62cbbSBarry Smith         else {
51d6dfbf8fSBarry Smith           if (aij->assembled) {
52b7c46309SBarry Smith             if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
539e25ed09SBarry Smith             col = aij->colmap[idxn[j]] - 1;
54b7c46309SBarry Smith             if (col < 0 && !((Mat_AIJ*)(aij->A->data))->nonew) {
552493cbb0SBarry Smith               ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
562493cbb0SBarry Smith               col =  idxn[j];
57d6dfbf8fSBarry Smith             }
589e25ed09SBarry Smith           }
599e25ed09SBarry Smith           else col = idxn[j];
6078b31e54SBarry Smith           ierr = MatSetValues(aij->B,1,&row,1,&col,v+i*n+j,addv);CHKERRQ(ierr);
611eb62cbbSBarry Smith         }
621eb62cbbSBarry Smith       }
631eb62cbbSBarry Smith     }
641eb62cbbSBarry Smith     else {
65dbd7a890SLois Curfman McInnes       ierr = StashValues_Private(&aij->stash,idxm[i],n,idxn,v+i*n,addv);
6678b31e54SBarry Smith       CHKERRQ(ierr);
671eb62cbbSBarry Smith     }
688a729477SBarry Smith   }
698a729477SBarry Smith   return 0;
708a729477SBarry Smith }
718a729477SBarry Smith 
728a729477SBarry Smith /*
731eb62cbbSBarry Smith     the assembly code is a lot like the code for vectors, we should
741eb62cbbSBarry Smith     sometime derive a single assembly code that can be used for
751eb62cbbSBarry Smith     either case.
768a729477SBarry Smith */
778a729477SBarry Smith 
78fc76ce87SLois Curfman McInnes static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
798a729477SBarry Smith {
8044a69424SLois Curfman McInnes   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
81d6dfbf8fSBarry Smith   MPI_Comm    comm = mat->comm;
826abc6512SBarry Smith   int         numtids = aij->numtids, *owners = aij->rowners;
831eb62cbbSBarry Smith   int         mytid = aij->mytid;
841eb62cbbSBarry Smith   MPI_Request *send_waits,*recv_waits;
856abc6512SBarry Smith   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
865392566eSBarry Smith   int         tag = mat->tag, *owner,*starts,count,ierr;
871eb62cbbSBarry Smith   InsertMode  addv;
881eb62cbbSBarry Smith   Scalar      *rvalues,*svalues;
891eb62cbbSBarry Smith 
901eb62cbbSBarry Smith   /* make sure all processors are either in INSERTMODE or ADDMODE */
9128988994SBarry Smith   MPI_Allreduce((void *) &aij->insertmode,(void *) &addv,1,MPI_INT,
921eb62cbbSBarry Smith                 MPI_BOR,comm);
939d00d63dSBarry Smith   if (addv == (ADDVALUES|INSERTVALUES)) {
9478b31e54SBarry Smith     SETERRQ(1,"Some processors have inserted while others have added");
951eb62cbbSBarry Smith   }
961eb62cbbSBarry Smith   aij->insertmode = addv; /* in case this processor had no cache */
971eb62cbbSBarry Smith 
981eb62cbbSBarry Smith   /*  first count number of contributors to each processor */
9978b31e54SBarry Smith   nprocs = (int *) PETSCMALLOC( 2*numtids*sizeof(int) ); CHKPTRQ(nprocs);
10078b31e54SBarry Smith   PETSCMEMSET(nprocs,0,2*numtids*sizeof(int)); procs = nprocs + numtids;
10178b31e54SBarry Smith   owner = (int *) PETSCMALLOC( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
1021eb62cbbSBarry Smith   for ( i=0; i<aij->stash.n; i++ ) {
1031eb62cbbSBarry Smith     idx = aij->stash.idx[i];
1041eb62cbbSBarry Smith     for ( j=0; j<numtids; j++ ) {
1051eb62cbbSBarry Smith       if (idx >= owners[j] && idx < owners[j+1]) {
1061eb62cbbSBarry Smith         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
1078a729477SBarry Smith       }
1088a729477SBarry Smith     }
1098a729477SBarry Smith   }
1101eb62cbbSBarry Smith   nsends = 0;  for ( i=0; i<numtids; i++ ) { nsends += procs[i];}
1111eb62cbbSBarry Smith 
1121eb62cbbSBarry Smith   /* inform other processors of number of messages and max length*/
11378b31e54SBarry Smith   work = (int *) PETSCMALLOC( numtids*sizeof(int) ); CHKPTRQ(work);
1141eb62cbbSBarry Smith   MPI_Allreduce((void *) procs,(void *) work,numtids,MPI_INT,MPI_SUM,comm);
1151eb62cbbSBarry Smith   nreceives = work[mytid];
1161eb62cbbSBarry Smith   MPI_Allreduce((void *) nprocs,(void *) work,numtids,MPI_INT,MPI_MAX,comm);
1171eb62cbbSBarry Smith   nmax = work[mytid];
11878b31e54SBarry Smith   PETSCFREE(work);
1191eb62cbbSBarry Smith 
1201eb62cbbSBarry Smith   /* post receives:
1211eb62cbbSBarry Smith        1) each message will consist of ordered pairs
1221eb62cbbSBarry Smith      (global index,value) we store the global index as a double
123d6dfbf8fSBarry Smith      to simplify the message passing.
1241eb62cbbSBarry Smith        2) since we don't know how long each individual message is we
1251eb62cbbSBarry Smith      allocate the largest needed buffer for each receive. Potentially
1261eb62cbbSBarry Smith      this is a lot of wasted space.
1271eb62cbbSBarry Smith 
1281eb62cbbSBarry Smith 
1291eb62cbbSBarry Smith        This could be done better.
1301eb62cbbSBarry Smith   */
13178b31e54SBarry Smith   rvalues = (Scalar *) PETSCMALLOC(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
13278b31e54SBarry Smith   CHKPTRQ(rvalues);
13378b31e54SBarry Smith   recv_waits = (MPI_Request *) PETSCMALLOC((nreceives+1)*sizeof(MPI_Request));
13478b31e54SBarry Smith   CHKPTRQ(recv_waits);
1351eb62cbbSBarry Smith   for ( i=0; i<nreceives; i++ ) {
136c60448a5SBarry Smith     MPI_Irecv((void *)(rvalues+3*nmax*i),3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
1371eb62cbbSBarry Smith               comm,recv_waits+i);
1381eb62cbbSBarry Smith   }
1391eb62cbbSBarry Smith 
1401eb62cbbSBarry Smith   /* do sends:
1411eb62cbbSBarry Smith       1) starts[i] gives the starting index in svalues for stuff going to
1421eb62cbbSBarry Smith          the ith processor
1431eb62cbbSBarry Smith   */
14478b31e54SBarry Smith   svalues = (Scalar *) PETSCMALLOC( 3*(aij->stash.n+1)*sizeof(Scalar) );
14578b31e54SBarry Smith   CHKPTRQ(svalues);
14678b31e54SBarry Smith   send_waits = (MPI_Request *) PETSCMALLOC( (nsends+1)*sizeof(MPI_Request));
14778b31e54SBarry Smith   CHKPTRQ(send_waits);
14878b31e54SBarry Smith   starts = (int *) PETSCMALLOC( numtids*sizeof(int) ); CHKPTRQ(starts);
1491eb62cbbSBarry Smith   starts[0] = 0;
1501eb62cbbSBarry Smith   for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
1511eb62cbbSBarry Smith   for ( i=0; i<aij->stash.n; i++ ) {
1521eb62cbbSBarry Smith     svalues[3*starts[owner[i]]]       = (Scalar)  aij->stash.idx[i];
1531eb62cbbSBarry Smith     svalues[3*starts[owner[i]]+1]     = (Scalar)  aij->stash.idy[i];
1541eb62cbbSBarry Smith     svalues[3*(starts[owner[i]]++)+2] =  aij->stash.array[i];
1551eb62cbbSBarry Smith   }
15678b31e54SBarry Smith   PETSCFREE(owner);
1571eb62cbbSBarry Smith   starts[0] = 0;
1581eb62cbbSBarry Smith   for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
1591eb62cbbSBarry Smith   count = 0;
1601eb62cbbSBarry Smith   for ( i=0; i<numtids; i++ ) {
1611eb62cbbSBarry Smith     if (procs[i]) {
162c60448a5SBarry Smith       MPI_Isend((void*)(svalues+3*starts[i]),3*nprocs[i],MPIU_SCALAR,i,tag,
1631eb62cbbSBarry Smith                 comm,send_waits+count++);
1641eb62cbbSBarry Smith     }
1651eb62cbbSBarry Smith   }
16678b31e54SBarry Smith   PETSCFREE(starts); PETSCFREE(nprocs);
1671eb62cbbSBarry Smith 
1681eb62cbbSBarry Smith   /* Free cache space */
16978b31e54SBarry Smith   ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
1701eb62cbbSBarry Smith 
1711eb62cbbSBarry Smith   aij->svalues    = svalues;    aij->rvalues = rvalues;
1721eb62cbbSBarry Smith   aij->nsends     = nsends;     aij->nrecvs = nreceives;
1731eb62cbbSBarry Smith   aij->send_waits = send_waits; aij->recv_waits = recv_waits;
1741eb62cbbSBarry Smith   aij->rmax       = nmax;
1751eb62cbbSBarry Smith 
1761eb62cbbSBarry Smith   return 0;
1771eb62cbbSBarry Smith }
17844a69424SLois Curfman McInnes extern int MatSetUpMultiply_MPIAIJ(Mat);
1791eb62cbbSBarry Smith 
180fc76ce87SLois Curfman McInnes static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
1811eb62cbbSBarry Smith {
1821eb62cbbSBarry Smith   int        ierr;
18344a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1841eb62cbbSBarry Smith   MPI_Status  *send_status,recv_status;
1856abc6512SBarry Smith   int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n;
1862493cbb0SBarry Smith   int         row,col,other_disassembled;
1871eb62cbbSBarry Smith   Scalar      *values,val;
1881eb62cbbSBarry Smith   InsertMode  addv = aij->insertmode;
1891eb62cbbSBarry Smith 
1901eb62cbbSBarry Smith   /*  wait on receives */
1911eb62cbbSBarry Smith   while (count) {
192d6dfbf8fSBarry Smith     MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
1931eb62cbbSBarry Smith     /* unpack receives into our local space */
194d6dfbf8fSBarry Smith     values = aij->rvalues + 3*imdex*aij->rmax;
195c60448a5SBarry Smith     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
1961eb62cbbSBarry Smith     n = n/3;
1971eb62cbbSBarry Smith     for ( i=0; i<n; i++ ) {
1981eb62cbbSBarry Smith       row = (int) PETSCREAL(values[3*i]) - aij->rstart;
1991eb62cbbSBarry Smith       col = (int) PETSCREAL(values[3*i+1]);
2001eb62cbbSBarry Smith       val = values[3*i+2];
2011eb62cbbSBarry Smith       if (col >= aij->cstart && col < aij->cend) {
2021eb62cbbSBarry Smith           col -= aij->cstart;
2031eb62cbbSBarry Smith         MatSetValues(aij->A,1,&row,1,&col,&val,addv);
2041eb62cbbSBarry Smith       }
2051eb62cbbSBarry Smith       else {
206d6dfbf8fSBarry Smith         if (aij->assembled) {
207b7c46309SBarry Smith           if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
2089e25ed09SBarry Smith           col = aij->colmap[col] - 1;
209b7c46309SBarry Smith           if (col < 0  && !((Mat_AIJ*)(aij->A->data))->nonew) {
2102493cbb0SBarry Smith             ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
2112493cbb0SBarry Smith             col = (int) PETSCREAL(values[3*i+1]);
212d6dfbf8fSBarry Smith           }
2139e25ed09SBarry Smith         }
2141eb62cbbSBarry Smith         MatSetValues(aij->B,1,&row,1,&col,&val,addv);
2151eb62cbbSBarry Smith       }
2161eb62cbbSBarry Smith     }
2171eb62cbbSBarry Smith     count--;
2181eb62cbbSBarry Smith   }
21978b31e54SBarry Smith   PETSCFREE(aij->recv_waits); PETSCFREE(aij->rvalues);
2201eb62cbbSBarry Smith 
2211eb62cbbSBarry Smith   /* wait on sends */
2221eb62cbbSBarry Smith   if (aij->nsends) {
22378b31e54SBarry Smith     send_status = (MPI_Status *) PETSCMALLOC( aij->nsends*sizeof(MPI_Status) );
22478b31e54SBarry Smith     CHKPTRQ(send_status);
2251eb62cbbSBarry Smith     MPI_Waitall(aij->nsends,aij->send_waits,send_status);
22678b31e54SBarry Smith     PETSCFREE(send_status);
2271eb62cbbSBarry Smith   }
22878b31e54SBarry Smith   PETSCFREE(aij->send_waits); PETSCFREE(aij->svalues);
2291eb62cbbSBarry Smith 
23007a0e7adSLois Curfman McInnes   aij->insertmode = NOTSETVALUES;
23178b31e54SBarry Smith   ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
23278b31e54SBarry Smith   ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);
2331eb62cbbSBarry Smith 
2342493cbb0SBarry Smith   /* determine if any processor has disassembled, if so we must
2352493cbb0SBarry Smith      also disassemble ourselfs, in order that we may reassemble. */
2362493cbb0SBarry Smith   MPI_Allreduce((void *) &aij->assembled,(void *) &other_disassembled,1,
2372493cbb0SBarry Smith                  MPI_INT,MPI_PROD,mat->comm);
2382493cbb0SBarry Smith   if (aij->assembled && !other_disassembled) {
2392493cbb0SBarry Smith     ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
2402493cbb0SBarry Smith   }
2412493cbb0SBarry Smith 
2425e42470aSBarry Smith   if (!aij->assembled && mode == FINAL_ASSEMBLY) {
24378b31e54SBarry Smith     ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
244d6dfbf8fSBarry Smith     aij->assembled = 1;
2455e42470aSBarry Smith   }
24678b31e54SBarry Smith   ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
24778b31e54SBarry Smith   ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);
2485e42470aSBarry Smith 
2498a729477SBarry Smith   return 0;
2508a729477SBarry Smith }
2518a729477SBarry Smith 
2522ee70a88SLois Curfman McInnes static int MatZeroEntries_MPIAIJ(Mat A)
2531eb62cbbSBarry Smith {
25444a69424SLois Curfman McInnes   Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
255dbd7a890SLois Curfman McInnes   int ierr;
25678b31e54SBarry Smith   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
25778b31e54SBarry Smith   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
2581eb62cbbSBarry Smith   return 0;
2591eb62cbbSBarry Smith }
2601eb62cbbSBarry Smith 
2611eb62cbbSBarry Smith /* again this uses the same basic stratagy as in the assembly and
2621eb62cbbSBarry Smith    scatter create routines, we should try to do it systemamatically
2631eb62cbbSBarry Smith    if we can figure out the proper level of generality. */
2641eb62cbbSBarry Smith 
2651eb62cbbSBarry Smith /* the code does not do the diagonal entries correctly unless the
2661eb62cbbSBarry Smith    matrix is square and the column and row owerships are identical.
2671eb62cbbSBarry Smith    This is a BUG. The only way to fix it seems to be to access
2681eb62cbbSBarry Smith    aij->A and aij->B directly and not through the MatZeroRows()
2691eb62cbbSBarry Smith    routine.
2701eb62cbbSBarry Smith */
27144a69424SLois Curfman McInnes static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
2721eb62cbbSBarry Smith {
27344a69424SLois Curfman McInnes   Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
2741eb62cbbSBarry Smith   int            i,ierr,N, *rows,*owners = l->rowners,numtids = l->numtids;
2756abc6512SBarry Smith   int            *procs,*nprocs,j,found,idx,nsends,*work;
2761eb62cbbSBarry Smith   int            nmax,*svalues,*starts,*owner,nrecvs,mytid = l->mytid;
2775392566eSBarry Smith   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
278d6dfbf8fSBarry Smith   int            *lens,imdex,*lrows,*values;
279d6dfbf8fSBarry Smith   MPI_Comm       comm = A->comm;
2801eb62cbbSBarry Smith   MPI_Request    *send_waits,*recv_waits;
2811eb62cbbSBarry Smith   MPI_Status     recv_status,*send_status;
2821eb62cbbSBarry Smith   IS             istmp;
2831eb62cbbSBarry Smith 
284c8f67822SLois Curfman McInnes   if (!l->assembled)
28578b31e54SBarry Smith     SETERRQ(1,"MatZeroRows_MPIAIJ: Must assemble matrix first");
28678b31e54SBarry Smith   ierr = ISGetLocalSize(is,&N); CHKERRQ(ierr);
28778b31e54SBarry Smith   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
2881eb62cbbSBarry Smith 
2891eb62cbbSBarry Smith   /*  first count number of contributors to each processor */
29078b31e54SBarry Smith   nprocs = (int *) PETSCMALLOC( 2*numtids*sizeof(int) ); CHKPTRQ(nprocs);
29178b31e54SBarry Smith   PETSCMEMSET(nprocs,0,2*numtids*sizeof(int)); procs = nprocs + numtids;
29278b31e54SBarry Smith   owner = (int *) PETSCMALLOC((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
2931eb62cbbSBarry Smith   for ( i=0; i<N; i++ ) {
2941eb62cbbSBarry Smith     idx = rows[i];
2951eb62cbbSBarry Smith     found = 0;
2961eb62cbbSBarry Smith     for ( j=0; j<numtids; j++ ) {
2971eb62cbbSBarry Smith       if (idx >= owners[j] && idx < owners[j+1]) {
2981eb62cbbSBarry Smith         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
2991eb62cbbSBarry Smith       }
3001eb62cbbSBarry Smith     }
30178b31e54SBarry Smith     if (!found) SETERRQ(1,"Index out of range.");
3021eb62cbbSBarry Smith   }
3031eb62cbbSBarry Smith   nsends = 0;  for ( i=0; i<numtids; i++ ) { nsends += procs[i];}
3041eb62cbbSBarry Smith 
3051eb62cbbSBarry Smith   /* inform other processors of number of messages and max length*/
30678b31e54SBarry Smith   work = (int *) PETSCMALLOC( numtids*sizeof(int) ); CHKPTRQ(work);
3071eb62cbbSBarry Smith   MPI_Allreduce((void *) procs,(void *) work,numtids,MPI_INT,MPI_SUM,comm);
3081eb62cbbSBarry Smith   nrecvs = work[mytid];
3091eb62cbbSBarry Smith   MPI_Allreduce((void *) nprocs,(void *) work,numtids,MPI_INT,MPI_MAX,comm);
3101eb62cbbSBarry Smith   nmax = work[mytid];
31178b31e54SBarry Smith   PETSCFREE(work);
3121eb62cbbSBarry Smith 
3131eb62cbbSBarry Smith   /* post receives:   */
31478b31e54SBarry Smith   rvalues = (int *) PETSCMALLOC((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
31578b31e54SBarry Smith   CHKPTRQ(rvalues);
31678b31e54SBarry Smith   recv_waits = (MPI_Request *) PETSCMALLOC((nrecvs+1)*sizeof(MPI_Request));
31778b31e54SBarry Smith   CHKPTRQ(recv_waits);
3181eb62cbbSBarry Smith   for ( i=0; i<nrecvs; i++ ) {
3191eb62cbbSBarry Smith     MPI_Irecv((void *)(rvalues+nmax*i),nmax,MPI_INT,MPI_ANY_SOURCE,tag,
3201eb62cbbSBarry Smith               comm,recv_waits+i);
3211eb62cbbSBarry Smith   }
3221eb62cbbSBarry Smith 
3231eb62cbbSBarry Smith   /* do sends:
3241eb62cbbSBarry Smith       1) starts[i] gives the starting index in svalues for stuff going to
3251eb62cbbSBarry Smith          the ith processor
3261eb62cbbSBarry Smith   */
32778b31e54SBarry Smith   svalues = (int *) PETSCMALLOC( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
32878b31e54SBarry Smith   send_waits = (MPI_Request *) PETSCMALLOC( (nsends+1)*sizeof(MPI_Request));
32978b31e54SBarry Smith   CHKPTRQ(send_waits);
33078b31e54SBarry Smith   starts = (int *) PETSCMALLOC( (numtids+1)*sizeof(int) ); CHKPTRQ(starts);
3311eb62cbbSBarry Smith   starts[0] = 0;
3321eb62cbbSBarry Smith   for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
3331eb62cbbSBarry Smith   for ( i=0; i<N; i++ ) {
3341eb62cbbSBarry Smith     svalues[starts[owner[i]]++] = rows[i];
3351eb62cbbSBarry Smith   }
3361eb62cbbSBarry Smith   ISRestoreIndices(is,&rows);
3371eb62cbbSBarry Smith 
3381eb62cbbSBarry Smith   starts[0] = 0;
3391eb62cbbSBarry Smith   for ( i=1; i<numtids+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
3401eb62cbbSBarry Smith   count = 0;
3411eb62cbbSBarry Smith   for ( i=0; i<numtids; i++ ) {
3421eb62cbbSBarry Smith     if (procs[i]) {
3431eb62cbbSBarry Smith       MPI_Isend((void*)(svalues+starts[i]),nprocs[i],MPI_INT,i,tag,
3441eb62cbbSBarry Smith                 comm,send_waits+count++);
3451eb62cbbSBarry Smith     }
3461eb62cbbSBarry Smith   }
34778b31e54SBarry Smith   PETSCFREE(starts);
3481eb62cbbSBarry Smith 
3491eb62cbbSBarry Smith   base = owners[mytid];
3501eb62cbbSBarry Smith 
3511eb62cbbSBarry Smith   /*  wait on receives */
35278b31e54SBarry Smith   lens = (int *) PETSCMALLOC( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
3531eb62cbbSBarry Smith   source = lens + nrecvs;
3541eb62cbbSBarry Smith   count = nrecvs; slen = 0;
3551eb62cbbSBarry Smith   while (count) {
356d6dfbf8fSBarry Smith     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
3571eb62cbbSBarry Smith     /* unpack receives into our local space */
3581eb62cbbSBarry Smith     MPI_Get_count(&recv_status,MPI_INT,&n);
359d6dfbf8fSBarry Smith     source[imdex]  = recv_status.MPI_SOURCE;
360d6dfbf8fSBarry Smith     lens[imdex]  = n;
3611eb62cbbSBarry Smith     slen += n;
3621eb62cbbSBarry Smith     count--;
3631eb62cbbSBarry Smith   }
36478b31e54SBarry Smith   PETSCFREE(recv_waits);
3651eb62cbbSBarry Smith 
3661eb62cbbSBarry Smith   /* move the data into the send scatter */
36778b31e54SBarry Smith   lrows = (int *) PETSCMALLOC( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
3681eb62cbbSBarry Smith   count = 0;
3691eb62cbbSBarry Smith   for ( i=0; i<nrecvs; i++ ) {
3701eb62cbbSBarry Smith     values = rvalues + i*nmax;
3711eb62cbbSBarry Smith     for ( j=0; j<lens[i]; j++ ) {
3721eb62cbbSBarry Smith       lrows[count++] = values[j] - base;
3731eb62cbbSBarry Smith     }
3741eb62cbbSBarry Smith   }
37578b31e54SBarry Smith   PETSCFREE(rvalues); PETSCFREE(lens);
37678b31e54SBarry Smith   PETSCFREE(owner); PETSCFREE(nprocs);
3771eb62cbbSBarry Smith 
3781eb62cbbSBarry Smith   /* actually zap the local rows */
3796b5873e3SBarry Smith   ierr = ISCreateSequential(MPI_COMM_SELF,slen,lrows,&istmp);
38078b31e54SBarry Smith   CHKERRQ(ierr);  PETSCFREE(lrows);
38178b31e54SBarry Smith   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
38278b31e54SBarry Smith   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
38378b31e54SBarry Smith   ierr = ISDestroy(istmp); CHKERRQ(ierr);
3841eb62cbbSBarry Smith 
3851eb62cbbSBarry Smith   /* wait on sends */
3861eb62cbbSBarry Smith   if (nsends) {
38778b31e54SBarry Smith     send_status = (MPI_Status *) PETSCMALLOC( nsends*sizeof(MPI_Status) );
38878b31e54SBarry Smith     CHKPTRQ(send_status);
3891eb62cbbSBarry Smith     MPI_Waitall(nsends,send_waits,send_status);
39078b31e54SBarry Smith     PETSCFREE(send_status);
3911eb62cbbSBarry Smith   }
39278b31e54SBarry Smith   PETSCFREE(send_waits); PETSCFREE(svalues);
3931eb62cbbSBarry Smith 
3941eb62cbbSBarry Smith 
3951eb62cbbSBarry Smith   return 0;
3961eb62cbbSBarry Smith }
3971eb62cbbSBarry Smith 
39844a69424SLois Curfman McInnes static int MatMult_MPIAIJ(Mat aijin,Vec xx,Vec yy)
3991eb62cbbSBarry Smith {
40044a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data;
4011eb62cbbSBarry Smith   int        ierr;
40278b31e54SBarry Smith   if (!aij->assembled) SETERRQ(1,"MatMult_MPIAIJ: must assemble matrix first");
40306be10caSBarry Smith   ierr = VecScatterBegin(xx,aij->lvec,INSERTVALUES,SCATTERALL,aij->Mvctx);
40478b31e54SBarry Smith   CHKERRQ(ierr);
40578b31e54SBarry Smith   ierr = MatMult(aij->A,xx,yy); CHKERRQ(ierr);
40606be10caSBarry Smith   ierr = VecScatterEnd(xx,aij->lvec,INSERTVALUES,SCATTERALL,aij->Mvctx);
40778b31e54SBarry Smith   CHKERRQ(ierr);
40878b31e54SBarry Smith   ierr = MatMultAdd(aij->B,aij->lvec,yy,yy); CHKERRQ(ierr);
4091eb62cbbSBarry Smith   return 0;
4101eb62cbbSBarry Smith }
4111eb62cbbSBarry Smith 
41244a69424SLois Curfman McInnes static int MatMultAdd_MPIAIJ(Mat aijin,Vec xx,Vec yy,Vec zz)
413da3a660dSBarry Smith {
41444a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data;
415da3a660dSBarry Smith   int        ierr;
41678b31e54SBarry Smith   if (!aij->assembled) SETERRQ(1,"MatMult_MPIAIJ: must assemble matrix first");
41706be10caSBarry Smith   ierr = VecScatterBegin(xx,aij->lvec,INSERTVALUES,SCATTERALL,aij->Mvctx);
41878b31e54SBarry Smith   CHKERRQ(ierr);
41978b31e54SBarry Smith   ierr = MatMultAdd(aij->A,xx,yy,zz); CHKERRQ(ierr);
42006be10caSBarry Smith   ierr = VecScatterEnd(xx,aij->lvec,INSERTVALUES,SCATTERALL,aij->Mvctx);
42178b31e54SBarry Smith   CHKERRQ(ierr);
42278b31e54SBarry Smith   ierr = MatMultAdd(aij->B,aij->lvec,zz,zz); CHKERRQ(ierr);
423da3a660dSBarry Smith   return 0;
424da3a660dSBarry Smith }
425da3a660dSBarry Smith 
42644a69424SLois Curfman McInnes static int MatMultTrans_MPIAIJ(Mat aijin,Vec xx,Vec yy)
427da3a660dSBarry Smith {
42844a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data;
429da3a660dSBarry Smith   int        ierr;
430da3a660dSBarry Smith 
43144a69424SLois Curfman McInnes   if (!aij->assembled)
43278b31e54SBarry Smith     SETERRQ(1,"MatMulTrans_MPIAIJ: must assemble matrix first");
433da3a660dSBarry Smith   /* do nondiagonal part */
43478b31e54SBarry Smith   ierr = MatMultTrans(aij->B,xx,aij->lvec); CHKERRQ(ierr);
435da3a660dSBarry Smith   /* send it on its way */
43606be10caSBarry Smith   ierr = VecScatterBegin(aij->lvec,yy,ADDVALUES,
43778b31e54SBarry Smith            (ScatterMode)(SCATTERALL|SCATTERREVERSE),aij->Mvctx); CHKERRQ(ierr);
438da3a660dSBarry Smith   /* do local part */
43978b31e54SBarry Smith   ierr = MatMultTrans(aij->A,xx,yy); CHKERRQ(ierr);
440da3a660dSBarry Smith   /* receive remote parts: note this assumes the values are not actually */
441da3a660dSBarry Smith   /* inserted in yy until the next line, which is true for my implementation*/
442da3a660dSBarry Smith   /* but is not perhaps always true. */
44306be10caSBarry Smith   ierr = VecScatterEnd(aij->lvec,yy,ADDVALUES,
44478b31e54SBarry Smith          (ScatterMode)(SCATTERALL|SCATTERREVERSE),aij->Mvctx); CHKERRQ(ierr);
445da3a660dSBarry Smith   return 0;
446da3a660dSBarry Smith }
447da3a660dSBarry Smith 
44844a69424SLois Curfman McInnes static int MatMultTransAdd_MPIAIJ(Mat aijin,Vec xx,Vec yy,Vec zz)
449da3a660dSBarry Smith {
45044a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data;
451da3a660dSBarry Smith   int        ierr;
452da3a660dSBarry Smith 
45344a69424SLois Curfman McInnes   if (!aij->assembled)
45478b31e54SBarry Smith     SETERRQ(1,"MatMulTransAdd_MPIAIJ: must assemble matrix first");
455da3a660dSBarry Smith   /* do nondiagonal part */
45678b31e54SBarry Smith   ierr = MatMultTrans(aij->B,xx,aij->lvec); CHKERRQ(ierr);
457da3a660dSBarry Smith   /* send it on its way */
45806be10caSBarry Smith   ierr = VecScatterBegin(aij->lvec,zz,ADDVALUES,
45978b31e54SBarry Smith          (ScatterMode)(SCATTERALL|SCATTERREVERSE),aij->Mvctx); CHKERRQ(ierr);
460da3a660dSBarry Smith   /* do local part */
46178b31e54SBarry Smith   ierr = MatMultTransAdd(aij->A,xx,yy,zz); CHKERRQ(ierr);
462da3a660dSBarry Smith   /* receive remote parts: note this assumes the values are not actually */
463da3a660dSBarry Smith   /* inserted in yy until the next line, which is true for my implementation*/
464da3a660dSBarry Smith   /* but is not perhaps always true. */
46506be10caSBarry Smith   ierr = VecScatterEnd(aij->lvec,zz,ADDVALUES,
46678b31e54SBarry Smith        (ScatterMode)(SCATTERALL|SCATTERREVERSE),aij->Mvctx); CHKERRQ(ierr);
467da3a660dSBarry Smith   return 0;
468da3a660dSBarry Smith }
469da3a660dSBarry Smith 
4701eb62cbbSBarry Smith /*
4711eb62cbbSBarry Smith   This only works correctly for square matrices where the subblock A->A is the
4721eb62cbbSBarry Smith    diagonal block
4731eb62cbbSBarry Smith */
474d1710a03SLois Curfman McInnes static int MatGetDiagonal_MPIAIJ(Mat Ain,Vec v)
4751eb62cbbSBarry Smith {
47644a69424SLois Curfman McInnes   Mat_MPIAIJ *A = (Mat_MPIAIJ *) Ain->data;
47778b31e54SBarry Smith   if (!A->assembled) SETERRQ(1,"MatGetDiag_MPIAIJ: must assemble matrix first");
4781eb62cbbSBarry Smith   return MatGetDiagonal(A->A,v);
4791eb62cbbSBarry Smith }
4801eb62cbbSBarry Smith 
48144a69424SLois Curfman McInnes static int MatDestroy_MPIAIJ(PetscObject obj)
4821eb62cbbSBarry Smith {
4831eb62cbbSBarry Smith   Mat        mat = (Mat) obj;
48444a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
4851eb62cbbSBarry Smith   int        ierr;
486a5a9c739SBarry Smith #if defined(PETSC_LOG)
487a5a9c739SBarry Smith   PLogObjectState(obj,"Rows %d Cols %d",aij->M,aij->N);
488a5a9c739SBarry Smith #endif
48978b31e54SBarry Smith   PETSCFREE(aij->rowners);
49078b31e54SBarry Smith   ierr = MatDestroy(aij->A); CHKERRQ(ierr);
49178b31e54SBarry Smith   ierr = MatDestroy(aij->B); CHKERRQ(ierr);
49278b31e54SBarry Smith   if (aij->colmap) PETSCFREE(aij->colmap);
49378b31e54SBarry Smith   if (aij->garray) PETSCFREE(aij->garray);
4941eb62cbbSBarry Smith   if (aij->lvec) VecDestroy(aij->lvec);
4951eb62cbbSBarry Smith   if (aij->Mvctx) VecScatterCtxDestroy(aij->Mvctx);
49678b31e54SBarry Smith   PETSCFREE(aij);
497a5a9c739SBarry Smith   PLogObjectDestroy(mat);
498a5a9c739SBarry Smith   PETSCHEADERDESTROY(mat);
4991eb62cbbSBarry Smith   return 0;
5001eb62cbbSBarry Smith }
5017857610eSBarry Smith #include "draw.h"
502ee50ffe9SBarry Smith #include "pviewer.h"
503ee50ffe9SBarry Smith 
50444a69424SLois Curfman McInnes static int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
5051eb62cbbSBarry Smith {
5061eb62cbbSBarry Smith   Mat        mat = (Mat) obj;
50744a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
5081eb62cbbSBarry Smith   int        ierr;
509d13ab20cSBarry Smith   PetscObject vobj = (PetscObject) viewer;
5101eb62cbbSBarry Smith 
51178b31e54SBarry Smith   if (!aij->assembled) SETERRQ(1,"MatView_MPIAIJ: must assemble matrix first");
512154123eaSLois Curfman McInnes   if (!viewer) { /* so that viewers may be used from debuggers */
513154123eaSLois Curfman McInnes     viewer = STDOUT_VIEWER; vobj = (PetscObject) viewer;
514154123eaSLois Curfman McInnes   }
515ab254492SBarry Smith   if (vobj->cookie == DRAW_COOKIE && vobj->type == NULLWINDOW) return 0;
5167857610eSBarry Smith   if (vobj->cookie == VIEWER_COOKIE && vobj->type == FILE_VIEWER) {
5174a0ce102SLois Curfman McInnes     FILE *fd = ViewerFileGetPointer_Private(viewer);
5187f813858SBarry Smith     MPIU_Seq_begin(mat->comm,1);
519d13ab20cSBarry Smith     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
5201eb62cbbSBarry Smith              aij->mytid,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
5211eb62cbbSBarry Smith              aij->cend);
52278b31e54SBarry Smith     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
52378b31e54SBarry Smith     ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
524d13ab20cSBarry Smith     fflush(fd);
5257f813858SBarry Smith     MPIU_Seq_end(mat->comm,1);
526d13ab20cSBarry Smith   }
5277857610eSBarry Smith   else if ((vobj->cookie == VIEWER_COOKIE && vobj->type == FILES_VIEWER) ||
5287857610eSBarry Smith             vobj->cookie == DRAW_COOKIE) {
52995373324SBarry Smith     int numtids = aij->numtids, mytid = aij->mytid;
53095373324SBarry Smith     if (numtids == 1) {
53178b31e54SBarry Smith       ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
53295373324SBarry Smith     }
53395373324SBarry Smith     else {
53495373324SBarry Smith       /* assemble the entire matrix onto first processor. */
53595373324SBarry Smith       Mat     A;
5362ee70a88SLois Curfman McInnes       Mat_AIJ *Aloc;
5372eb8c8abSBarry Smith       int     M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
53895373324SBarry Smith       Scalar  *a;
5392ee70a88SLois Curfman McInnes 
54095373324SBarry Smith       if (!mytid) {
54195373324SBarry Smith         ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,0,0,0,&A);
54295373324SBarry Smith       }
54395373324SBarry Smith       else {
54495373324SBarry Smith         ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,0,0,0,&A);
54595373324SBarry Smith       }
54678b31e54SBarry Smith       CHKERRQ(ierr);
54795373324SBarry Smith 
54895373324SBarry Smith       /* copy over the A part */
5492ee70a88SLois Curfman McInnes       Aloc = (Mat_AIJ*) aij->A->data;
5502ee70a88SLois Curfman McInnes       m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
55195373324SBarry Smith       row = aij->rstart;
552b7c46309SBarry Smith       for ( i=0; i<ai[m]-1; i++ ) {aj[i] += aij->cstart - 1;}
55395373324SBarry Smith       for ( i=0; i<m; i++ ) {
55407a0e7adSLois Curfman McInnes         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERTVALUES);
55578b31e54SBarry Smith         CHKERRQ(ierr);
55695373324SBarry Smith         row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
55795373324SBarry Smith       }
5582ee70a88SLois Curfman McInnes       aj = Aloc->j;
559b7c46309SBarry Smith       for ( i=0; i<ai[m]-1; i++ ) {aj[i] -= aij->cstart - 1;}
56095373324SBarry Smith 
56195373324SBarry Smith       /* copy over the B part */
5622ee70a88SLois Curfman McInnes       Aloc = (Mat_AIJ*) aij->B->data;
5632ee70a88SLois Curfman McInnes       m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
56495373324SBarry Smith       row = aij->rstart;
56578b31e54SBarry Smith       ct = cols = (int *) PETSCMALLOC( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
566b7c46309SBarry Smith       for ( i=0; i<ai[m]-1; i++ ) {cols[i] = aij->garray[aj[i]-1];}
56795373324SBarry Smith       for ( i=0; i<m; i++ ) {
56807a0e7adSLois Curfman McInnes         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERTVALUES);
56978b31e54SBarry Smith         CHKERRQ(ierr);
57095373324SBarry Smith         row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
57195373324SBarry Smith       }
57278b31e54SBarry Smith       PETSCFREE(ct);
57378b31e54SBarry Smith       ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
57478b31e54SBarry Smith       ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
57595373324SBarry Smith       if (!mytid) {
57678b31e54SBarry Smith         ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
57795373324SBarry Smith       }
57878b31e54SBarry Smith       ierr = MatDestroy(A); CHKERRQ(ierr);
57995373324SBarry Smith     }
58095373324SBarry Smith   }
5811eb62cbbSBarry Smith   return 0;
5821eb62cbbSBarry Smith }
5831eb62cbbSBarry Smith 
584d3c9fed9SLois Curfman McInnes extern int MatMarkDiag_AIJ(Mat_AIJ  *);
5851eb62cbbSBarry Smith /*
5861eb62cbbSBarry Smith     This has to provide several versions.
5871eb62cbbSBarry Smith 
5881eb62cbbSBarry Smith      1) per sequential
5891eb62cbbSBarry Smith      2) a) use only local smoothing updating outer values only once.
5901eb62cbbSBarry Smith         b) local smoothing updating outer values each inner iteration
591d6dfbf8fSBarry Smith      3) color updating out values betwen colors.
5921eb62cbbSBarry Smith */
593fc76ce87SLois Curfman McInnes static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
594fc76ce87SLois Curfman McInnes                            double shift,int its,Vec xx)
5958a729477SBarry Smith {
59644a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
597d6dfbf8fSBarry Smith   Mat        AA = mat->A, BB = mat->B;
59844a69424SLois Curfman McInnes   Mat_AIJ    *A = (Mat_AIJ *) AA->data, *B = (Mat_AIJ *)BB->data;
5996abc6512SBarry Smith   Scalar     zero = 0.0,*b,*x,*xs,*ls,d,*v,sum,scale,*t,*ts;
6006abc6512SBarry Smith   int        ierr,*idx, *diag;
6016abc6512SBarry Smith   int        n = mat->n, m = mat->m, i;
602da3a660dSBarry Smith   Vec        tt;
6038a729477SBarry Smith 
60478b31e54SBarry Smith   if (!mat->assembled) SETERRQ(1,"MatRelax_MPIAIJ: must assemble matrix first");
6051eb62cbbSBarry Smith 
606d6dfbf8fSBarry Smith   VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
607d6dfbf8fSBarry Smith   xs = x -1; /* shift by one for index start of 1 */
608da3a660dSBarry Smith   ls--;
609d3c9fed9SLois Curfman McInnes   if (!A->diag) {if ((ierr = MatMarkDiag_AIJ(A))) return ierr;}
610d6dfbf8fSBarry Smith   diag = A->diag;
611acb40c82SBarry Smith   if (flag == SOR_APPLY_UPPER || flag == SOR_APPLY_LOWER) {
61278b31e54SBarry Smith     SETERRQ(1,"That option not yet support for parallel AIJ matrices");
613acb40c82SBarry Smith   }
614da3a660dSBarry Smith   if (flag & SOR_EISENSTAT) {
615da3a660dSBarry Smith     /* Let  A = L + U + D; where L is lower trianglar,
616da3a660dSBarry Smith     U is upper triangular, E is diagonal; This routine applies
617da3a660dSBarry Smith 
618da3a660dSBarry Smith             (L + E)^{-1} A (U + E)^{-1}
619da3a660dSBarry Smith 
620da3a660dSBarry Smith     to a vector efficiently using Eisenstat's trick. This is for
621da3a660dSBarry Smith     the case of SSOR preconditioner, so E is D/omega where omega
622da3a660dSBarry Smith     is the relaxation factor.
623da3a660dSBarry Smith     */
62478b31e54SBarry Smith     ierr = VecDuplicate(xx,&tt); CHKERRQ(ierr);
625da3a660dSBarry Smith     VecGetArray(tt,&t);
626da3a660dSBarry Smith     scale = (2.0/omega) - 1.0;
627da3a660dSBarry Smith     /*  x = (E + U)^{-1} b */
628da3a660dSBarry Smith     VecSet(&zero,mat->lvec);
62906be10caSBarry Smith     ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
63078b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
631da3a660dSBarry Smith     for ( i=m-1; i>-1; i-- ) {
632da3a660dSBarry Smith       n    = A->i[i+1] - diag[i] - 1;
633da3a660dSBarry Smith       idx  = A->j + diag[i];
634da3a660dSBarry Smith       v    = A->a + diag[i];
635da3a660dSBarry Smith       sum  = b[i];
636da3a660dSBarry Smith       SPARSEDENSEMDOT(sum,xs,v,idx,n);
637da3a660dSBarry Smith       d    = shift + A->a[diag[i]-1];
638da3a660dSBarry Smith       n    = B->i[i+1] - B->i[i];
639da3a660dSBarry Smith       idx  = B->j + B->i[i] - 1;
640da3a660dSBarry Smith       v    = B->a + B->i[i] - 1;
641da3a660dSBarry Smith       SPARSEDENSEMDOT(sum,ls,v,idx,n);
642da3a660dSBarry Smith       x[i] = omega*(sum/d);
643da3a660dSBarry Smith     }
644edd2f0e1SBarry Smith     ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
64578b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
646da3a660dSBarry Smith 
647da3a660dSBarry Smith     /*  t = b - (2*E - D)x */
648da3a660dSBarry Smith     v = A->a;
649da3a660dSBarry Smith     for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ - 1])*x[i]; }
650da3a660dSBarry Smith 
651da3a660dSBarry Smith     /*  t = (E + L)^{-1}t */
652da3a660dSBarry Smith     ts = t - 1; /* shifted by one for index start of a or mat->j*/
653da3a660dSBarry Smith     diag = A->diag;
654da3a660dSBarry Smith     VecSet(&zero,mat->lvec);
65506be10caSBarry Smith     ierr = VecPipelineBegin(tt,mat->lvec,INSERTVALUES,PIPELINEDOWN,
65678b31e54SBarry Smith                                                  mat->Mvctx); CHKERRQ(ierr);
657da3a660dSBarry Smith     for ( i=0; i<m; i++ ) {
658da3a660dSBarry Smith       n    = diag[i] - A->i[i];
659da3a660dSBarry Smith       idx  = A->j + A->i[i] - 1;
660da3a660dSBarry Smith       v    = A->a + A->i[i] - 1;
661da3a660dSBarry Smith       sum  = t[i];
662da3a660dSBarry Smith       SPARSEDENSEMDOT(sum,ts,v,idx,n);
663da3a660dSBarry Smith       d    = shift + A->a[diag[i]-1];
664da3a660dSBarry Smith       n    = B->i[i+1] - B->i[i];
665da3a660dSBarry Smith       idx  = B->j + B->i[i] - 1;
666da3a660dSBarry Smith       v    = B->a + B->i[i] - 1;
667da3a660dSBarry Smith       SPARSEDENSEMDOT(sum,ls,v,idx,n);
668da3a660dSBarry Smith       t[i] = omega*(sum/d);
669da3a660dSBarry Smith     }
670edd2f0e1SBarry Smith     ierr = VecPipelineEnd(tt,mat->lvec,INSERTVALUES,PIPELINEDOWN,
67178b31e54SBarry Smith                                                     mat->Mvctx); CHKERRQ(ierr);
672da3a660dSBarry Smith     /*  x = x + t */
673da3a660dSBarry Smith     for ( i=0; i<m; i++ ) { x[i] += t[i]; }
674da3a660dSBarry Smith     VecDestroy(tt);
675da3a660dSBarry Smith     return 0;
676da3a660dSBarry Smith   }
677da3a660dSBarry Smith 
6781eb62cbbSBarry Smith 
679d6dfbf8fSBarry Smith   if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){
680da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
681da3a660dSBarry Smith       VecSet(&zero,mat->lvec); VecSet(&zero,xx);
682da3a660dSBarry Smith     }
683da3a660dSBarry Smith     else {
68406be10caSBarry Smith       ierr=VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERUP,mat->Mvctx);
68578b31e54SBarry Smith       CHKERRQ(ierr);
68606be10caSBarry Smith       ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERUP,mat->Mvctx);
68778b31e54SBarry Smith       CHKERRQ(ierr);
688da3a660dSBarry Smith     }
689d6dfbf8fSBarry Smith     while (its--) {
690d6dfbf8fSBarry Smith       /* go down through the rows */
69106be10caSBarry Smith       ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN,
69278b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
693d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
694d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
695d6dfbf8fSBarry Smith         idx  = A->j + A->i[i] - 1;
696d6dfbf8fSBarry Smith         v    = A->a + A->i[i] - 1;
697d6dfbf8fSBarry Smith         sum  = b[i];
698d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
699d6dfbf8fSBarry Smith         d    = shift + A->a[diag[i]-1];
700d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
701d6dfbf8fSBarry Smith         idx  = B->j + B->i[i] - 1;
702d6dfbf8fSBarry Smith         v    = B->a + B->i[i] - 1;
703d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
704d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
705d6dfbf8fSBarry Smith       }
706edd2f0e1SBarry Smith       ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN,
70778b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
708d6dfbf8fSBarry Smith       /* come up through the rows */
70906be10caSBarry Smith       ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
71078b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
711d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
712d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
713d6dfbf8fSBarry Smith         idx  = A->j + A->i[i] - 1;
714d6dfbf8fSBarry Smith         v    = A->a + A->i[i] - 1;
715d6dfbf8fSBarry Smith         sum  = b[i];
716d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
717d6dfbf8fSBarry Smith         d    = shift + A->a[diag[i]-1];
718d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
719d6dfbf8fSBarry Smith         idx  = B->j + B->i[i] - 1;
720d6dfbf8fSBarry Smith         v    = B->a + B->i[i] - 1;
721d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
722d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
723d6dfbf8fSBarry Smith       }
724edd2f0e1SBarry Smith       ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
72578b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
726d6dfbf8fSBarry Smith     }
727d6dfbf8fSBarry Smith   }
728d6dfbf8fSBarry Smith   else if (flag & SOR_FORWARD_SWEEP){
729da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
730da3a660dSBarry Smith       VecSet(&zero,mat->lvec);
73106be10caSBarry Smith       ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN,
73278b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
733da3a660dSBarry Smith       for ( i=0; i<m; i++ ) {
734da3a660dSBarry Smith         n    = diag[i] - A->i[i];
735da3a660dSBarry Smith         idx  = A->j + A->i[i] - 1;
736da3a660dSBarry Smith         v    = A->a + A->i[i] - 1;
737da3a660dSBarry Smith         sum  = b[i];
738da3a660dSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
739da3a660dSBarry Smith         d    = shift + A->a[diag[i]-1];
740da3a660dSBarry Smith         n    = B->i[i+1] - B->i[i];
741da3a660dSBarry Smith         idx  = B->j + B->i[i] - 1;
742da3a660dSBarry Smith         v    = B->a + B->i[i] - 1;
743da3a660dSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
744da3a660dSBarry Smith         x[i] = omega*(sum/d);
745da3a660dSBarry Smith       }
746edd2f0e1SBarry Smith       ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN,
74778b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
748da3a660dSBarry Smith       its--;
749da3a660dSBarry Smith     }
750d6dfbf8fSBarry Smith     while (its--) {
75106be10caSBarry Smith       ierr=VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERUP,mat->Mvctx);
75278b31e54SBarry Smith       CHKERRQ(ierr);
75306be10caSBarry Smith       ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERUP,mat->Mvctx);
75478b31e54SBarry Smith       CHKERRQ(ierr);
75506be10caSBarry Smith       ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN,
75678b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
757d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
758d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
759d6dfbf8fSBarry Smith         idx  = A->j + A->i[i] - 1;
760d6dfbf8fSBarry Smith         v    = A->a + A->i[i] - 1;
761d6dfbf8fSBarry Smith         sum  = b[i];
762d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
763d6dfbf8fSBarry Smith         d    = shift + A->a[diag[i]-1];
764d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
765d6dfbf8fSBarry Smith         idx  = B->j + B->i[i] - 1;
766d6dfbf8fSBarry Smith         v    = B->a + B->i[i] - 1;
767d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
768d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
769d6dfbf8fSBarry Smith       }
770edd2f0e1SBarry Smith       ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN,
77178b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
772d6dfbf8fSBarry Smith     }
773d6dfbf8fSBarry Smith   }
774d6dfbf8fSBarry Smith   else if (flag & SOR_BACKWARD_SWEEP){
775da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
776da3a660dSBarry Smith       VecSet(&zero,mat->lvec);
77706be10caSBarry Smith       ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
77878b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
779da3a660dSBarry Smith       for ( i=m-1; i>-1; i-- ) {
780da3a660dSBarry Smith         n    = A->i[i+1] - diag[i] - 1;
781da3a660dSBarry Smith         idx  = A->j + diag[i];
782da3a660dSBarry Smith         v    = A->a + diag[i];
783da3a660dSBarry Smith         sum  = b[i];
784da3a660dSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
785da3a660dSBarry Smith         d    = shift + A->a[diag[i]-1];
786da3a660dSBarry Smith         n    = B->i[i+1] - B->i[i];
787da3a660dSBarry Smith         idx  = B->j + B->i[i] - 1;
788da3a660dSBarry Smith         v    = B->a + B->i[i] - 1;
789da3a660dSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
790da3a660dSBarry Smith         x[i] = omega*(sum/d);
791da3a660dSBarry Smith       }
792edd2f0e1SBarry Smith       ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
79378b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
794da3a660dSBarry Smith       its--;
795da3a660dSBarry Smith     }
796d6dfbf8fSBarry Smith     while (its--) {
79706be10caSBarry Smith       ierr = VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERDOWN,
79878b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
79906be10caSBarry Smith       ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERDOWN,
80078b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
80106be10caSBarry Smith       ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
80278b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
803d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
804d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
805d6dfbf8fSBarry Smith         idx  = A->j + A->i[i] - 1;
806d6dfbf8fSBarry Smith         v    = A->a + A->i[i] - 1;
807d6dfbf8fSBarry Smith         sum  = b[i];
808d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
809d6dfbf8fSBarry Smith         d    = shift + A->a[diag[i]-1];
810d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
811d6dfbf8fSBarry Smith         idx  = B->j + B->i[i] - 1;
812d6dfbf8fSBarry Smith         v    = B->a + B->i[i] - 1;
813d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
814d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
815d6dfbf8fSBarry Smith       }
816edd2f0e1SBarry Smith       ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
81778b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
818d6dfbf8fSBarry Smith     }
819d6dfbf8fSBarry Smith   }
820d6dfbf8fSBarry Smith   else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
821da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
822da3a660dSBarry Smith       return MatRelax(mat->A,bb,omega,flag,shift,its,xx);
823da3a660dSBarry Smith     }
82406be10caSBarry Smith     ierr=VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERALL,mat->Mvctx);
82578b31e54SBarry Smith     CHKERRQ(ierr);
82606be10caSBarry Smith     ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERALL,mat->Mvctx);
82778b31e54SBarry Smith     CHKERRQ(ierr);
828d6dfbf8fSBarry Smith     while (its--) {
829d6dfbf8fSBarry Smith       /* go down through the rows */
830d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
831d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
832d6dfbf8fSBarry Smith         idx  = A->j + A->i[i] - 1;
833d6dfbf8fSBarry Smith         v    = A->a + A->i[i] - 1;
834d6dfbf8fSBarry Smith         sum  = b[i];
835d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
836d6dfbf8fSBarry Smith         d    = shift + A->a[diag[i]-1];
837d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
838d6dfbf8fSBarry Smith         idx  = B->j + B->i[i] - 1;
839d6dfbf8fSBarry Smith         v    = B->a + B->i[i] - 1;
840d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
841d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
842d6dfbf8fSBarry Smith       }
843d6dfbf8fSBarry Smith       /* come up through the rows */
844d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
845d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
846d6dfbf8fSBarry Smith         idx  = A->j + A->i[i] - 1;
847d6dfbf8fSBarry Smith         v    = A->a + A->i[i] - 1;
848d6dfbf8fSBarry Smith         sum  = b[i];
849d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
850d6dfbf8fSBarry Smith         d    = shift + A->a[diag[i]-1];
851d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
852d6dfbf8fSBarry Smith         idx  = B->j + B->i[i] - 1;
853d6dfbf8fSBarry Smith         v    = B->a + B->i[i] - 1;
854d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
855d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
856d6dfbf8fSBarry Smith       }
857d6dfbf8fSBarry Smith     }
858d6dfbf8fSBarry Smith   }
859d6dfbf8fSBarry Smith   else if (flag & SOR_LOCAL_FORWARD_SWEEP){
860da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
861da3a660dSBarry Smith       return MatRelax(mat->A,bb,omega,flag,shift,its,xx);
862da3a660dSBarry Smith     }
86306be10caSBarry Smith     ierr=VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERALL,mat->Mvctx);
86478b31e54SBarry Smith     CHKERRQ(ierr);
86506be10caSBarry Smith     ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERALL,mat->Mvctx);
86678b31e54SBarry Smith     CHKERRQ(ierr);
867d6dfbf8fSBarry Smith     while (its--) {
868d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
869d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
870d6dfbf8fSBarry Smith         idx  = A->j + A->i[i] - 1;
871d6dfbf8fSBarry Smith         v    = A->a + A->i[i] - 1;
872d6dfbf8fSBarry Smith         sum  = b[i];
873d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
874d6dfbf8fSBarry Smith         d    = shift + A->a[diag[i]-1];
875d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
876d6dfbf8fSBarry Smith         idx  = B->j + B->i[i] - 1;
877d6dfbf8fSBarry Smith         v    = B->a + B->i[i] - 1;
878d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
879d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
880d6dfbf8fSBarry Smith       }
881d6dfbf8fSBarry Smith     }
882d6dfbf8fSBarry Smith   }
883d6dfbf8fSBarry Smith   else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
884da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
885da3a660dSBarry Smith       return MatRelax(mat->A,bb,omega,flag,shift,its,xx);
886da3a660dSBarry Smith     }
88706be10caSBarry Smith     ierr = VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERALL,
88878b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
88906be10caSBarry Smith     ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERALL,
89078b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
891d6dfbf8fSBarry Smith     while (its--) {
892d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
893d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
894d6dfbf8fSBarry Smith         idx  = A->j + A->i[i] - 1;
895d6dfbf8fSBarry Smith         v    = A->a + A->i[i] - 1;
896d6dfbf8fSBarry Smith         sum  = b[i];
897d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
898d6dfbf8fSBarry Smith         d    = shift + A->a[diag[i]-1];
899d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
900d6dfbf8fSBarry Smith         idx  = B->j + B->i[i] - 1;
901d6dfbf8fSBarry Smith         v    = B->a + B->i[i] - 1;
902d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
903d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
904d6dfbf8fSBarry Smith       }
905d6dfbf8fSBarry Smith     }
906d6dfbf8fSBarry Smith   }
9078a729477SBarry Smith   return 0;
9088a729477SBarry Smith }
909a66be287SLois Curfman McInnes 
910fc76ce87SLois Curfman McInnes static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,int *nz,
911fc76ce87SLois Curfman McInnes                              int *nzalloc,int *mem)
912a66be287SLois Curfman McInnes {
913a66be287SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
914a66be287SLois Curfman McInnes   Mat        A = mat->A, B = mat->B;
915a66be287SLois Curfman McInnes   int        ierr, isend[3], irecv[3], nzA, nzallocA, memA;
916a66be287SLois Curfman McInnes 
91778b31e54SBarry Smith   ierr = MatGetInfo(A,MAT_LOCAL,&nzA,&nzallocA,&memA); CHKERRQ(ierr);
91878b31e54SBarry Smith   ierr = MatGetInfo(B,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr);
919a66be287SLois Curfman McInnes   isend[0] += nzA; isend[1] += nzallocA; isend[2] += memA;
920a66be287SLois Curfman McInnes   if (flag == MAT_LOCAL) {
921a66be287SLois Curfman McInnes     *nz = isend[0]; *nzalloc = isend[1]; *mem = isend[2];
922a66be287SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_MAX) {
923a66be287SLois Curfman McInnes     MPI_Allreduce((void *) isend,(void *) irecv,3,MPI_INT,MPI_MAX,matin->comm);
924a66be287SLois Curfman McInnes     *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
925a66be287SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_SUM) {
926a66be287SLois Curfman McInnes     MPI_Allreduce((void *) isend,(void *) irecv,3,MPI_INT,MPI_SUM,matin->comm);
927a66be287SLois Curfman McInnes     *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
928a66be287SLois Curfman McInnes   }
929a66be287SLois Curfman McInnes   return 0;
930a66be287SLois Curfman McInnes }
931a66be287SLois Curfman McInnes 
932fc76ce87SLois Curfman McInnes static int MatSetOption_MPIAIJ(Mat aijin,MatOption op)
933c74985f6SBarry Smith {
93444a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data;
935c74985f6SBarry Smith 
936c74985f6SBarry Smith   if      (op == NO_NEW_NONZERO_LOCATIONS)  {
937c74985f6SBarry Smith     MatSetOption(aij->A,op);
938c74985f6SBarry Smith     MatSetOption(aij->B,op);
939c74985f6SBarry Smith   }
940c74985f6SBarry Smith   else if (op == YES_NEW_NONZERO_LOCATIONS) {
941c74985f6SBarry Smith     MatSetOption(aij->A,op);
942c74985f6SBarry Smith     MatSetOption(aij->B,op);
943c74985f6SBarry Smith   }
94478b31e54SBarry Smith   else if (op == COLUMN_ORIENTED) SETERRQ(1,"Column oriented not supported");
945c74985f6SBarry Smith   return 0;
946c74985f6SBarry Smith }
947c74985f6SBarry Smith 
948d1710a03SLois Curfman McInnes static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
949c74985f6SBarry Smith {
95044a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
951c74985f6SBarry Smith   *m = mat->M; *n = mat->N;
952c74985f6SBarry Smith   return 0;
953c74985f6SBarry Smith }
954c74985f6SBarry Smith 
955d1710a03SLois Curfman McInnes static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
956c74985f6SBarry Smith {
95744a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
958b7c46309SBarry Smith   *m = mat->m; *n = mat->N;
959c74985f6SBarry Smith   return 0;
960c74985f6SBarry Smith }
961c74985f6SBarry Smith 
962d1710a03SLois Curfman McInnes static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
963c74985f6SBarry Smith {
96444a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
965c74985f6SBarry Smith   *m = mat->rstart; *n = mat->rend;
966c74985f6SBarry Smith   return 0;
967c74985f6SBarry Smith }
968c74985f6SBarry Smith 
96939e00950SLois Curfman McInnes static int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
97039e00950SLois Curfman McInnes {
971154123eaSLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
972154123eaSLois Curfman McInnes   Scalar     *vworkA, *vworkB, **pvA, **pvB;
973154123eaSLois Curfman McInnes   int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
974154123eaSLois Curfman McInnes   int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
97539e00950SLois Curfman McInnes 
97639e00950SLois Curfman McInnes   if (row < rstart || row >= rend)
97778b31e54SBarry Smith     SETERRQ(1,"MatGetRow_MPIAIJ: Currently you can get only local rows.")
978abc0e9e4SLois Curfman McInnes   lrow = row - rstart;
97939e00950SLois Curfman McInnes 
980154123eaSLois Curfman McInnes   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
981154123eaSLois Curfman McInnes   if (!v)   {pvA = 0; pvB = 0;}
982154123eaSLois Curfman McInnes   if (!idx) {pcA = 0; if (!v) pcB = 0;}
98378b31e54SBarry Smith   ierr = MatGetRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
98478b31e54SBarry Smith   ierr = MatGetRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
985154123eaSLois Curfman McInnes   nztot = nzA + nzB;
986154123eaSLois Curfman McInnes 
987154123eaSLois Curfman McInnes   if (v  || idx) {
988154123eaSLois Curfman McInnes     if (nztot) {
989154123eaSLois Curfman McInnes       /* Sort by increasing column numbers, assuming A and B already sorted */
990154123eaSLois Curfman McInnes       int imark, imark2;
99148b35521SBarry Smith       if (mat->assembled) {
992154123eaSLois Curfman McInnes         for (i=0; i<nzB; i++) cworkB[i] = mat->garray[cworkB[i]];
99348b35521SBarry Smith       }
994154123eaSLois Curfman McInnes       if (v) {
99578b31e54SBarry Smith         *v = (Scalar *) PETSCMALLOC( (nztot)*sizeof(Scalar) ); CHKPTRQ(*v);
99639e00950SLois Curfman McInnes         for ( i=0; i<nzB; i++ ) {
997154123eaSLois Curfman McInnes           if (cworkB[i] < cstart)   (*v)[i] = vworkB[i];
998154123eaSLois Curfman McInnes           else break;
999154123eaSLois Curfman McInnes         }
1000154123eaSLois Curfman McInnes         imark = i;
1001154123eaSLois Curfman McInnes         for ( i=0; i<nzA; i++ )     (*v)[imark+i] = vworkA[i];
1002154123eaSLois Curfman McInnes         imark2 = imark+nzA;
1003154123eaSLois Curfman McInnes         for ( i=imark; i<nzB; i++ ) (*v)[imark2+i] = vworkB[i];
1004154123eaSLois Curfman McInnes       }
1005154123eaSLois Curfman McInnes       if (idx) {
100678b31e54SBarry Smith         *idx = (int *) PETSCMALLOC( (nztot)*sizeof(int) ); CHKPTRQ(*idx);
1007154123eaSLois Curfman McInnes         for (i=0; i<nzA; i++) cworkA[i] += cstart;
1008154123eaSLois Curfman McInnes         for ( i=0; i<nzB; i++ ) {
1009154123eaSLois Curfman McInnes           if (cworkB[i] < cstart)   (*idx)[i] = cworkB[i];
1010154123eaSLois Curfman McInnes           else break;
1011154123eaSLois Curfman McInnes         }
1012154123eaSLois Curfman McInnes         imark = i;
1013154123eaSLois Curfman McInnes         for ( i=0; i<nzA; i++ )     (*idx)[imark+i] = cworkA[i];
1014154123eaSLois Curfman McInnes         imark2 = imark+nzA;
1015154123eaSLois Curfman McInnes         for ( i=imark; i<nzB; i++ ) (*idx)[imark2+i] = cworkB[i];
101639e00950SLois Curfman McInnes       }
101739e00950SLois Curfman McInnes     }
101839e00950SLois Curfman McInnes     else {*idx = 0; *v=0;}
1019154123eaSLois Curfman McInnes   }
102039e00950SLois Curfman McInnes   *nz = nztot;
102178b31e54SBarry Smith   ierr = MatRestoreRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
102278b31e54SBarry Smith   ierr = MatRestoreRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
102339e00950SLois Curfman McInnes   return 0;
102439e00950SLois Curfman McInnes }
102539e00950SLois Curfman McInnes 
102639e00950SLois Curfman McInnes static int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
102739e00950SLois Curfman McInnes {
102878b31e54SBarry Smith   if (idx) PETSCFREE(*idx);
102978b31e54SBarry Smith   if (v) PETSCFREE(*v);
103039e00950SLois Curfman McInnes   return 0;
103139e00950SLois Curfman McInnes }
103239e00950SLois Curfman McInnes 
1033b7c46309SBarry Smith static int MatTranspose_MPIAIJ(Mat A,Mat *Bin)
1034b7c46309SBarry Smith {
1035b7c46309SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1036b7c46309SBarry Smith   int        ierr;
1037b7c46309SBarry Smith   Mat        B;
1038b7c46309SBarry Smith   Mat_AIJ    *Aloc;
1039b7c46309SBarry Smith   int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
1040b7c46309SBarry Smith   Scalar     *array;
1041b7c46309SBarry Smith 
1042b7c46309SBarry Smith   ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,0,0,0,&B);
1043b7c46309SBarry Smith   CHKERRQ(ierr);
1044b7c46309SBarry Smith 
1045b7c46309SBarry Smith   /* copy over the A part */
1046b7c46309SBarry Smith   Aloc = (Mat_AIJ*) a->A->data;
1047b7c46309SBarry Smith   m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1048b7c46309SBarry Smith   row = a->rstart;
1049b7c46309SBarry Smith   for ( i=0; i<ai[m]-1; i++ ) {aj[i] += a->cstart - 1;}
1050b7c46309SBarry Smith   for ( i=0; i<m; i++ ) {
1051b7c46309SBarry Smith       ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERTVALUES);
1052b7c46309SBarry Smith       CHKERRQ(ierr);
1053b7c46309SBarry Smith       row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1054b7c46309SBarry Smith   }
1055b7c46309SBarry Smith   aj = Aloc->j;
1056b7c46309SBarry Smith   for ( i=0; i<ai[m]-1; i++ ) {aj[i] -= a->cstart - 1;}
1057b7c46309SBarry Smith 
1058b7c46309SBarry Smith   /* copy over the B part */
1059b7c46309SBarry Smith   Aloc = (Mat_AIJ*) a->B->data;
1060b7c46309SBarry Smith   m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1061b7c46309SBarry Smith   row = a->rstart;
1062b7c46309SBarry Smith   ct = cols = (int *) PETSCMALLOC( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
1063b7c46309SBarry Smith   for ( i=0; i<ai[m]-1; i++ ) {cols[i] = a->garray[aj[i]-1];}
1064b7c46309SBarry Smith   for ( i=0; i<m; i++ ) {
1065b7c46309SBarry Smith     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERTVALUES);
1066b7c46309SBarry Smith     CHKERRQ(ierr);
1067b7c46309SBarry Smith     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1068b7c46309SBarry Smith   }
1069b7c46309SBarry Smith   PETSCFREE(ct);
1070b7c46309SBarry Smith   ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
1071b7c46309SBarry Smith   ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
1072b7c46309SBarry Smith   *Bin = B;
1073b7c46309SBarry Smith   return 0;
1074b7c46309SBarry Smith }
1075b7c46309SBarry Smith 
1076fc76ce87SLois Curfman McInnes extern int MatConvert_MPIAIJ(Mat,MatType,Mat *);
1077ff7509f2SBarry Smith static int MatCopyPrivate_MPIAIJ(Mat,Mat *);
1078d6dfbf8fSBarry Smith 
10798a729477SBarry Smith /* -------------------------------------------------------------------*/
10802ee70a88SLois Curfman McInnes static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
108139e00950SLois Curfman McInnes        MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
108244a69424SLois Curfman McInnes        MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
108344a69424SLois Curfman McInnes        MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
10841eb62cbbSBarry Smith        0,0,0,0,
10858a729477SBarry Smith        0,0,
108644a69424SLois Curfman McInnes        MatRelax_MPIAIJ,
1087b7c46309SBarry Smith        MatTranspose_MPIAIJ,
1088a66be287SLois Curfman McInnes        MatGetInfo_MPIAIJ,0,
1089d1710a03SLois Curfman McInnes        MatGetDiagonal_MPIAIJ,0,0,
1090ee50ffe9SBarry Smith        MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
10911eb62cbbSBarry Smith        0,
10922ee70a88SLois Curfman McInnes        MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,0,
1093c74985f6SBarry Smith        0,0,0,0,
1094d1710a03SLois Curfman McInnes        MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
109577915d1cSLois Curfman McInnes        0,0,
1096ff7509f2SBarry Smith        0,0,MatConvert_MPIAIJ,0,0,MatCopyPrivate_MPIAIJ};
10978a729477SBarry Smith 
10988a729477SBarry Smith /*@
1099ff756334SLois Curfman McInnes    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
1100ff756334SLois Curfman McInnes    (the default parallel PETSc format).
11018a729477SBarry Smith 
11028a729477SBarry Smith    Input Parameters:
11031eb62cbbSBarry Smith .  comm - MPI communicator
11047d3e4905SLois Curfman McInnes .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
11057d3e4905SLois Curfman McInnes .  n - number of local columns (or PETSC_DECIDE to have calculated
11067d3e4905SLois Curfman McInnes            if N is given)
11077d3e4905SLois Curfman McInnes .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
11087d3e4905SLois Curfman McInnes .  N - number of global columns (or PETSC_DECIDE to have calculated
11097d3e4905SLois Curfman McInnes            if n is given)
1110ab693e5aSLois Curfman McInnes .  d_nz - number of nonzeros per row in diagonal portion of local submatrix
1111ff756334SLois Curfman McInnes            (same for all local rows)
1112ab693e5aSLois Curfman McInnes .  d_nzz - number of nonzeros per row in diagonal portion of local submatrix
1113ab693e5aSLois Curfman McInnes            or null (possibly different for each row).  You must leave room
1114ab693e5aSLois Curfman McInnes            for the diagonal entry even if it is zero.
1115ab693e5aSLois Curfman McInnes .  o_nz - number of nonzeros per row in off-diagonal portion of local
1116ab693e5aSLois Curfman McInnes            submatrix (same for all local rows).
1117ab693e5aSLois Curfman McInnes .  o_nzz - number of nonzeros per row in off-diagonal portion of local
1118ab693e5aSLois Curfman McInnes            submatrix or null (possibly different for each row).
11198a729477SBarry Smith 
1120ff756334SLois Curfman McInnes    Output Parameter:
11218a729477SBarry Smith .  newmat - the matrix
11228a729477SBarry Smith 
1123ff756334SLois Curfman McInnes    Notes:
1124ff756334SLois Curfman McInnes    The AIJ format (also called the Yale sparse matrix format or
1125ff756334SLois Curfman McInnes    compressed row storage), is fully compatible with standard Fortran 77
1126ff756334SLois Curfman McInnes    storage.  That is, the stored row and column indices begin at
1127ab693e5aSLois Curfman McInnes    one, not zero.  See the users manual for further details.
1128ff756334SLois Curfman McInnes 
1129ff756334SLois Curfman McInnes    The user MUST specify either the local or global matrix dimensions
1130ff756334SLois Curfman McInnes    (possibly both).
1131ff756334SLois Curfman McInnes 
1132*e0245417SLois Curfman McInnes    Storage Information:
1133*e0245417SLois Curfman McInnes    For a square global matrix we define each processor's diagonal portion
1134*e0245417SLois Curfman McInnes    to be its local rows and the corresponding columns (a square submatrix);
1135*e0245417SLois Curfman McInnes    each processor's off-diagonal portion encompasses the remainder of the
1136*e0245417SLois Curfman McInnes    local matrix (a rectangular submatrix).
1137*e0245417SLois Curfman McInnes 
1138*e0245417SLois Curfman McInnes    The user can specify preallocated storage for the diagonal part of
1139*e0245417SLois Curfman McInnes    the local submatrix with either d_nz or d_nnz (not both). Set both
1140*e0245417SLois Curfman McInnes    d_nz and d_nnz to zero for PETSc to control dynamic memory allocation.
1141*e0245417SLois Curfman McInnes    Likewise, specify preallocated storage for the off-diagonal part of
1142*e0245417SLois Curfman McInnes    the local submatrix with o_nz or o_nnz (not both).
1143ff756334SLois Curfman McInnes 
1144dbd7a890SLois Curfman McInnes .keywords: matrix, aij, compressed row, sparse, parallel
1145ff756334SLois Curfman McInnes 
1146dbd7a890SLois Curfman McInnes .seealso: MatCreate(), MatCreateSequentialAIJ(), MatSetValues()
11478a729477SBarry Smith @*/
11481eb62cbbSBarry Smith int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
11491eb62cbbSBarry Smith                  int d_nz,int *d_nnz, int o_nz,int *o_nnz,Mat *newmat)
11508a729477SBarry Smith {
11518a729477SBarry Smith   Mat          mat;
115244a69424SLois Curfman McInnes   Mat_MPIAIJ   *aij;
11536abc6512SBarry Smith   int          ierr, i,sum[2],work[2];
11548a729477SBarry Smith   *newmat         = 0;
115544a69424SLois Curfman McInnes   PETSCHEADERCREATE(mat,_Mat,MAT_COOKIE,MATMPIAIJ,comm);
1156a5a9c739SBarry Smith   PLogObjectCreate(mat);
115778b31e54SBarry Smith   mat->data       = (void *) (aij = PETSCNEW(Mat_MPIAIJ)); CHKPTRQ(aij);
11588a729477SBarry Smith   mat->ops        = &MatOps;
115944a69424SLois Curfman McInnes   mat->destroy    = MatDestroy_MPIAIJ;
116044a69424SLois Curfman McInnes   mat->view       = MatView_MPIAIJ;
11618a729477SBarry Smith   mat->factor     = 0;
1162d6dfbf8fSBarry Smith 
116307a0e7adSLois Curfman McInnes   aij->insertmode = NOTSETVALUES;
11641eb62cbbSBarry Smith   MPI_Comm_rank(comm,&aij->mytid);
11651eb62cbbSBarry Smith   MPI_Comm_size(comm,&aij->numtids);
11661eb62cbbSBarry Smith 
1167dbd7a890SLois Curfman McInnes   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
11681eb62cbbSBarry Smith     work[0] = m; work[1] = n;
1169d6dfbf8fSBarry Smith     MPI_Allreduce((void *) work,(void *) sum,2,MPI_INT,MPI_SUM,comm );
1170dbd7a890SLois Curfman McInnes     if (M == PETSC_DECIDE) M = sum[0];
1171dbd7a890SLois Curfman McInnes     if (N == PETSC_DECIDE) N = sum[1];
11721eb62cbbSBarry Smith   }
1173dbd7a890SLois Curfman McInnes   if (m == PETSC_DECIDE)
1174dbd7a890SLois Curfman McInnes     {m = M/aij->numtids + ((M % aij->numtids) > aij->mytid);}
1175dbd7a890SLois Curfman McInnes   if (n == PETSC_DECIDE)
1176dbd7a890SLois Curfman McInnes     {n = N/aij->numtids + ((N % aij->numtids) > aij->mytid);}
11778a729477SBarry Smith   aij->m = m;
11788a729477SBarry Smith   aij->n = n;
11791eb62cbbSBarry Smith   aij->N = N;
11801eb62cbbSBarry Smith   aij->M = M;
11811eb62cbbSBarry Smith 
11821eb62cbbSBarry Smith   /* build local table of row and column ownerships */
118378b31e54SBarry Smith   aij->rowners = (int *) PETSCMALLOC(2*(aij->numtids+2)*sizeof(int));
118478b31e54SBarry Smith   CHKPTRQ(aij->rowners);
11851eb62cbbSBarry Smith   aij->cowners = aij->rowners + aij->numtids + 1;
11861eb62cbbSBarry Smith   MPI_Allgather(&m,1,MPI_INT,aij->rowners+1,1,MPI_INT,comm);
11871eb62cbbSBarry Smith   aij->rowners[0] = 0;
11881eb62cbbSBarry Smith   for ( i=2; i<=aij->numtids; i++ ) {
11891eb62cbbSBarry Smith     aij->rowners[i] += aij->rowners[i-1];
11908a729477SBarry Smith   }
11911eb62cbbSBarry Smith   aij->rstart = aij->rowners[aij->mytid];
11921eb62cbbSBarry Smith   aij->rend   = aij->rowners[aij->mytid+1];
11931eb62cbbSBarry Smith   MPI_Allgather(&n,1,MPI_INT,aij->cowners+1,1,MPI_INT,comm);
11941eb62cbbSBarry Smith   aij->cowners[0] = 0;
11951eb62cbbSBarry Smith   for ( i=2; i<=aij->numtids; i++ ) {
11961eb62cbbSBarry Smith     aij->cowners[i] += aij->cowners[i-1];
11978a729477SBarry Smith   }
11981eb62cbbSBarry Smith   aij->cstart = aij->cowners[aij->mytid];
11991eb62cbbSBarry Smith   aij->cend   = aij->cowners[aij->mytid+1];
12008a729477SBarry Smith 
12018a729477SBarry Smith 
12026b5873e3SBarry Smith   ierr = MatCreateSequentialAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&aij->A);
120378b31e54SBarry Smith   CHKERRQ(ierr);
1204a5a9c739SBarry Smith   PLogObjectParent(mat,aij->A);
12056b5873e3SBarry Smith   ierr = MatCreateSequentialAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&aij->B);
120678b31e54SBarry Smith   CHKERRQ(ierr);
1207a5a9c739SBarry Smith   PLogObjectParent(mat,aij->B);
12088a729477SBarry Smith 
12091eb62cbbSBarry Smith   /* build cache for off array entries formed */
121078b31e54SBarry Smith   ierr = StashBuild_Private(&aij->stash); CHKERRQ(ierr);
12119e25ed09SBarry Smith   aij->colmap    = 0;
12129e25ed09SBarry Smith   aij->garray    = 0;
12138a729477SBarry Smith 
12141eb62cbbSBarry Smith   /* stuff used for matrix vector multiply */
12151eb62cbbSBarry Smith   aij->lvec      = 0;
12161eb62cbbSBarry Smith   aij->Mvctx     = 0;
1217d6dfbf8fSBarry Smith   aij->assembled = 0;
12188a729477SBarry Smith 
1219d6dfbf8fSBarry Smith   *newmat = mat;
1220d6dfbf8fSBarry Smith   return 0;
1221d6dfbf8fSBarry Smith }
1222c74985f6SBarry Smith 
1223ff7509f2SBarry Smith static int MatCopyPrivate_MPIAIJ(Mat matin,Mat *newmat)
1224d6dfbf8fSBarry Smith {
1225d6dfbf8fSBarry Smith   Mat        mat;
122644a69424SLois Curfman McInnes   Mat_MPIAIJ *aij,*oldmat = (Mat_MPIAIJ *) matin->data;
12272ee70a88SLois Curfman McInnes   int        ierr, len;
1228d6dfbf8fSBarry Smith   *newmat      = 0;
1229d6dfbf8fSBarry Smith 
123078b31e54SBarry Smith   if (!oldmat->assembled) SETERRQ(1,"Cannot copy unassembled matrix");
123144a69424SLois Curfman McInnes   PETSCHEADERCREATE(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm);
1232a5a9c739SBarry Smith   PLogObjectCreate(mat);
123378b31e54SBarry Smith   mat->data       = (void *) (aij = PETSCNEW(Mat_MPIAIJ)); CHKPTRQ(aij);
1234d6dfbf8fSBarry Smith   mat->ops        = &MatOps;
123544a69424SLois Curfman McInnes   mat->destroy    = MatDestroy_MPIAIJ;
123644a69424SLois Curfman McInnes   mat->view       = MatView_MPIAIJ;
1237d6dfbf8fSBarry Smith   mat->factor     = matin->factor;
1238d6dfbf8fSBarry Smith 
1239d6dfbf8fSBarry Smith   aij->m          = oldmat->m;
1240d6dfbf8fSBarry Smith   aij->n          = oldmat->n;
1241d6dfbf8fSBarry Smith   aij->M          = oldmat->M;
1242d6dfbf8fSBarry Smith   aij->N          = oldmat->N;
1243d6dfbf8fSBarry Smith 
1244d6dfbf8fSBarry Smith   aij->assembled  = 1;
1245d6dfbf8fSBarry Smith   aij->rstart     = oldmat->rstart;
1246d6dfbf8fSBarry Smith   aij->rend       = oldmat->rend;
1247d6dfbf8fSBarry Smith   aij->cstart     = oldmat->cstart;
1248d6dfbf8fSBarry Smith   aij->cend       = oldmat->cend;
1249d6dfbf8fSBarry Smith   aij->numtids    = oldmat->numtids;
1250d6dfbf8fSBarry Smith   aij->mytid      = oldmat->mytid;
125107a0e7adSLois Curfman McInnes   aij->insertmode = NOTSETVALUES;
1252d6dfbf8fSBarry Smith 
125378b31e54SBarry Smith   aij->rowners        = (int *) PETSCMALLOC( (aij->numtids+1)*sizeof(int) );
125478b31e54SBarry Smith   CHKPTRQ(aij->rowners);
125578b31e54SBarry Smith   PETSCMEMCPY(aij->rowners,oldmat->rowners,(aij->numtids+1)*sizeof(int));
125678b31e54SBarry Smith   ierr = StashInitialize_Private(&aij->stash); CHKERRQ(ierr);
12572ee70a88SLois Curfman McInnes   if (oldmat->colmap) {
125878b31e54SBarry Smith     aij->colmap      = (int *) PETSCMALLOC( (aij->N)*sizeof(int) );
125978b31e54SBarry Smith     CHKPTRQ(aij->colmap);
126078b31e54SBarry Smith     PETSCMEMCPY(aij->colmap,oldmat->colmap,(aij->N)*sizeof(int));
12612ee70a88SLois Curfman McInnes   } else aij->colmap = 0;
12622ee70a88SLois Curfman McInnes   if (oldmat->garray && (len = ((Mat_AIJ *) (oldmat->B->data))->n)) {
126378b31e54SBarry Smith     aij->garray      = (int *) PETSCMALLOC(len*sizeof(int) ); CHKPTRQ(aij->garray);
126478b31e54SBarry Smith     PETSCMEMCPY(aij->garray,oldmat->garray,len*sizeof(int));
12652ee70a88SLois Curfman McInnes   } else aij->garray = 0;
1266d6dfbf8fSBarry Smith 
126778b31e54SBarry Smith   ierr =  VecDuplicate(oldmat->lvec,&aij->lvec); CHKERRQ(ierr);
1268a5a9c739SBarry Smith   PLogObjectParent(mat,aij->lvec);
126978b31e54SBarry Smith   ierr =  VecScatterCtxCopy(oldmat->Mvctx,&aij->Mvctx); CHKERRQ(ierr);
1270a5a9c739SBarry Smith   PLogObjectParent(mat,aij->Mvctx);
127178b31e54SBarry Smith   ierr =  MatConvert(oldmat->A,MATSAME,&aij->A); CHKERRQ(ierr);
1272a5a9c739SBarry Smith   PLogObjectParent(mat,aij->A);
127378b31e54SBarry Smith   ierr =  MatConvert(oldmat->B,MATSAME,&aij->B); CHKERRQ(ierr);
1274a5a9c739SBarry Smith   PLogObjectParent(mat,aij->B);
12758a729477SBarry Smith   *newmat = mat;
12768a729477SBarry Smith   return 0;
12778a729477SBarry Smith }
1278