xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision dbb450ca0a1bd4568a196678f20f5151425d3a04)
1cb512458SBarry Smith #ifndef lint
2*dbb450caSBarry Smith static char vcid[] = "$Id: mpiaij.c,v 1.79 1995/09/12 03:25:20 bsmith Exp bsmith $";
3cb512458SBarry Smith #endif
48a729477SBarry Smith 
51eb62cbbSBarry Smith #include "mpiaij.h"
68a729477SBarry Smith #include "vec/vecimpl.h"
7d6dfbf8fSBarry Smith #include "inline/spops.h"
88a729477SBarry Smith 
99e25ed09SBarry Smith /* local utility routine that creates a mapping from the global column
109e25ed09SBarry Smith number to the local number in the off-diagonal part of the local
119e25ed09SBarry Smith storage of the matrix.  This is done in a non scable way since the
129e25ed09SBarry Smith length of colmap equals the global matrix length.
139e25ed09SBarry Smith */
14b7c46309SBarry Smith static int CreateColmap_Private(Mat mat)
159e25ed09SBarry Smith {
1644a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
17ec8511deSBarry Smith   Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
189e25ed09SBarry Smith   int        n = B->n,i;
19*dbb450caSBarry Smith   int        shift = B->indexshift;
20*dbb450caSBarry Smith 
2178b31e54SBarry Smith   aij->colmap = (int *) PETSCMALLOC(aij->N*sizeof(int));CHKPTRQ(aij->colmap);
22464493b3SBarry Smith   PLogObjectMemory(mat,aij->N*sizeof(int));
2378b31e54SBarry Smith   PETSCMEMSET(aij->colmap,0,aij->N*sizeof(int));
24*dbb450caSBarry Smith   for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i-shift;
259e25ed09SBarry Smith   return 0;
269e25ed09SBarry Smith }
279e25ed09SBarry Smith 
282493cbb0SBarry Smith extern int DisAssemble_MPIAIJ(Mat);
292493cbb0SBarry Smith 
3051c98ccdSLois Curfman McInnes static int MatGetReordering_MPIAIJ(Mat mat,MatOrdering type,IS *rperm,IS *cperm)
31299609e3SLois Curfman McInnes {
32299609e3SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
33299609e3SLois Curfman McInnes   int ierr;
34299609e3SLois Curfman McInnes   if (aij->numtids == 1) {
3551c98ccdSLois Curfman McInnes     ierr = MatGetReordering(aij->A,type,rperm,cperm); CHKERRQ(ierr);
36299609e3SLois Curfman McInnes   } else
37bbb6d6a8SBarry Smith     SETERRQ(1,"MatGetReordering_MPIAIJ:  not yet supported in parallel");
38299609e3SLois Curfman McInnes   return 0;
39299609e3SLois Curfman McInnes }
40299609e3SLois Curfman McInnes 
412ee70a88SLois Curfman McInnes static int MatSetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,
421eb62cbbSBarry Smith                             int *idxn,Scalar *v,InsertMode addv)
438a729477SBarry Smith {
4444a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
45*dbb450caSBarry Smith   Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data;
461eb62cbbSBarry Smith   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
471eb62cbbSBarry Smith   int        cstart = aij->cstart, cend = aij->cend,row,col;
48*dbb450caSBarry Smith   int        shift = C->indexshift;
498a729477SBarry Smith 
5007a0e7adSLois Curfman McInnes   if (aij->insertmode != NOTSETVALUES && aij->insertmode != addv) {
51bbb6d6a8SBarry Smith     SETERRQ(1,"MatSetValues_MPIAIJ:You cannot mix inserts and adds");
528a729477SBarry Smith   }
531eb62cbbSBarry Smith   aij->insertmode = addv;
548a729477SBarry Smith   for ( i=0; i<m; i++ ) {
55bbb6d6a8SBarry Smith     if (idxm[i] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative row");
56bbb6d6a8SBarry Smith     if (idxm[i] >= aij->M) SETERRQ(1,"MatSetValues_MPIAIJ:Row too large");
571eb62cbbSBarry Smith     if (idxm[i] >= rstart && idxm[i] < rend) {
581eb62cbbSBarry Smith       row = idxm[i] - rstart;
591eb62cbbSBarry Smith       for ( j=0; j<n; j++ ) {
60bbb6d6a8SBarry Smith         if (idxn[j] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative column");
61bbb6d6a8SBarry Smith         if (idxn[j] >= aij->N) SETERRQ(1,"MatSetValues_MPIAIJ:Col too large");
621eb62cbbSBarry Smith         if (idxn[j] >= cstart && idxn[j] < cend){
631eb62cbbSBarry Smith           col = idxn[j] - cstart;
6478b31e54SBarry Smith           ierr = MatSetValues(aij->A,1,&row,1,&col,v+i*n+j,addv);CHKERRQ(ierr);
651eb62cbbSBarry Smith         }
661eb62cbbSBarry Smith         else {
67d6dfbf8fSBarry Smith           if (aij->assembled) {
68b7c46309SBarry Smith             if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
69*dbb450caSBarry Smith             col = aij->colmap[idxn[j]] + shift;
70ec8511deSBarry Smith             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
712493cbb0SBarry Smith               ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
722493cbb0SBarry Smith               col =  idxn[j];
73d6dfbf8fSBarry Smith             }
749e25ed09SBarry Smith           }
759e25ed09SBarry Smith           else col = idxn[j];
7678b31e54SBarry Smith           ierr = MatSetValues(aij->B,1,&row,1,&col,v+i*n+j,addv);CHKERRQ(ierr);
771eb62cbbSBarry Smith         }
781eb62cbbSBarry Smith       }
791eb62cbbSBarry Smith     }
801eb62cbbSBarry Smith     else {
81dbd7a890SLois Curfman McInnes       ierr = StashValues_Private(&aij->stash,idxm[i],n,idxn,v+i*n,addv);
8278b31e54SBarry Smith       CHKERRQ(ierr);
831eb62cbbSBarry Smith     }
848a729477SBarry Smith   }
858a729477SBarry Smith   return 0;
868a729477SBarry Smith }
878a729477SBarry Smith 
888a729477SBarry Smith /*
891eb62cbbSBarry Smith     the assembly code is a lot like the code for vectors, we should
901eb62cbbSBarry Smith     sometime derive a single assembly code that can be used for
911eb62cbbSBarry Smith     either case.
928a729477SBarry Smith */
938a729477SBarry Smith 
94fc76ce87SLois Curfman McInnes static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
958a729477SBarry Smith {
9644a69424SLois Curfman McInnes   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
97d6dfbf8fSBarry Smith   MPI_Comm    comm = mat->comm;
986abc6512SBarry Smith   int         numtids = aij->numtids, *owners = aij->rowners;
991eb62cbbSBarry Smith   int         mytid = aij->mytid;
1001eb62cbbSBarry Smith   MPI_Request *send_waits,*recv_waits;
1016abc6512SBarry Smith   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
1025392566eSBarry Smith   int         tag = mat->tag, *owner,*starts,count,ierr;
1031eb62cbbSBarry Smith   InsertMode  addv;
1041eb62cbbSBarry Smith   Scalar      *rvalues,*svalues;
1051eb62cbbSBarry Smith 
1061eb62cbbSBarry Smith   /* make sure all processors are either in INSERTMODE or ADDMODE */
10728988994SBarry Smith   MPI_Allreduce((void *) &aij->insertmode,(void *) &addv,1,MPI_INT,
1081eb62cbbSBarry Smith                 MPI_BOR,comm);
109*dbb450caSBarry Smith   if (addv == (ADD_VALUES|INSERT_VALUES)) {
110bbb6d6a8SBarry Smith     SETERRQ(1,"MatAssemblyBegin_MPIAIJ:Some processors inserted others added");
1111eb62cbbSBarry Smith   }
1121eb62cbbSBarry Smith   aij->insertmode = addv; /* in case this processor had no cache */
1131eb62cbbSBarry Smith 
1141eb62cbbSBarry Smith   /*  first count number of contributors to each processor */
11578b31e54SBarry Smith   nprocs = (int *) PETSCMALLOC( 2*numtids*sizeof(int) ); CHKPTRQ(nprocs);
11678b31e54SBarry Smith   PETSCMEMSET(nprocs,0,2*numtids*sizeof(int)); procs = nprocs + numtids;
11778b31e54SBarry Smith   owner = (int *) PETSCMALLOC( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
1181eb62cbbSBarry Smith   for ( i=0; i<aij->stash.n; i++ ) {
1191eb62cbbSBarry Smith     idx = aij->stash.idx[i];
1201eb62cbbSBarry Smith     for ( j=0; j<numtids; j++ ) {
1211eb62cbbSBarry Smith       if (idx >= owners[j] && idx < owners[j+1]) {
1221eb62cbbSBarry Smith         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
1238a729477SBarry Smith       }
1248a729477SBarry Smith     }
1258a729477SBarry Smith   }
1261eb62cbbSBarry Smith   nsends = 0;  for ( i=0; i<numtids; i++ ) { nsends += procs[i];}
1271eb62cbbSBarry Smith 
1281eb62cbbSBarry Smith   /* inform other processors of number of messages and max length*/
12978b31e54SBarry Smith   work = (int *) PETSCMALLOC( numtids*sizeof(int) ); CHKPTRQ(work);
1301eb62cbbSBarry Smith   MPI_Allreduce((void *) procs,(void *) work,numtids,MPI_INT,MPI_SUM,comm);
1311eb62cbbSBarry Smith   nreceives = work[mytid];
1321eb62cbbSBarry Smith   MPI_Allreduce((void *) nprocs,(void *) work,numtids,MPI_INT,MPI_MAX,comm);
1331eb62cbbSBarry Smith   nmax = work[mytid];
13478b31e54SBarry Smith   PETSCFREE(work);
1351eb62cbbSBarry Smith 
1361eb62cbbSBarry Smith   /* post receives:
1371eb62cbbSBarry Smith        1) each message will consist of ordered pairs
1381eb62cbbSBarry Smith      (global index,value) we store the global index as a double
139d6dfbf8fSBarry Smith      to simplify the message passing.
1401eb62cbbSBarry Smith        2) since we don't know how long each individual message is we
1411eb62cbbSBarry Smith      allocate the largest needed buffer for each receive. Potentially
1421eb62cbbSBarry Smith      this is a lot of wasted space.
1431eb62cbbSBarry Smith 
1441eb62cbbSBarry Smith 
1451eb62cbbSBarry Smith        This could be done better.
1461eb62cbbSBarry Smith   */
14778b31e54SBarry Smith   rvalues = (Scalar *) PETSCMALLOC(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
14878b31e54SBarry Smith   CHKPTRQ(rvalues);
14978b31e54SBarry Smith   recv_waits = (MPI_Request *) PETSCMALLOC((nreceives+1)*sizeof(MPI_Request));
15078b31e54SBarry Smith   CHKPTRQ(recv_waits);
1511eb62cbbSBarry Smith   for ( i=0; i<nreceives; i++ ) {
152c60448a5SBarry Smith     MPI_Irecv((void *)(rvalues+3*nmax*i),3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
1531eb62cbbSBarry Smith               comm,recv_waits+i);
1541eb62cbbSBarry Smith   }
1551eb62cbbSBarry Smith 
1561eb62cbbSBarry Smith   /* do sends:
1571eb62cbbSBarry Smith       1) starts[i] gives the starting index in svalues for stuff going to
1581eb62cbbSBarry Smith          the ith processor
1591eb62cbbSBarry Smith   */
16078b31e54SBarry Smith   svalues = (Scalar *) PETSCMALLOC( 3*(aij->stash.n+1)*sizeof(Scalar) );
16178b31e54SBarry Smith   CHKPTRQ(svalues);
16278b31e54SBarry Smith   send_waits = (MPI_Request *) PETSCMALLOC( (nsends+1)*sizeof(MPI_Request));
16378b31e54SBarry Smith   CHKPTRQ(send_waits);
16478b31e54SBarry Smith   starts = (int *) PETSCMALLOC( numtids*sizeof(int) ); CHKPTRQ(starts);
1651eb62cbbSBarry Smith   starts[0] = 0;
1661eb62cbbSBarry Smith   for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
1671eb62cbbSBarry Smith   for ( i=0; i<aij->stash.n; i++ ) {
1681eb62cbbSBarry Smith     svalues[3*starts[owner[i]]]       = (Scalar)  aij->stash.idx[i];
1691eb62cbbSBarry Smith     svalues[3*starts[owner[i]]+1]     = (Scalar)  aij->stash.idy[i];
1701eb62cbbSBarry Smith     svalues[3*(starts[owner[i]]++)+2] =  aij->stash.array[i];
1711eb62cbbSBarry Smith   }
17278b31e54SBarry Smith   PETSCFREE(owner);
1731eb62cbbSBarry Smith   starts[0] = 0;
1741eb62cbbSBarry Smith   for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
1751eb62cbbSBarry Smith   count = 0;
1761eb62cbbSBarry Smith   for ( i=0; i<numtids; i++ ) {
1771eb62cbbSBarry Smith     if (procs[i]) {
178c60448a5SBarry Smith       MPI_Isend((void*)(svalues+3*starts[i]),3*nprocs[i],MPIU_SCALAR,i,tag,
1791eb62cbbSBarry Smith                 comm,send_waits+count++);
1801eb62cbbSBarry Smith     }
1811eb62cbbSBarry Smith   }
18278b31e54SBarry Smith   PETSCFREE(starts); PETSCFREE(nprocs);
1831eb62cbbSBarry Smith 
1841eb62cbbSBarry Smith   /* Free cache space */
18578b31e54SBarry Smith   ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
1861eb62cbbSBarry Smith 
1871eb62cbbSBarry Smith   aij->svalues    = svalues;    aij->rvalues = rvalues;
1881eb62cbbSBarry Smith   aij->nsends     = nsends;     aij->nrecvs = nreceives;
1891eb62cbbSBarry Smith   aij->send_waits = send_waits; aij->recv_waits = recv_waits;
1901eb62cbbSBarry Smith   aij->rmax       = nmax;
1911eb62cbbSBarry Smith 
1921eb62cbbSBarry Smith   return 0;
1931eb62cbbSBarry Smith }
19444a69424SLois Curfman McInnes extern int MatSetUpMultiply_MPIAIJ(Mat);
1951eb62cbbSBarry Smith 
196fc76ce87SLois Curfman McInnes static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
1971eb62cbbSBarry Smith {
1981eb62cbbSBarry Smith   int        ierr;
19944a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
200*dbb450caSBarry Smith   Mat_SeqAIJ *C = (Mat_SeqAIJ *) aij->A->data;
2011eb62cbbSBarry Smith   MPI_Status  *send_status,recv_status;
2026abc6512SBarry Smith   int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n;
2032493cbb0SBarry Smith   int         row,col,other_disassembled;
2041eb62cbbSBarry Smith   Scalar      *values,val;
2051eb62cbbSBarry Smith   InsertMode  addv = aij->insertmode;
206*dbb450caSBarry Smith   int         shift = C->indexshift;
2071eb62cbbSBarry Smith 
2081eb62cbbSBarry Smith   /*  wait on receives */
2091eb62cbbSBarry Smith   while (count) {
210d6dfbf8fSBarry Smith     MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
2111eb62cbbSBarry Smith     /* unpack receives into our local space */
212d6dfbf8fSBarry Smith     values = aij->rvalues + 3*imdex*aij->rmax;
213c60448a5SBarry Smith     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
2141eb62cbbSBarry Smith     n = n/3;
2151eb62cbbSBarry Smith     for ( i=0; i<n; i++ ) {
2161eb62cbbSBarry Smith       row = (int) PETSCREAL(values[3*i]) - aij->rstart;
2171eb62cbbSBarry Smith       col = (int) PETSCREAL(values[3*i+1]);
2181eb62cbbSBarry Smith       val = values[3*i+2];
2191eb62cbbSBarry Smith       if (col >= aij->cstart && col < aij->cend) {
2201eb62cbbSBarry Smith           col -= aij->cstart;
2211eb62cbbSBarry Smith         MatSetValues(aij->A,1,&row,1,&col,&val,addv);
2221eb62cbbSBarry Smith       }
2231eb62cbbSBarry Smith       else {
224d6dfbf8fSBarry Smith         if (aij->assembled) {
225b7c46309SBarry Smith           if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
226*dbb450caSBarry Smith           col = aij->colmap[col] + shift;
227ec8511deSBarry Smith           if (col < 0  && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
2282493cbb0SBarry Smith             ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
2292493cbb0SBarry Smith             col = (int) PETSCREAL(values[3*i+1]);
230d6dfbf8fSBarry Smith           }
2319e25ed09SBarry Smith         }
2321eb62cbbSBarry Smith         MatSetValues(aij->B,1,&row,1,&col,&val,addv);
2331eb62cbbSBarry Smith       }
2341eb62cbbSBarry Smith     }
2351eb62cbbSBarry Smith     count--;
2361eb62cbbSBarry Smith   }
23778b31e54SBarry Smith   PETSCFREE(aij->recv_waits); PETSCFREE(aij->rvalues);
2381eb62cbbSBarry Smith 
2391eb62cbbSBarry Smith   /* wait on sends */
2401eb62cbbSBarry Smith   if (aij->nsends) {
24178b31e54SBarry Smith     send_status = (MPI_Status *) PETSCMALLOC( aij->nsends*sizeof(MPI_Status) );
24278b31e54SBarry Smith     CHKPTRQ(send_status);
2431eb62cbbSBarry Smith     MPI_Waitall(aij->nsends,aij->send_waits,send_status);
24478b31e54SBarry Smith     PETSCFREE(send_status);
2451eb62cbbSBarry Smith   }
24678b31e54SBarry Smith   PETSCFREE(aij->send_waits); PETSCFREE(aij->svalues);
2471eb62cbbSBarry Smith 
24807a0e7adSLois Curfman McInnes   aij->insertmode = NOTSETVALUES;
24978b31e54SBarry Smith   ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
25078b31e54SBarry Smith   ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);
2511eb62cbbSBarry Smith 
2522493cbb0SBarry Smith   /* determine if any processor has disassembled, if so we must
2532493cbb0SBarry Smith      also disassemble ourselfs, in order that we may reassemble. */
2542493cbb0SBarry Smith   MPI_Allreduce((void *) &aij->assembled,(void *) &other_disassembled,1,
2552493cbb0SBarry Smith                  MPI_INT,MPI_PROD,mat->comm);
2562493cbb0SBarry Smith   if (aij->assembled && !other_disassembled) {
2572493cbb0SBarry Smith     ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
2582493cbb0SBarry Smith   }
2592493cbb0SBarry Smith 
2605e42470aSBarry Smith   if (!aij->assembled && mode == FINAL_ASSEMBLY) {
26178b31e54SBarry Smith     ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
262d6dfbf8fSBarry Smith     aij->assembled = 1;
2635e42470aSBarry Smith   }
26478b31e54SBarry Smith   ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
26578b31e54SBarry Smith   ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);
2665e42470aSBarry Smith 
2678a729477SBarry Smith   return 0;
2688a729477SBarry Smith }
2698a729477SBarry Smith 
2702ee70a88SLois Curfman McInnes static int MatZeroEntries_MPIAIJ(Mat A)
2711eb62cbbSBarry Smith {
27244a69424SLois Curfman McInnes   Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
273dbd7a890SLois Curfman McInnes   int ierr;
27478b31e54SBarry Smith   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
27578b31e54SBarry Smith   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
2761eb62cbbSBarry Smith   return 0;
2771eb62cbbSBarry Smith }
2781eb62cbbSBarry Smith 
2791eb62cbbSBarry Smith /* again this uses the same basic stratagy as in the assembly and
2801eb62cbbSBarry Smith    scatter create routines, we should try to do it systemamatically
2811eb62cbbSBarry Smith    if we can figure out the proper level of generality. */
2821eb62cbbSBarry Smith 
2831eb62cbbSBarry Smith /* the code does not do the diagonal entries correctly unless the
2841eb62cbbSBarry Smith    matrix is square and the column and row owerships are identical.
2851eb62cbbSBarry Smith    This is a BUG. The only way to fix it seems to be to access
2861eb62cbbSBarry Smith    aij->A and aij->B directly and not through the MatZeroRows()
2871eb62cbbSBarry Smith    routine.
2881eb62cbbSBarry Smith */
28944a69424SLois Curfman McInnes static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
2901eb62cbbSBarry Smith {
29144a69424SLois Curfman McInnes   Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
2921eb62cbbSBarry Smith   int            i,ierr,N, *rows,*owners = l->rowners,numtids = l->numtids;
2936abc6512SBarry Smith   int            *procs,*nprocs,j,found,idx,nsends,*work;
2941eb62cbbSBarry Smith   int            nmax,*svalues,*starts,*owner,nrecvs,mytid = l->mytid;
2955392566eSBarry Smith   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
296d6dfbf8fSBarry Smith   int            *lens,imdex,*lrows,*values;
297d6dfbf8fSBarry Smith   MPI_Comm       comm = A->comm;
2981eb62cbbSBarry Smith   MPI_Request    *send_waits,*recv_waits;
2991eb62cbbSBarry Smith   MPI_Status     recv_status,*send_status;
3001eb62cbbSBarry Smith   IS             istmp;
3011eb62cbbSBarry Smith 
302c8f67822SLois Curfman McInnes   if (!l->assembled)
30378b31e54SBarry Smith     SETERRQ(1,"MatZeroRows_MPIAIJ:Must assemble matrix first");
30478b31e54SBarry Smith   ierr = ISGetLocalSize(is,&N); CHKERRQ(ierr);
30578b31e54SBarry Smith   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
3061eb62cbbSBarry Smith 
3071eb62cbbSBarry Smith   /*  first count number of contributors to each processor */
30878b31e54SBarry Smith   nprocs = (int *) PETSCMALLOC( 2*numtids*sizeof(int) ); CHKPTRQ(nprocs);
30978b31e54SBarry Smith   PETSCMEMSET(nprocs,0,2*numtids*sizeof(int)); procs = nprocs + numtids;
31078b31e54SBarry Smith   owner = (int *) PETSCMALLOC((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
3111eb62cbbSBarry Smith   for ( i=0; i<N; i++ ) {
3121eb62cbbSBarry Smith     idx = rows[i];
3131eb62cbbSBarry Smith     found = 0;
3141eb62cbbSBarry Smith     for ( j=0; j<numtids; j++ ) {
3151eb62cbbSBarry Smith       if (idx >= owners[j] && idx < owners[j+1]) {
3161eb62cbbSBarry Smith         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
3171eb62cbbSBarry Smith       }
3181eb62cbbSBarry Smith     }
319bbb6d6a8SBarry Smith     if (!found) SETERRQ(1,"MatZeroRows_MPIAIJ:Index out of range");
3201eb62cbbSBarry Smith   }
3211eb62cbbSBarry Smith   nsends = 0;  for ( i=0; i<numtids; i++ ) { nsends += procs[i];}
3221eb62cbbSBarry Smith 
3231eb62cbbSBarry Smith   /* inform other processors of number of messages and max length*/
32478b31e54SBarry Smith   work = (int *) PETSCMALLOC( numtids*sizeof(int) ); CHKPTRQ(work);
3251eb62cbbSBarry Smith   MPI_Allreduce((void *) procs,(void *) work,numtids,MPI_INT,MPI_SUM,comm);
3261eb62cbbSBarry Smith   nrecvs = work[mytid];
3271eb62cbbSBarry Smith   MPI_Allreduce((void *) nprocs,(void *) work,numtids,MPI_INT,MPI_MAX,comm);
3281eb62cbbSBarry Smith   nmax = work[mytid];
32978b31e54SBarry Smith   PETSCFREE(work);
3301eb62cbbSBarry Smith 
3311eb62cbbSBarry Smith   /* post receives:   */
33278b31e54SBarry Smith   rvalues = (int *) PETSCMALLOC((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
33378b31e54SBarry Smith   CHKPTRQ(rvalues);
33478b31e54SBarry Smith   recv_waits = (MPI_Request *) PETSCMALLOC((nrecvs+1)*sizeof(MPI_Request));
33578b31e54SBarry Smith   CHKPTRQ(recv_waits);
3361eb62cbbSBarry Smith   for ( i=0; i<nrecvs; i++ ) {
3371eb62cbbSBarry Smith     MPI_Irecv((void *)(rvalues+nmax*i),nmax,MPI_INT,MPI_ANY_SOURCE,tag,
3381eb62cbbSBarry Smith               comm,recv_waits+i);
3391eb62cbbSBarry Smith   }
3401eb62cbbSBarry Smith 
3411eb62cbbSBarry Smith   /* do sends:
3421eb62cbbSBarry Smith       1) starts[i] gives the starting index in svalues for stuff going to
3431eb62cbbSBarry Smith          the ith processor
3441eb62cbbSBarry Smith   */
34578b31e54SBarry Smith   svalues = (int *) PETSCMALLOC( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
34678b31e54SBarry Smith   send_waits = (MPI_Request *) PETSCMALLOC( (nsends+1)*sizeof(MPI_Request));
34778b31e54SBarry Smith   CHKPTRQ(send_waits);
34878b31e54SBarry Smith   starts = (int *) PETSCMALLOC( (numtids+1)*sizeof(int) ); CHKPTRQ(starts);
3491eb62cbbSBarry Smith   starts[0] = 0;
3501eb62cbbSBarry Smith   for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
3511eb62cbbSBarry Smith   for ( i=0; i<N; i++ ) {
3521eb62cbbSBarry Smith     svalues[starts[owner[i]]++] = rows[i];
3531eb62cbbSBarry Smith   }
3541eb62cbbSBarry Smith   ISRestoreIndices(is,&rows);
3551eb62cbbSBarry Smith 
3561eb62cbbSBarry Smith   starts[0] = 0;
3571eb62cbbSBarry Smith   for ( i=1; i<numtids+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
3581eb62cbbSBarry Smith   count = 0;
3591eb62cbbSBarry Smith   for ( i=0; i<numtids; i++ ) {
3601eb62cbbSBarry Smith     if (procs[i]) {
3611eb62cbbSBarry Smith       MPI_Isend((void*)(svalues+starts[i]),nprocs[i],MPI_INT,i,tag,
3621eb62cbbSBarry Smith                 comm,send_waits+count++);
3631eb62cbbSBarry Smith     }
3641eb62cbbSBarry Smith   }
36578b31e54SBarry Smith   PETSCFREE(starts);
3661eb62cbbSBarry Smith 
3671eb62cbbSBarry Smith   base = owners[mytid];
3681eb62cbbSBarry Smith 
3691eb62cbbSBarry Smith   /*  wait on receives */
37078b31e54SBarry Smith   lens = (int *) PETSCMALLOC( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
3711eb62cbbSBarry Smith   source = lens + nrecvs;
3721eb62cbbSBarry Smith   count = nrecvs; slen = 0;
3731eb62cbbSBarry Smith   while (count) {
374d6dfbf8fSBarry Smith     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
3751eb62cbbSBarry Smith     /* unpack receives into our local space */
3761eb62cbbSBarry Smith     MPI_Get_count(&recv_status,MPI_INT,&n);
377d6dfbf8fSBarry Smith     source[imdex]  = recv_status.MPI_SOURCE;
378d6dfbf8fSBarry Smith     lens[imdex]  = n;
3791eb62cbbSBarry Smith     slen += n;
3801eb62cbbSBarry Smith     count--;
3811eb62cbbSBarry Smith   }
38278b31e54SBarry Smith   PETSCFREE(recv_waits);
3831eb62cbbSBarry Smith 
3841eb62cbbSBarry Smith   /* move the data into the send scatter */
38578b31e54SBarry Smith   lrows = (int *) PETSCMALLOC( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
3861eb62cbbSBarry Smith   count = 0;
3871eb62cbbSBarry Smith   for ( i=0; i<nrecvs; i++ ) {
3881eb62cbbSBarry Smith     values = rvalues + i*nmax;
3891eb62cbbSBarry Smith     for ( j=0; j<lens[i]; j++ ) {
3901eb62cbbSBarry Smith       lrows[count++] = values[j] - base;
3911eb62cbbSBarry Smith     }
3921eb62cbbSBarry Smith   }
39378b31e54SBarry Smith   PETSCFREE(rvalues); PETSCFREE(lens);
39478b31e54SBarry Smith   PETSCFREE(owner); PETSCFREE(nprocs);
3951eb62cbbSBarry Smith 
3961eb62cbbSBarry Smith   /* actually zap the local rows */
397fafbff53SBarry Smith   ierr = ISCreateSeq(MPI_COMM_SELF,slen,lrows,&istmp);
398464493b3SBarry Smith   PLogObjectParent(A,istmp);
39978b31e54SBarry Smith   CHKERRQ(ierr);  PETSCFREE(lrows);
40078b31e54SBarry Smith   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
40178b31e54SBarry Smith   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
40278b31e54SBarry Smith   ierr = ISDestroy(istmp); CHKERRQ(ierr);
4031eb62cbbSBarry Smith 
4041eb62cbbSBarry Smith   /* wait on sends */
4051eb62cbbSBarry Smith   if (nsends) {
40678b31e54SBarry Smith     send_status = (MPI_Status *) PETSCMALLOC( nsends*sizeof(MPI_Status) );
40778b31e54SBarry Smith     CHKPTRQ(send_status);
4081eb62cbbSBarry Smith     MPI_Waitall(nsends,send_waits,send_status);
40978b31e54SBarry Smith     PETSCFREE(send_status);
4101eb62cbbSBarry Smith   }
41178b31e54SBarry Smith   PETSCFREE(send_waits); PETSCFREE(svalues);
4121eb62cbbSBarry Smith 
4131eb62cbbSBarry Smith 
4141eb62cbbSBarry Smith   return 0;
4151eb62cbbSBarry Smith }
4161eb62cbbSBarry Smith 
41744a69424SLois Curfman McInnes static int MatMult_MPIAIJ(Mat aijin,Vec xx,Vec yy)
4181eb62cbbSBarry Smith {
41944a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data;
4201eb62cbbSBarry Smith   int        ierr;
42178b31e54SBarry Smith   if (!aij->assembled) SETERRQ(1,"MatMult_MPIAIJ:must assemble matrix first");
422*dbb450caSBarry Smith   ierr = VecScatterBegin(xx,aij->lvec,INSERT_VALUES,SCATTERALL,aij->Mvctx);
42378b31e54SBarry Smith   CHKERRQ(ierr);
42478b31e54SBarry Smith   ierr = MatMult(aij->A,xx,yy); CHKERRQ(ierr);
425*dbb450caSBarry Smith   ierr = VecScatterEnd(xx,aij->lvec,INSERT_VALUES,SCATTERALL,aij->Mvctx);
42678b31e54SBarry Smith   CHKERRQ(ierr);
42778b31e54SBarry Smith   ierr = MatMultAdd(aij->B,aij->lvec,yy,yy); CHKERRQ(ierr);
4281eb62cbbSBarry Smith   return 0;
4291eb62cbbSBarry Smith }
4301eb62cbbSBarry Smith 
43144a69424SLois Curfman McInnes static int MatMultAdd_MPIAIJ(Mat aijin,Vec xx,Vec yy,Vec zz)
432da3a660dSBarry Smith {
43344a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data;
434da3a660dSBarry Smith   int        ierr;
43578b31e54SBarry Smith   if (!aij->assembled) SETERRQ(1,"MatMult_MPIAIJ:must assemble matrix first");
436*dbb450caSBarry Smith   ierr = VecScatterBegin(xx,aij->lvec,INSERT_VALUES,SCATTERALL,aij->Mvctx);
43778b31e54SBarry Smith   CHKERRQ(ierr);
43878b31e54SBarry Smith   ierr = MatMultAdd(aij->A,xx,yy,zz); CHKERRQ(ierr);
439*dbb450caSBarry Smith   ierr = VecScatterEnd(xx,aij->lvec,INSERT_VALUES,SCATTERALL,aij->Mvctx);
44078b31e54SBarry Smith   CHKERRQ(ierr);
44178b31e54SBarry Smith   ierr = MatMultAdd(aij->B,aij->lvec,zz,zz); CHKERRQ(ierr);
442da3a660dSBarry Smith   return 0;
443da3a660dSBarry Smith }
444da3a660dSBarry Smith 
44544a69424SLois Curfman McInnes static int MatMultTrans_MPIAIJ(Mat aijin,Vec xx,Vec yy)
446da3a660dSBarry Smith {
44744a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data;
448da3a660dSBarry Smith   int        ierr;
449da3a660dSBarry Smith 
45044a69424SLois Curfman McInnes   if (!aij->assembled)
45178b31e54SBarry Smith     SETERRQ(1,"MatMulTrans_MPIAIJ:must assemble matrix first");
452da3a660dSBarry Smith   /* do nondiagonal part */
45378b31e54SBarry Smith   ierr = MatMultTrans(aij->B,xx,aij->lvec); CHKERRQ(ierr);
454da3a660dSBarry Smith   /* send it on its way */
455*dbb450caSBarry Smith   ierr = VecScatterBegin(aij->lvec,yy,ADD_VALUES,
45678b31e54SBarry Smith            (ScatterMode)(SCATTERALL|SCATTERREVERSE),aij->Mvctx); CHKERRQ(ierr);
457da3a660dSBarry Smith   /* do local part */
45878b31e54SBarry Smith   ierr = MatMultTrans(aij->A,xx,yy); CHKERRQ(ierr);
459da3a660dSBarry Smith   /* receive remote parts: note this assumes the values are not actually */
460da3a660dSBarry Smith   /* inserted in yy until the next line, which is true for my implementation*/
461da3a660dSBarry Smith   /* but is not perhaps always true. */
462*dbb450caSBarry Smith   ierr = VecScatterEnd(aij->lvec,yy,ADD_VALUES,
46378b31e54SBarry Smith          (ScatterMode)(SCATTERALL|SCATTERREVERSE),aij->Mvctx); CHKERRQ(ierr);
464da3a660dSBarry Smith   return 0;
465da3a660dSBarry Smith }
466da3a660dSBarry Smith 
46744a69424SLois Curfman McInnes static int MatMultTransAdd_MPIAIJ(Mat aijin,Vec xx,Vec yy,Vec zz)
468da3a660dSBarry Smith {
46944a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data;
470da3a660dSBarry Smith   int        ierr;
471da3a660dSBarry Smith 
47244a69424SLois Curfman McInnes   if (!aij->assembled)
47378b31e54SBarry Smith     SETERRQ(1,"MatMulTransAdd_MPIAIJ:must assemble matrix first");
474da3a660dSBarry Smith   /* do nondiagonal part */
47578b31e54SBarry Smith   ierr = MatMultTrans(aij->B,xx,aij->lvec); CHKERRQ(ierr);
476da3a660dSBarry Smith   /* send it on its way */
477*dbb450caSBarry Smith   ierr = VecScatterBegin(aij->lvec,zz,ADD_VALUES,
47878b31e54SBarry Smith          (ScatterMode)(SCATTERALL|SCATTERREVERSE),aij->Mvctx); CHKERRQ(ierr);
479da3a660dSBarry Smith   /* do local part */
48078b31e54SBarry Smith   ierr = MatMultTransAdd(aij->A,xx,yy,zz); CHKERRQ(ierr);
481da3a660dSBarry Smith   /* receive remote parts: note this assumes the values are not actually */
482da3a660dSBarry Smith   /* inserted in yy until the next line, which is true for my implementation*/
483da3a660dSBarry Smith   /* but is not perhaps always true. */
484*dbb450caSBarry Smith   ierr = VecScatterEnd(aij->lvec,zz,ADD_VALUES,
48578b31e54SBarry Smith        (ScatterMode)(SCATTERALL|SCATTERREVERSE),aij->Mvctx); CHKERRQ(ierr);
486da3a660dSBarry Smith   return 0;
487da3a660dSBarry Smith }
488da3a660dSBarry Smith 
4891eb62cbbSBarry Smith /*
4901eb62cbbSBarry Smith   This only works correctly for square matrices where the subblock A->A is the
4911eb62cbbSBarry Smith    diagonal block
4921eb62cbbSBarry Smith */
493d1710a03SLois Curfman McInnes static int MatGetDiagonal_MPIAIJ(Mat Ain,Vec v)
4941eb62cbbSBarry Smith {
49544a69424SLois Curfman McInnes   Mat_MPIAIJ *A = (Mat_MPIAIJ *) Ain->data;
49678b31e54SBarry Smith   if (!A->assembled) SETERRQ(1,"MatGetDiag_MPIAIJ:must assemble matrix first");
4971eb62cbbSBarry Smith   return MatGetDiagonal(A->A,v);
4981eb62cbbSBarry Smith }
4991eb62cbbSBarry Smith 
50044a69424SLois Curfman McInnes static int MatDestroy_MPIAIJ(PetscObject obj)
5011eb62cbbSBarry Smith {
5021eb62cbbSBarry Smith   Mat        mat = (Mat) obj;
50344a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
5041eb62cbbSBarry Smith   int        ierr;
505a5a9c739SBarry Smith #if defined(PETSC_LOG)
5066e35fa57SLois Curfman McInnes   PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N);
507a5a9c739SBarry Smith #endif
50878b31e54SBarry Smith   PETSCFREE(aij->rowners);
50978b31e54SBarry Smith   ierr = MatDestroy(aij->A); CHKERRQ(ierr);
51078b31e54SBarry Smith   ierr = MatDestroy(aij->B); CHKERRQ(ierr);
51178b31e54SBarry Smith   if (aij->colmap) PETSCFREE(aij->colmap);
51278b31e54SBarry Smith   if (aij->garray) PETSCFREE(aij->garray);
5131eb62cbbSBarry Smith   if (aij->lvec) VecDestroy(aij->lvec);
5141eb62cbbSBarry Smith   if (aij->Mvctx) VecScatterCtxDestroy(aij->Mvctx);
51578b31e54SBarry Smith   PETSCFREE(aij);
516a5a9c739SBarry Smith   PLogObjectDestroy(mat);
517a5a9c739SBarry Smith   PETSCHEADERDESTROY(mat);
5181eb62cbbSBarry Smith   return 0;
5191eb62cbbSBarry Smith }
5207857610eSBarry Smith #include "draw.h"
521b16a3bb1SBarry Smith #include "pinclude/pviewer.h"
522ee50ffe9SBarry Smith 
52344a69424SLois Curfman McInnes static int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
5241eb62cbbSBarry Smith {
5251eb62cbbSBarry Smith   Mat         mat = (Mat) obj;
52644a69424SLois Curfman McInnes   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
527*dbb450caSBarry Smith   Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
528f72585f2SLois Curfman McInnes   int         ierr, format;
529d13ab20cSBarry Smith   PetscObject vobj = (PetscObject) viewer;
530*dbb450caSBarry Smith   int         shift = C->indexshift;
5311eb62cbbSBarry Smith 
53278b31e54SBarry Smith   if (!aij->assembled) SETERRQ(1,"MatView_MPIAIJ:must assemble matrix first");
533154123eaSLois Curfman McInnes   if (!viewer) { /* so that viewers may be used from debuggers */
53421b0d8fbSLois Curfman McInnes     viewer = STDOUT_VIEWER_SELF; vobj = (PetscObject) viewer;
535154123eaSLois Curfman McInnes   }
536ab254492SBarry Smith   if (vobj->cookie == DRAW_COOKIE && vobj->type == NULLWINDOW) return 0;
537d636dbe3SBarry Smith   ierr = ViewerFileGetFormat_Private(viewer,&format);
538f72585f2SLois Curfman McInnes   if (vobj->cookie == VIEWER_COOKIE && format == FILE_FORMAT_INFO &&
5399b94acceSBarry Smith      (vobj->type == ASCII_FILE_VIEWER || vobj->type == ASCII_FILES_VIEWER)) {
540f72585f2SLois Curfman McInnes    /* do nothing for now */
541f72585f2SLois Curfman McInnes   }
5429b94acceSBarry Smith   else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILE_VIEWER) {
543d636dbe3SBarry Smith     FILE *fd;
544d636dbe3SBarry Smith     ierr = ViewerFileGetPointer_Private(viewer,&fd); CHKERRQ(ierr);
5457f813858SBarry Smith     MPIU_Seq_begin(mat->comm,1);
546d13ab20cSBarry Smith     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
5471eb62cbbSBarry Smith            aij->mytid,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
5481eb62cbbSBarry Smith            aij->cend);
54978b31e54SBarry Smith     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
55078b31e54SBarry Smith     ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
551d13ab20cSBarry Smith     fflush(fd);
5527f813858SBarry Smith     MPIU_Seq_end(mat->comm,1);
553d13ab20cSBarry Smith   }
5549b94acceSBarry Smith   else if ((vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILES_VIEWER) ||
5557857610eSBarry Smith             vobj->cookie == DRAW_COOKIE) {
55695373324SBarry Smith     int numtids = aij->numtids, mytid = aij->mytid;
55795373324SBarry Smith     if (numtids == 1) {
55878b31e54SBarry Smith       ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
55995373324SBarry Smith     }
56095373324SBarry Smith     else {
56195373324SBarry Smith       /* assemble the entire matrix onto first processor. */
56295373324SBarry Smith       Mat     A;
563ec8511deSBarry Smith       Mat_SeqAIJ *Aloc;
5642eb8c8abSBarry Smith       int     M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
56595373324SBarry Smith       Scalar  *a;
5662ee70a88SLois Curfman McInnes 
56795373324SBarry Smith       if (!mytid) {
56895373324SBarry Smith         ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,0,0,0,&A);
56995373324SBarry Smith       }
57095373324SBarry Smith       else {
57195373324SBarry Smith         ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,0,0,0,&A);
57295373324SBarry Smith       }
573464493b3SBarry Smith       PLogObjectParent(mat,A);
57478b31e54SBarry Smith       CHKERRQ(ierr);
57595373324SBarry Smith 
57695373324SBarry Smith       /* copy over the A part */
577ec8511deSBarry Smith       Aloc = (Mat_SeqAIJ*) aij->A->data;
5782ee70a88SLois Curfman McInnes       m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
57995373324SBarry Smith       row = aij->rstart;
580*dbb450caSBarry Smith       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
58195373324SBarry Smith       for ( i=0; i<m; i++ ) {
582*dbb450caSBarry Smith         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);
58378b31e54SBarry Smith         CHKERRQ(ierr);
58495373324SBarry Smith         row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
58595373324SBarry Smith       }
5862ee70a88SLois Curfman McInnes       aj = Aloc->j;
587*dbb450caSBarry Smith       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}
58895373324SBarry Smith 
58995373324SBarry Smith       /* copy over the B part */
590ec8511deSBarry Smith       Aloc = (Mat_SeqAIJ*) aij->B->data;
5912ee70a88SLois Curfman McInnes       m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
59295373324SBarry Smith       row = aij->rstart;
59378b31e54SBarry Smith       ct = cols = (int *) PETSCMALLOC( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
594*dbb450caSBarry Smith       for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
59595373324SBarry Smith       for ( i=0; i<m; i++ ) {
596*dbb450caSBarry Smith         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);
59778b31e54SBarry Smith         CHKERRQ(ierr);
59895373324SBarry Smith         row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
59995373324SBarry Smith       }
60078b31e54SBarry Smith       PETSCFREE(ct);
60178b31e54SBarry Smith       ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
60278b31e54SBarry Smith       ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
60395373324SBarry Smith       if (!mytid) {
60478b31e54SBarry Smith         ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
60595373324SBarry Smith       }
60678b31e54SBarry Smith       ierr = MatDestroy(A); CHKERRQ(ierr);
60795373324SBarry Smith     }
60895373324SBarry Smith   }
6091eb62cbbSBarry Smith   return 0;
6101eb62cbbSBarry Smith }
6111eb62cbbSBarry Smith 
612ec8511deSBarry Smith extern int MatMarkDiag_SeqAIJ(Mat);
6131eb62cbbSBarry Smith /*
6141eb62cbbSBarry Smith     This has to provide several versions.
6151eb62cbbSBarry Smith 
6161eb62cbbSBarry Smith      1) per sequential
6171eb62cbbSBarry Smith      2) a) use only local smoothing updating outer values only once.
6181eb62cbbSBarry Smith         b) local smoothing updating outer values each inner iteration
619d6dfbf8fSBarry Smith      3) color updating out values betwen colors.
6201eb62cbbSBarry Smith */
621fc76ce87SLois Curfman McInnes static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
622*dbb450caSBarry Smith                            double fshift,int its,Vec xx)
6238a729477SBarry Smith {
62444a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
625d6dfbf8fSBarry Smith   Mat        AA = mat->A, BB = mat->B;
626ec8511deSBarry Smith   Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
6276abc6512SBarry Smith   Scalar     zero = 0.0,*b,*x,*xs,*ls,d,*v,sum,scale,*t,*ts;
6286abc6512SBarry Smith   int        ierr,*idx, *diag;
6296abc6512SBarry Smith   int        n = mat->n, m = mat->m, i;
630da3a660dSBarry Smith   Vec        tt;
631*dbb450caSBarry Smith   int        shift = A->indexshift;
6328a729477SBarry Smith 
63378b31e54SBarry Smith   if (!mat->assembled) SETERRQ(1,"MatRelax_MPIAIJ:must assemble matrix first");
6341eb62cbbSBarry Smith 
635d6dfbf8fSBarry Smith   VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
636*dbb450caSBarry Smith   xs = x + shift; /* shift by one for index start of 1 */
637*dbb450caSBarry Smith   ls = ls + shift;
638ec8511deSBarry Smith   if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;}
639d6dfbf8fSBarry Smith   diag = A->diag;
640acb40c82SBarry Smith   if (flag == SOR_APPLY_UPPER || flag == SOR_APPLY_LOWER) {
64142f8db3fSLois Curfman McInnes     SETERRQ(1,"MatRelax_MPIAIJ:Option not yet supported");
642acb40c82SBarry Smith   }
643da3a660dSBarry Smith   if (flag & SOR_EISENSTAT) {
644da3a660dSBarry Smith     /* Let  A = L + U + D; where L is lower trianglar,
645da3a660dSBarry Smith     U is upper triangular, E is diagonal; This routine applies
646da3a660dSBarry Smith 
647da3a660dSBarry Smith             (L + E)^{-1} A (U + E)^{-1}
648da3a660dSBarry Smith 
649da3a660dSBarry Smith     to a vector efficiently using Eisenstat's trick. This is for
650da3a660dSBarry Smith     the case of SSOR preconditioner, so E is D/omega where omega
651da3a660dSBarry Smith     is the relaxation factor.
652da3a660dSBarry Smith     */
65378b31e54SBarry Smith     ierr = VecDuplicate(xx,&tt); CHKERRQ(ierr);
654464493b3SBarry Smith     PLogObjectParent(matin,tt);
655da3a660dSBarry Smith     VecGetArray(tt,&t);
656da3a660dSBarry Smith     scale = (2.0/omega) - 1.0;
657da3a660dSBarry Smith     /*  x = (E + U)^{-1} b */
658da3a660dSBarry Smith     VecSet(&zero,mat->lvec);
659*dbb450caSBarry Smith     ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINEUP,
66078b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
661da3a660dSBarry Smith     for ( i=m-1; i>-1; i-- ) {
662da3a660dSBarry Smith       n    = A->i[i+1] - diag[i] - 1;
663*dbb450caSBarry Smith       idx  = A->j + diag[i] + !shift;
664*dbb450caSBarry Smith       v    = A->a + diag[i] + !shift;
665da3a660dSBarry Smith       sum  = b[i];
666da3a660dSBarry Smith       SPARSEDENSEMDOT(sum,xs,v,idx,n);
667*dbb450caSBarry Smith       d    = fshift + A->a[diag[i]+shift];
668da3a660dSBarry Smith       n    = B->i[i+1] - B->i[i];
669*dbb450caSBarry Smith       idx  = B->j + B->i[i] + shift;
670*dbb450caSBarry Smith       v    = B->a + B->i[i] + shift;
671da3a660dSBarry Smith       SPARSEDENSEMDOT(sum,ls,v,idx,n);
672da3a660dSBarry Smith       x[i] = omega*(sum/d);
673da3a660dSBarry Smith     }
674*dbb450caSBarry Smith     ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINEUP,
67578b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
676da3a660dSBarry Smith 
677da3a660dSBarry Smith     /*  t = b - (2*E - D)x */
678da3a660dSBarry Smith     v = A->a;
679*dbb450caSBarry Smith     for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ + shift])*x[i]; }
680da3a660dSBarry Smith 
681da3a660dSBarry Smith     /*  t = (E + L)^{-1}t */
682*dbb450caSBarry Smith     ts = t + shift; /* shifted by one for index start of a or mat->j*/
683da3a660dSBarry Smith     diag = A->diag;
684da3a660dSBarry Smith     VecSet(&zero,mat->lvec);
685*dbb450caSBarry Smith     ierr = VecPipelineBegin(tt,mat->lvec,INSERT_VALUES,PIPELINEDOWN,
68678b31e54SBarry Smith                                                  mat->Mvctx); CHKERRQ(ierr);
687da3a660dSBarry Smith     for ( i=0; i<m; i++ ) {
688da3a660dSBarry Smith       n    = diag[i] - A->i[i];
689*dbb450caSBarry Smith       idx  = A->j + A->i[i] + shift;
690*dbb450caSBarry Smith       v    = A->a + A->i[i] + shift;
691da3a660dSBarry Smith       sum  = t[i];
692da3a660dSBarry Smith       SPARSEDENSEMDOT(sum,ts,v,idx,n);
693*dbb450caSBarry Smith       d    = fshift + A->a[diag[i]+shift];
694da3a660dSBarry Smith       n    = B->i[i+1] - B->i[i];
695*dbb450caSBarry Smith       idx  = B->j + B->i[i] + shift;
696*dbb450caSBarry Smith       v    = B->a + B->i[i] + shift;
697da3a660dSBarry Smith       SPARSEDENSEMDOT(sum,ls,v,idx,n);
698da3a660dSBarry Smith       t[i] = omega*(sum/d);
699da3a660dSBarry Smith     }
700*dbb450caSBarry Smith     ierr = VecPipelineEnd(tt,mat->lvec,INSERT_VALUES,PIPELINEDOWN,
70178b31e54SBarry Smith                                                     mat->Mvctx); CHKERRQ(ierr);
702da3a660dSBarry Smith     /*  x = x + t */
703da3a660dSBarry Smith     for ( i=0; i<m; i++ ) { x[i] += t[i]; }
704da3a660dSBarry Smith     VecDestroy(tt);
705da3a660dSBarry Smith     return 0;
706da3a660dSBarry Smith   }
707da3a660dSBarry Smith 
7081eb62cbbSBarry Smith 
709d6dfbf8fSBarry Smith   if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){
710da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
711da3a660dSBarry Smith       VecSet(&zero,mat->lvec); VecSet(&zero,xx);
712da3a660dSBarry Smith     }
713da3a660dSBarry Smith     else {
714*dbb450caSBarry Smith       ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTERUP,mat->Mvctx);
71578b31e54SBarry Smith       CHKERRQ(ierr);
716*dbb450caSBarry Smith       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTERUP,mat->Mvctx);
71778b31e54SBarry Smith       CHKERRQ(ierr);
718da3a660dSBarry Smith     }
719d6dfbf8fSBarry Smith     while (its--) {
720d6dfbf8fSBarry Smith       /* go down through the rows */
721*dbb450caSBarry Smith       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINEDOWN,
72278b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
723d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
724d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
725*dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
726*dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
727d6dfbf8fSBarry Smith         sum  = b[i];
728d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
729*dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
730d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
731*dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
732*dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
733d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
734d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
735d6dfbf8fSBarry Smith       }
736*dbb450caSBarry Smith       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINEDOWN,
73778b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
738d6dfbf8fSBarry Smith       /* come up through the rows */
739*dbb450caSBarry Smith       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINEUP,
74078b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
741d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
742d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
743*dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
744*dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
745d6dfbf8fSBarry Smith         sum  = b[i];
746d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
747*dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
748d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
749*dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
750*dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
751d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
752d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
753d6dfbf8fSBarry Smith       }
754*dbb450caSBarry Smith       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINEUP,
75578b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
756d6dfbf8fSBarry Smith     }
757d6dfbf8fSBarry Smith   }
758d6dfbf8fSBarry Smith   else if (flag & SOR_FORWARD_SWEEP){
759da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
760da3a660dSBarry Smith       VecSet(&zero,mat->lvec);
761*dbb450caSBarry Smith       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINEDOWN,
76278b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
763da3a660dSBarry Smith       for ( i=0; i<m; i++ ) {
764da3a660dSBarry Smith         n    = diag[i] - A->i[i];
765*dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
766*dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
767da3a660dSBarry Smith         sum  = b[i];
768da3a660dSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
769*dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
770da3a660dSBarry Smith         n    = B->i[i+1] - B->i[i];
771*dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
772*dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
773da3a660dSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
774da3a660dSBarry Smith         x[i] = omega*(sum/d);
775da3a660dSBarry Smith       }
776*dbb450caSBarry Smith       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINEDOWN,
77778b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
778da3a660dSBarry Smith       its--;
779da3a660dSBarry Smith     }
780d6dfbf8fSBarry Smith     while (its--) {
781*dbb450caSBarry Smith       ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTERUP,mat->Mvctx);
78278b31e54SBarry Smith       CHKERRQ(ierr);
783*dbb450caSBarry Smith       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTERUP,mat->Mvctx);
78478b31e54SBarry Smith       CHKERRQ(ierr);
785*dbb450caSBarry Smith       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINEDOWN,
78678b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
787d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
788d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
789*dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
790*dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
791d6dfbf8fSBarry Smith         sum  = b[i];
792d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
793*dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
794d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
795*dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
796*dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
797d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
798d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
799d6dfbf8fSBarry Smith       }
800*dbb450caSBarry Smith       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINEDOWN,
80178b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
802d6dfbf8fSBarry Smith     }
803d6dfbf8fSBarry Smith   }
804d6dfbf8fSBarry Smith   else if (flag & SOR_BACKWARD_SWEEP){
805da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
806da3a660dSBarry Smith       VecSet(&zero,mat->lvec);
807*dbb450caSBarry Smith       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINEUP,
80878b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
809da3a660dSBarry Smith       for ( i=m-1; i>-1; i-- ) {
810da3a660dSBarry Smith         n    = A->i[i+1] - diag[i] - 1;
811*dbb450caSBarry Smith         idx  = A->j + diag[i] + !shift;
812*dbb450caSBarry Smith         v    = A->a + diag[i] + !shift;
813da3a660dSBarry Smith         sum  = b[i];
814da3a660dSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
815*dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
816da3a660dSBarry Smith         n    = B->i[i+1] - B->i[i];
817*dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
818*dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
819da3a660dSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
820da3a660dSBarry Smith         x[i] = omega*(sum/d);
821da3a660dSBarry Smith       }
822*dbb450caSBarry Smith       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINEUP,
82378b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
824da3a660dSBarry Smith       its--;
825da3a660dSBarry Smith     }
826d6dfbf8fSBarry Smith     while (its--) {
827*dbb450caSBarry Smith       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTERDOWN,
82878b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
829*dbb450caSBarry Smith       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTERDOWN,
83078b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
831*dbb450caSBarry Smith       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINEUP,
83278b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
833d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
834d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
835*dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
836*dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
837d6dfbf8fSBarry Smith         sum  = b[i];
838d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
839*dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
840d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
841*dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
842*dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
843d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
844d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
845d6dfbf8fSBarry Smith       }
846*dbb450caSBarry Smith       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINEUP,
84778b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
848d6dfbf8fSBarry Smith     }
849d6dfbf8fSBarry Smith   }
850d6dfbf8fSBarry Smith   else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
851da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
852*dbb450caSBarry Smith       return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
853da3a660dSBarry Smith     }
854*dbb450caSBarry Smith     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTERALL,mat->Mvctx);
85578b31e54SBarry Smith     CHKERRQ(ierr);
856*dbb450caSBarry Smith     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTERALL,mat->Mvctx);
85778b31e54SBarry Smith     CHKERRQ(ierr);
858d6dfbf8fSBarry Smith     while (its--) {
859d6dfbf8fSBarry Smith       /* go down through the rows */
860d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
861d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
862*dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
863*dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
864d6dfbf8fSBarry Smith         sum  = b[i];
865d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
866*dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
867d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
868*dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
869*dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
870d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
871d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
872d6dfbf8fSBarry Smith       }
873d6dfbf8fSBarry Smith       /* come up through the rows */
874d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
875d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
876*dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
877*dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
878d6dfbf8fSBarry Smith         sum  = b[i];
879d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
880*dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
881d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
882*dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
883*dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
884d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
885d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
886d6dfbf8fSBarry Smith       }
887d6dfbf8fSBarry Smith     }
888d6dfbf8fSBarry Smith   }
889d6dfbf8fSBarry Smith   else if (flag & SOR_LOCAL_FORWARD_SWEEP){
890da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
891*dbb450caSBarry Smith       return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
892da3a660dSBarry Smith     }
893*dbb450caSBarry Smith     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTERALL,mat->Mvctx);
89478b31e54SBarry Smith     CHKERRQ(ierr);
895*dbb450caSBarry Smith     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTERALL,mat->Mvctx);
89678b31e54SBarry Smith     CHKERRQ(ierr);
897d6dfbf8fSBarry Smith     while (its--) {
898d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
899d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
900*dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
901*dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
902d6dfbf8fSBarry Smith         sum  = b[i];
903d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
904*dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
905d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
906*dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
907*dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
908d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
909d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
910d6dfbf8fSBarry Smith       }
911d6dfbf8fSBarry Smith     }
912d6dfbf8fSBarry Smith   }
913d6dfbf8fSBarry Smith   else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
914da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
915*dbb450caSBarry Smith       return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
916da3a660dSBarry Smith     }
917*dbb450caSBarry Smith     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTERALL,
91878b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
919*dbb450caSBarry Smith     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTERALL,
92078b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
921d6dfbf8fSBarry Smith     while (its--) {
922d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
923d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
924*dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
925*dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
926d6dfbf8fSBarry Smith         sum  = b[i];
927d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
928*dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
929d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
930*dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
931*dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
932d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
933d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
934d6dfbf8fSBarry Smith       }
935d6dfbf8fSBarry Smith     }
936d6dfbf8fSBarry Smith   }
9378a729477SBarry Smith   return 0;
9388a729477SBarry Smith }
939a66be287SLois Curfman McInnes 
940fc76ce87SLois Curfman McInnes static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,int *nz,
941fc76ce87SLois Curfman McInnes                              int *nzalloc,int *mem)
942a66be287SLois Curfman McInnes {
943a66be287SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
944a66be287SLois Curfman McInnes   Mat        A = mat->A, B = mat->B;
945a66be287SLois Curfman McInnes   int        ierr, isend[3], irecv[3], nzA, nzallocA, memA;
946a66be287SLois Curfman McInnes 
94778b31e54SBarry Smith   ierr = MatGetInfo(A,MAT_LOCAL,&nzA,&nzallocA,&memA); CHKERRQ(ierr);
94878b31e54SBarry Smith   ierr = MatGetInfo(B,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr);
949a66be287SLois Curfman McInnes   isend[0] += nzA; isend[1] += nzallocA; isend[2] += memA;
950a66be287SLois Curfman McInnes   if (flag == MAT_LOCAL) {
951a66be287SLois Curfman McInnes     *nz = isend[0]; *nzalloc = isend[1]; *mem = isend[2];
952a66be287SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_MAX) {
953a66be287SLois Curfman McInnes     MPI_Allreduce((void *) isend,(void *) irecv,3,MPI_INT,MPI_MAX,matin->comm);
954a66be287SLois Curfman McInnes     *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
955a66be287SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_SUM) {
956a66be287SLois Curfman McInnes     MPI_Allreduce((void *) isend,(void *) irecv,3,MPI_INT,MPI_SUM,matin->comm);
957a66be287SLois Curfman McInnes     *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
958a66be287SLois Curfman McInnes   }
959a66be287SLois Curfman McInnes   return 0;
960a66be287SLois Curfman McInnes }
961a66be287SLois Curfman McInnes 
962299609e3SLois Curfman McInnes extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*);
963299609e3SLois Curfman McInnes extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*);
964299609e3SLois Curfman McInnes extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double);
965299609e3SLois Curfman McInnes extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *);
966299609e3SLois Curfman McInnes extern int MatSolve_MPIAIJ(Mat,Vec,Vec);
967299609e3SLois Curfman McInnes extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec);
968299609e3SLois Curfman McInnes extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec);
969299609e3SLois Curfman McInnes extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec);
970299609e3SLois Curfman McInnes 
971fc76ce87SLois Curfman McInnes static int MatSetOption_MPIAIJ(Mat aijin,MatOption op)
972c74985f6SBarry Smith {
97344a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data;
974c74985f6SBarry Smith 
975c74985f6SBarry Smith   if      (op == NO_NEW_NONZERO_LOCATIONS)  {
976c74985f6SBarry Smith     MatSetOption(aij->A,op);
977c74985f6SBarry Smith     MatSetOption(aij->B,op);
978c74985f6SBarry Smith   }
979c74985f6SBarry Smith   else if (op == YES_NEW_NONZERO_LOCATIONS) {
980c74985f6SBarry Smith     MatSetOption(aij->A,op);
981c74985f6SBarry Smith     MatSetOption(aij->B,op);
982c74985f6SBarry Smith   }
983bbb6d6a8SBarry Smith   else if (op == COLUMN_ORIENTED)
984bbb6d6a8SBarry Smith     SETERRQ(1,"MatSetOption_MPIAIJ:Column oriented not supported");
985c74985f6SBarry Smith   return 0;
986c74985f6SBarry Smith }
987c74985f6SBarry Smith 
988d1710a03SLois Curfman McInnes static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
989c74985f6SBarry Smith {
99044a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
991c74985f6SBarry Smith   *m = mat->M; *n = mat->N;
992c74985f6SBarry Smith   return 0;
993c74985f6SBarry Smith }
994c74985f6SBarry Smith 
995d1710a03SLois Curfman McInnes static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
996c74985f6SBarry Smith {
99744a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
998b7c46309SBarry Smith   *m = mat->m; *n = mat->N;
999c74985f6SBarry Smith   return 0;
1000c74985f6SBarry Smith }
1001c74985f6SBarry Smith 
1002d1710a03SLois Curfman McInnes static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
1003c74985f6SBarry Smith {
100444a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1005c74985f6SBarry Smith   *m = mat->rstart; *n = mat->rend;
1006c74985f6SBarry Smith   return 0;
1007c74985f6SBarry Smith }
1008c74985f6SBarry Smith 
100939e00950SLois Curfman McInnes static int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
101039e00950SLois Curfman McInnes {
1011154123eaSLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1012154123eaSLois Curfman McInnes   Scalar     *vworkA, *vworkB, **pvA, **pvB;
1013154123eaSLois Curfman McInnes   int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
1014154123eaSLois Curfman McInnes   int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
101539e00950SLois Curfman McInnes 
101639e00950SLois Curfman McInnes   if (row < rstart || row >= rend)
1017bbb6d6a8SBarry Smith     SETERRQ(1,"MatGetRow_MPIAIJ:Currently you can get only local rows")
1018abc0e9e4SLois Curfman McInnes   lrow = row - rstart;
101939e00950SLois Curfman McInnes 
1020154123eaSLois Curfman McInnes   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1021154123eaSLois Curfman McInnes   if (!v)   {pvA = 0; pvB = 0;}
1022154123eaSLois Curfman McInnes   if (!idx) {pcA = 0; if (!v) pcB = 0;}
102378b31e54SBarry Smith   ierr = MatGetRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
102478b31e54SBarry Smith   ierr = MatGetRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1025154123eaSLois Curfman McInnes   nztot = nzA + nzB;
1026154123eaSLois Curfman McInnes 
1027154123eaSLois Curfman McInnes   if (v  || idx) {
1028154123eaSLois Curfman McInnes     if (nztot) {
1029154123eaSLois Curfman McInnes       /* Sort by increasing column numbers, assuming A and B already sorted */
1030299609e3SLois Curfman McInnes       int imark;
103148b35521SBarry Smith       if (mat->assembled) {
1032154123eaSLois Curfman McInnes         for (i=0; i<nzB; i++) cworkB[i] = mat->garray[cworkB[i]];
103348b35521SBarry Smith       }
1034154123eaSLois Curfman McInnes       if (v) {
103578b31e54SBarry Smith         *v = (Scalar *) PETSCMALLOC( (nztot)*sizeof(Scalar) ); CHKPTRQ(*v);
103639e00950SLois Curfman McInnes         for ( i=0; i<nzB; i++ ) {
1037154123eaSLois Curfman McInnes           if (cworkB[i] < cstart)   (*v)[i] = vworkB[i];
1038154123eaSLois Curfman McInnes           else break;
1039154123eaSLois Curfman McInnes         }
1040154123eaSLois Curfman McInnes         imark = i;
1041154123eaSLois Curfman McInnes         for ( i=0; i<nzA; i++ )     (*v)[imark+i] = vworkA[i];
1042299609e3SLois Curfman McInnes         for ( i=imark; i<nzB; i++ ) (*v)[nzA+i] = vworkB[i];
1043154123eaSLois Curfman McInnes       }
1044154123eaSLois Curfman McInnes       if (idx) {
104578b31e54SBarry Smith         *idx = (int *) PETSCMALLOC( (nztot)*sizeof(int) ); CHKPTRQ(*idx);
1046154123eaSLois Curfman McInnes         for (i=0; i<nzA; i++) cworkA[i] += cstart;
1047154123eaSLois Curfman McInnes         for ( i=0; i<nzB; i++ ) {
1048154123eaSLois Curfman McInnes           if (cworkB[i] < cstart)   (*idx)[i] = cworkB[i];
1049154123eaSLois Curfman McInnes           else break;
1050154123eaSLois Curfman McInnes         }
1051154123eaSLois Curfman McInnes         imark = i;
1052154123eaSLois Curfman McInnes         for ( i=0; i<nzA; i++ )     (*idx)[imark+i] = cworkA[i];
1053299609e3SLois Curfman McInnes         for ( i=imark; i<nzB; i++ ) (*idx)[nzA+i] = cworkB[i];
105439e00950SLois Curfman McInnes       }
105539e00950SLois Curfman McInnes     }
105639e00950SLois Curfman McInnes     else {*idx = 0; *v=0;}
1057154123eaSLois Curfman McInnes   }
105839e00950SLois Curfman McInnes   *nz = nztot;
105978b31e54SBarry Smith   ierr = MatRestoreRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
106078b31e54SBarry Smith   ierr = MatRestoreRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
106139e00950SLois Curfman McInnes   return 0;
106239e00950SLois Curfman McInnes }
106339e00950SLois Curfman McInnes 
106439e00950SLois Curfman McInnes static int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
106539e00950SLois Curfman McInnes {
106678b31e54SBarry Smith   if (idx) PETSCFREE(*idx);
106778b31e54SBarry Smith   if (v) PETSCFREE(*v);
106839e00950SLois Curfman McInnes   return 0;
106939e00950SLois Curfman McInnes }
107039e00950SLois Curfman McInnes 
1071855ac2c5SLois Curfman McInnes static int MatNorm_MPIAIJ(Mat mat,MatNormType type,double *norm)
1072855ac2c5SLois Curfman McInnes {
1073855ac2c5SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1074aac3204fSLois Curfman McInnes   int        ierr, i, j, cstart = aij->cstart;
107504ca555eSLois Curfman McInnes   double     sum = 0.0;
1076ec8511deSBarry Smith   Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
107704ca555eSLois Curfman McInnes   Scalar     *v;
1078*dbb450caSBarry Smith   int        shift = amat->indexshift;
107904ca555eSLois Curfman McInnes 
108004ca555eSLois Curfman McInnes   if (!aij->assembled)
108104ca555eSLois Curfman McInnes     SETERRQ(1,"MatNorm_MPIAIJ: Cannot compute norm of unassembled matrix");
1082855ac2c5SLois Curfman McInnes   if (aij->numtids == 1) {
108314183eadSLois Curfman McInnes     ierr =  MatNorm(aij->A,type,norm); CHKERRQ(ierr);
108437fa93a5SLois Curfman McInnes   } else {
108504ca555eSLois Curfman McInnes     if (type == NORM_FROBENIUS) {
108604ca555eSLois Curfman McInnes       v = amat->a;
108704ca555eSLois Curfman McInnes       for (i=0; i<amat->nz; i++ ) {
108804ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX)
108904ca555eSLois Curfman McInnes         sum += real(conj(*v)*(*v)); v++;
109004ca555eSLois Curfman McInnes #else
109104ca555eSLois Curfman McInnes         sum += (*v)*(*v); v++;
109204ca555eSLois Curfman McInnes #endif
109304ca555eSLois Curfman McInnes       }
109404ca555eSLois Curfman McInnes       v = bmat->a;
109504ca555eSLois Curfman McInnes       for (i=0; i<bmat->nz; i++ ) {
109604ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX)
109704ca555eSLois Curfman McInnes         sum += real(conj(*v)*(*v)); v++;
109804ca555eSLois Curfman McInnes #else
109904ca555eSLois Curfman McInnes         sum += (*v)*(*v); v++;
110004ca555eSLois Curfman McInnes #endif
110104ca555eSLois Curfman McInnes       }
110204ca555eSLois Curfman McInnes       MPI_Allreduce((void*)&sum,(void*)norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
110304ca555eSLois Curfman McInnes       *norm = sqrt(*norm);
110404ca555eSLois Curfman McInnes     }
110504ca555eSLois Curfman McInnes     else if (type == NORM_1) { /* max column norm */
110604ca555eSLois Curfman McInnes       double *tmp, *tmp2;
110704ca555eSLois Curfman McInnes       int    *jj, *garray = aij->garray;
110804ca555eSLois Curfman McInnes       tmp = (double *) PETSCMALLOC( aij->N*sizeof(double) ); CHKPTRQ(tmp);
110904ca555eSLois Curfman McInnes       tmp2 = (double *) PETSCMALLOC( aij->N*sizeof(double) ); CHKPTRQ(tmp2);
111004ca555eSLois Curfman McInnes       PETSCMEMSET(tmp,0,aij->N*sizeof(double));
111104ca555eSLois Curfman McInnes       *norm = 0.0;
111204ca555eSLois Curfman McInnes       v = amat->a; jj = amat->j;
111304ca555eSLois Curfman McInnes       for ( j=0; j<amat->nz; j++ ) {
111404ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX)
1115*dbb450caSBarry Smith         tmp[cstart + *jj++ + shift] += abs(*v++);
111604ca555eSLois Curfman McInnes #else
1117*dbb450caSBarry Smith         tmp[cstart + *jj++ + shift] += fabs(*v++);
111804ca555eSLois Curfman McInnes #endif
111904ca555eSLois Curfman McInnes       }
112004ca555eSLois Curfman McInnes       v = bmat->a; jj = bmat->j;
112104ca555eSLois Curfman McInnes       for ( j=0; j<bmat->nz; j++ ) {
112204ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX)
1123*dbb450caSBarry Smith         tmp[garray[*jj++ + shift]] += abs(*v++);
112404ca555eSLois Curfman McInnes #else
1125*dbb450caSBarry Smith         tmp[garray[*jj++ + shift]] += fabs(*v++);
112604ca555eSLois Curfman McInnes #endif
112704ca555eSLois Curfman McInnes       }
112804ca555eSLois Curfman McInnes       MPI_Allreduce((void*)tmp,(void*)tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);
112904ca555eSLois Curfman McInnes       for ( j=0; j<aij->N; j++ ) {
113004ca555eSLois Curfman McInnes         if (tmp2[j] > *norm) *norm = tmp2[j];
113104ca555eSLois Curfman McInnes       }
113204ca555eSLois Curfman McInnes       PETSCFREE(tmp); PETSCFREE(tmp2);
113304ca555eSLois Curfman McInnes     }
113404ca555eSLois Curfman McInnes     else if (type == NORM_INFINITY) { /* max row norm */
113504ca555eSLois Curfman McInnes       double normtemp = 0.0;
113604ca555eSLois Curfman McInnes       for ( j=0; j<amat->m; j++ ) {
1137*dbb450caSBarry Smith         v = amat->a + amat->i[j] + shift;
113804ca555eSLois Curfman McInnes         sum = 0.0;
113904ca555eSLois Curfman McInnes         for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
114004ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX)
114104ca555eSLois Curfman McInnes           sum += abs(*v); v++;
114204ca555eSLois Curfman McInnes #else
114304ca555eSLois Curfman McInnes           sum += fabs(*v); v++;
114404ca555eSLois Curfman McInnes #endif
114504ca555eSLois Curfman McInnes         }
1146*dbb450caSBarry Smith         v = bmat->a + bmat->i[j] + shift;
114704ca555eSLois Curfman McInnes         for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
114804ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX)
114904ca555eSLois Curfman McInnes           sum += abs(*v); v++;
115004ca555eSLois Curfman McInnes #else
115104ca555eSLois Curfman McInnes           sum += fabs(*v); v++;
115204ca555eSLois Curfman McInnes #endif
115304ca555eSLois Curfman McInnes         }
115404ca555eSLois Curfman McInnes         if (sum > normtemp) normtemp = sum;
115504ca555eSLois Curfman McInnes         MPI_Allreduce((void*)&normtemp,(void*)norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);
115604ca555eSLois Curfman McInnes       }
115704ca555eSLois Curfman McInnes     }
115804ca555eSLois Curfman McInnes     else {
115904ca555eSLois Curfman McInnes       SETERRQ(1,"MatNorm_MPIRow:No support for the two norm");
116004ca555eSLois Curfman McInnes     }
116137fa93a5SLois Curfman McInnes   }
1162855ac2c5SLois Curfman McInnes   return 0;
1163855ac2c5SLois Curfman McInnes }
1164855ac2c5SLois Curfman McInnes 
11650de55854SLois Curfman McInnes static int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1166b7c46309SBarry Smith {
1167b7c46309SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1168b7c46309SBarry Smith   int        ierr;
1169b7c46309SBarry Smith   Mat        B;
1170*dbb450caSBarry Smith   Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
1171b7c46309SBarry Smith   int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
1172b7c46309SBarry Smith   Scalar     *array;
1173*dbb450caSBarry Smith   int        shift = Aloc->indexshift;
1174b7c46309SBarry Smith 
11750de55854SLois Curfman McInnes   if (!matout && M != N) SETERRQ(1,
11760de55854SLois Curfman McInnes     "MatTranspose_MPIAIJ: Cannot transpose rectangular matrix in place");
1177b7c46309SBarry Smith   ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,0,0,0,&B);
1178b7c46309SBarry Smith   CHKERRQ(ierr);
1179b7c46309SBarry Smith 
1180b7c46309SBarry Smith   /* copy over the A part */
1181ec8511deSBarry Smith   Aloc = (Mat_SeqAIJ*) a->A->data;
1182b7c46309SBarry Smith   m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1183b7c46309SBarry Smith   row = a->rstart;
1184*dbb450caSBarry Smith   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
1185b7c46309SBarry Smith   for ( i=0; i<m; i++ ) {
1186*dbb450caSBarry Smith       ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);
1187b7c46309SBarry Smith       CHKERRQ(ierr);
1188b7c46309SBarry Smith       row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1189b7c46309SBarry Smith   }
1190b7c46309SBarry Smith   aj = Aloc->j;
1191*dbb450caSBarry Smith   for ( i=0; i<ai[m]|+shift; i++ ) {aj[i] -= a->cstart + shift;}
1192b7c46309SBarry Smith 
1193b7c46309SBarry Smith   /* copy over the B part */
1194ec8511deSBarry Smith   Aloc = (Mat_SeqAIJ*) a->B->data;
1195b7c46309SBarry Smith   m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1196b7c46309SBarry Smith   row = a->rstart;
1197*dbb450caSBarry Smith   ct = cols = (int *) PETSCMALLOC( (ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
1198*dbb450caSBarry Smith   for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
1199b7c46309SBarry Smith   for ( i=0; i<m; i++ ) {
1200*dbb450caSBarry Smith     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);
1201b7c46309SBarry Smith     CHKERRQ(ierr);
1202b7c46309SBarry Smith     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1203b7c46309SBarry Smith   }
1204b7c46309SBarry Smith   PETSCFREE(ct);
1205b7c46309SBarry Smith   ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
1206b7c46309SBarry Smith   ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
12070de55854SLois Curfman McInnes   if (matout) {
12080de55854SLois Curfman McInnes     *matout = B;
12090de55854SLois Curfman McInnes   } else {
12100de55854SLois Curfman McInnes     /* This isn't really an in-place transpose .... but free data structures from a */
12110de55854SLois Curfman McInnes     PETSCFREE(a->rowners);
12120de55854SLois Curfman McInnes     ierr = MatDestroy(a->A); CHKERRQ(ierr);
12130de55854SLois Curfman McInnes     ierr = MatDestroy(a->B); CHKERRQ(ierr);
12140de55854SLois Curfman McInnes     if (a->colmap) PETSCFREE(a->colmap);
12150de55854SLois Curfman McInnes     if (a->garray) PETSCFREE(a->garray);
12160de55854SLois Curfman McInnes     if (a->lvec) VecDestroy(a->lvec);
12170de55854SLois Curfman McInnes     if (a->Mvctx) VecScatterCtxDestroy(a->Mvctx);
12180de55854SLois Curfman McInnes     PETSCFREE(a);
12190de55854SLois Curfman McInnes     PETSCMEMCPY(A,B,sizeof(struct _Mat));
12200de55854SLois Curfman McInnes     PETSCHEADERDESTROY(B);
12210de55854SLois Curfman McInnes   }
1222b7c46309SBarry Smith   return 0;
1223b7c46309SBarry Smith }
1224b7c46309SBarry Smith 
1225fc76ce87SLois Curfman McInnes extern int MatConvert_MPIAIJ(Mat,MatType,Mat *);
1226ff7509f2SBarry Smith static int MatCopyPrivate_MPIAIJ(Mat,Mat *);
1227d6dfbf8fSBarry Smith 
12288a729477SBarry Smith /* -------------------------------------------------------------------*/
12292ee70a88SLois Curfman McInnes static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
123039e00950SLois Curfman McInnes        MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
123144a69424SLois Curfman McInnes        MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
123244a69424SLois Curfman McInnes        MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
1233299609e3SLois Curfman McInnes        MatSolve_MPIAIJ,MatSolveAdd_MPIAIJ,
1234299609e3SLois Curfman McInnes        MatSolveTrans_MPIAIJ,MatSolveTransAdd_MPIAIJ,
1235299609e3SLois Curfman McInnes        MatLUFactor_MPIAIJ,0,
123644a69424SLois Curfman McInnes        MatRelax_MPIAIJ,
1237b7c46309SBarry Smith        MatTranspose_MPIAIJ,
1238a66be287SLois Curfman McInnes        MatGetInfo_MPIAIJ,0,
1239855ac2c5SLois Curfman McInnes        MatGetDiagonal_MPIAIJ,0,MatNorm_MPIAIJ,
1240ee50ffe9SBarry Smith        MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
12411eb62cbbSBarry Smith        0,
1242299609e3SLois Curfman McInnes        MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
1243299609e3SLois Curfman McInnes        MatGetReordering_MPIAIJ,
1244299609e3SLois Curfman McInnes        MatLUFactorSymbolic_MPIAIJ,MatLUFactorNumeric_MPIAIJ,0,0,
1245d1710a03SLois Curfman McInnes        MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
1246299609e3SLois Curfman McInnes        MatILUFactorSymbolic_MPIAIJ,0,
1247ff7509f2SBarry Smith        0,0,MatConvert_MPIAIJ,0,0,MatCopyPrivate_MPIAIJ};
12488a729477SBarry Smith 
12498a729477SBarry Smith /*@
1250ff756334SLois Curfman McInnes    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
1251ff756334SLois Curfman McInnes    (the default parallel PETSc format).
12528a729477SBarry Smith 
12538a729477SBarry Smith    Input Parameters:
12541eb62cbbSBarry Smith .  comm - MPI communicator
12557d3e4905SLois Curfman McInnes .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
12567d3e4905SLois Curfman McInnes .  n - number of local columns (or PETSC_DECIDE to have calculated
12577d3e4905SLois Curfman McInnes            if N is given)
12587d3e4905SLois Curfman McInnes .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
12597d3e4905SLois Curfman McInnes .  N - number of global columns (or PETSC_DECIDE to have calculated
12607d3e4905SLois Curfman McInnes            if n is given)
1261ab693e5aSLois Curfman McInnes .  d_nz - number of nonzeros per row in diagonal portion of local submatrix
1262ff756334SLois Curfman McInnes            (same for all local rows)
1263ab693e5aSLois Curfman McInnes .  d_nzz - number of nonzeros per row in diagonal portion of local submatrix
1264ab693e5aSLois Curfman McInnes            or null (possibly different for each row).  You must leave room
1265ab693e5aSLois Curfman McInnes            for the diagonal entry even if it is zero.
1266ab693e5aSLois Curfman McInnes .  o_nz - number of nonzeros per row in off-diagonal portion of local
1267ab693e5aSLois Curfman McInnes            submatrix (same for all local rows).
1268ab693e5aSLois Curfman McInnes .  o_nzz - number of nonzeros per row in off-diagonal portion of local
1269ab693e5aSLois Curfman McInnes            submatrix or null (possibly different for each row).
12708a729477SBarry Smith 
1271ff756334SLois Curfman McInnes    Output Parameter:
12728a729477SBarry Smith .  newmat - the matrix
12738a729477SBarry Smith 
1274ff756334SLois Curfman McInnes    Notes:
1275ff756334SLois Curfman McInnes    The AIJ format (also called the Yale sparse matrix format or
1276ff756334SLois Curfman McInnes    compressed row storage), is fully compatible with standard Fortran 77
1277ff756334SLois Curfman McInnes    storage.  That is, the stored row and column indices begin at
1278ab693e5aSLois Curfman McInnes    one, not zero.  See the users manual for further details.
1279ff756334SLois Curfman McInnes 
1280ff756334SLois Curfman McInnes    The user MUST specify either the local or global matrix dimensions
1281ff756334SLois Curfman McInnes    (possibly both).
1282ff756334SLois Curfman McInnes 
1283e0245417SLois Curfman McInnes    Storage Information:
1284e0245417SLois Curfman McInnes    For a square global matrix we define each processor's diagonal portion
1285e0245417SLois Curfman McInnes    to be its local rows and the corresponding columns (a square submatrix);
1286e0245417SLois Curfman McInnes    each processor's off-diagonal portion encompasses the remainder of the
1287e0245417SLois Curfman McInnes    local matrix (a rectangular submatrix).
1288e0245417SLois Curfman McInnes 
1289e0245417SLois Curfman McInnes    The user can specify preallocated storage for the diagonal part of
1290e0245417SLois Curfman McInnes    the local submatrix with either d_nz or d_nnz (not both). Set both
1291e0245417SLois Curfman McInnes    d_nz and d_nnz to zero for PETSc to control dynamic memory allocation.
1292e0245417SLois Curfman McInnes    Likewise, specify preallocated storage for the off-diagonal part of
1293e0245417SLois Curfman McInnes    the local submatrix with o_nz or o_nnz (not both).
1294ff756334SLois Curfman McInnes 
1295dbd7a890SLois Curfman McInnes .keywords: matrix, aij, compressed row, sparse, parallel
1296ff756334SLois Curfman McInnes 
1297fafbff53SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
12988a729477SBarry Smith @*/
12991eb62cbbSBarry Smith int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
13001eb62cbbSBarry Smith                  int d_nz,int *d_nnz, int o_nz,int *o_nnz,Mat *newmat)
13018a729477SBarry Smith {
13028a729477SBarry Smith   Mat          mat;
130344a69424SLois Curfman McInnes   Mat_MPIAIJ   *aij;
13046abc6512SBarry Smith   int          ierr, i,sum[2],work[2];
13058a729477SBarry Smith   *newmat         = 0;
130644a69424SLois Curfman McInnes   PETSCHEADERCREATE(mat,_Mat,MAT_COOKIE,MATMPIAIJ,comm);
1307a5a9c739SBarry Smith   PLogObjectCreate(mat);
130878b31e54SBarry Smith   mat->data       = (void *) (aij = PETSCNEW(Mat_MPIAIJ)); CHKPTRQ(aij);
1309*dbb450caSBarry Smith   PETSCMEMCPY(&mat->ops,&MatOps,sizeof(struct _MatOps));
131044a69424SLois Curfman McInnes   mat->destroy    = MatDestroy_MPIAIJ;
131144a69424SLois Curfman McInnes   mat->view       = MatView_MPIAIJ;
13128a729477SBarry Smith   mat->factor     = 0;
1313d6dfbf8fSBarry Smith 
131407a0e7adSLois Curfman McInnes   aij->insertmode = NOTSETVALUES;
13151eb62cbbSBarry Smith   MPI_Comm_rank(comm,&aij->mytid);
13161eb62cbbSBarry Smith   MPI_Comm_size(comm,&aij->numtids);
13171eb62cbbSBarry Smith 
1318dbd7a890SLois Curfman McInnes   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
13191eb62cbbSBarry Smith     work[0] = m; work[1] = n;
1320d6dfbf8fSBarry Smith     MPI_Allreduce((void *) work,(void *) sum,2,MPI_INT,MPI_SUM,comm );
1321dbd7a890SLois Curfman McInnes     if (M == PETSC_DECIDE) M = sum[0];
1322dbd7a890SLois Curfman McInnes     if (N == PETSC_DECIDE) N = sum[1];
13231eb62cbbSBarry Smith   }
1324dbd7a890SLois Curfman McInnes   if (m == PETSC_DECIDE)
1325dbd7a890SLois Curfman McInnes     {m = M/aij->numtids + ((M % aij->numtids) > aij->mytid);}
1326dbd7a890SLois Curfman McInnes   if (n == PETSC_DECIDE)
1327dbd7a890SLois Curfman McInnes     {n = N/aij->numtids + ((N % aij->numtids) > aij->mytid);}
13288a729477SBarry Smith   aij->m = m;
13298a729477SBarry Smith   aij->n = n;
13301eb62cbbSBarry Smith   aij->N = N;
13311eb62cbbSBarry Smith   aij->M = M;
13321eb62cbbSBarry Smith 
13331eb62cbbSBarry Smith   /* build local table of row and column ownerships */
133478b31e54SBarry Smith   aij->rowners = (int *) PETSCMALLOC(2*(aij->numtids+2)*sizeof(int));
133578b31e54SBarry Smith   CHKPTRQ(aij->rowners);
1336464493b3SBarry Smith   PLogObjectMemory(mat,2*(aij->numtids+2)*sizeof(int)+sizeof(struct _Mat)+
1337464493b3SBarry Smith                        sizeof(Mat_MPIAIJ));
13381eb62cbbSBarry Smith   aij->cowners = aij->rowners + aij->numtids + 1;
13391eb62cbbSBarry Smith   MPI_Allgather(&m,1,MPI_INT,aij->rowners+1,1,MPI_INT,comm);
13401eb62cbbSBarry Smith   aij->rowners[0] = 0;
13411eb62cbbSBarry Smith   for ( i=2; i<=aij->numtids; i++ ) {
13421eb62cbbSBarry Smith     aij->rowners[i] += aij->rowners[i-1];
13438a729477SBarry Smith   }
13441eb62cbbSBarry Smith   aij->rstart = aij->rowners[aij->mytid];
13451eb62cbbSBarry Smith   aij->rend   = aij->rowners[aij->mytid+1];
13461eb62cbbSBarry Smith   MPI_Allgather(&n,1,MPI_INT,aij->cowners+1,1,MPI_INT,comm);
13471eb62cbbSBarry Smith   aij->cowners[0] = 0;
13481eb62cbbSBarry Smith   for ( i=2; i<=aij->numtids; i++ ) {
13491eb62cbbSBarry Smith     aij->cowners[i] += aij->cowners[i-1];
13508a729477SBarry Smith   }
13511eb62cbbSBarry Smith   aij->cstart = aij->cowners[aij->mytid];
13521eb62cbbSBarry Smith   aij->cend   = aij->cowners[aij->mytid+1];
13538a729477SBarry Smith 
1354fafbff53SBarry Smith   ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&aij->A);
135578b31e54SBarry Smith   CHKERRQ(ierr);
1356a5a9c739SBarry Smith   PLogObjectParent(mat,aij->A);
1357fafbff53SBarry Smith   ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&aij->B);
135878b31e54SBarry Smith   CHKERRQ(ierr);
1359a5a9c739SBarry Smith   PLogObjectParent(mat,aij->B);
13608a729477SBarry Smith 
13611eb62cbbSBarry Smith   /* build cache for off array entries formed */
136278b31e54SBarry Smith   ierr = StashBuild_Private(&aij->stash); CHKERRQ(ierr);
13639e25ed09SBarry Smith   aij->colmap    = 0;
13649e25ed09SBarry Smith   aij->garray    = 0;
13658a729477SBarry Smith 
13661eb62cbbSBarry Smith   /* stuff used for matrix vector multiply */
13671eb62cbbSBarry Smith   aij->lvec      = 0;
13681eb62cbbSBarry Smith   aij->Mvctx     = 0;
1369d6dfbf8fSBarry Smith   aij->assembled = 0;
13708a729477SBarry Smith 
1371d6dfbf8fSBarry Smith   *newmat = mat;
1372d6dfbf8fSBarry Smith   return 0;
1373d6dfbf8fSBarry Smith }
1374c74985f6SBarry Smith 
1375ff7509f2SBarry Smith static int MatCopyPrivate_MPIAIJ(Mat matin,Mat *newmat)
1376d6dfbf8fSBarry Smith {
1377d6dfbf8fSBarry Smith   Mat        mat;
137844a69424SLois Curfman McInnes   Mat_MPIAIJ *aij,*oldmat = (Mat_MPIAIJ *) matin->data;
13792ee70a88SLois Curfman McInnes   int        ierr, len;
1380d6dfbf8fSBarry Smith   *newmat      = 0;
1381d6dfbf8fSBarry Smith 
1382bbb6d6a8SBarry Smith   if (!oldmat->assembled)
1383bbb6d6a8SBarry Smith     SETERRQ(1,"MatCopyPrivate_MPIAIJ:Cannot copy unassembled matrix");
138444a69424SLois Curfman McInnes   PETSCHEADERCREATE(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm);
1385a5a9c739SBarry Smith   PLogObjectCreate(mat);
138678b31e54SBarry Smith   mat->data       = (void *) (aij = PETSCNEW(Mat_MPIAIJ)); CHKPTRQ(aij);
1387*dbb450caSBarry Smith   PETSCMEMCPY(&mat->ops,&MatOps,sizeof(struct _MatOps));
138844a69424SLois Curfman McInnes   mat->destroy    = MatDestroy_MPIAIJ;
138944a69424SLois Curfman McInnes   mat->view       = MatView_MPIAIJ;
1390d6dfbf8fSBarry Smith   mat->factor     = matin->factor;
1391d6dfbf8fSBarry Smith 
1392d6dfbf8fSBarry Smith   aij->m          = oldmat->m;
1393d6dfbf8fSBarry Smith   aij->n          = oldmat->n;
1394d6dfbf8fSBarry Smith   aij->M          = oldmat->M;
1395d6dfbf8fSBarry Smith   aij->N          = oldmat->N;
1396d6dfbf8fSBarry Smith 
1397d6dfbf8fSBarry Smith   aij->assembled  = 1;
1398d6dfbf8fSBarry Smith   aij->rstart     = oldmat->rstart;
1399d6dfbf8fSBarry Smith   aij->rend       = oldmat->rend;
1400d6dfbf8fSBarry Smith   aij->cstart     = oldmat->cstart;
1401d6dfbf8fSBarry Smith   aij->cend       = oldmat->cend;
1402d6dfbf8fSBarry Smith   aij->numtids    = oldmat->numtids;
1403d6dfbf8fSBarry Smith   aij->mytid      = oldmat->mytid;
140407a0e7adSLois Curfman McInnes   aij->insertmode = NOTSETVALUES;
1405d6dfbf8fSBarry Smith 
140678b31e54SBarry Smith   aij->rowners        = (int *) PETSCMALLOC( (aij->numtids+1)*sizeof(int) );
140778b31e54SBarry Smith   CHKPTRQ(aij->rowners);
1408464493b3SBarry Smith   PLogObjectMemory(mat,(aij->numtids+1)*sizeof(int)+sizeof(struct _Mat)+
1409464493b3SBarry Smith                        sizeof(Mat_MPIAIJ));
141078b31e54SBarry Smith   PETSCMEMCPY(aij->rowners,oldmat->rowners,(aij->numtids+1)*sizeof(int));
141178b31e54SBarry Smith   ierr = StashInitialize_Private(&aij->stash); CHKERRQ(ierr);
14122ee70a88SLois Curfman McInnes   if (oldmat->colmap) {
141378b31e54SBarry Smith     aij->colmap      = (int *) PETSCMALLOC( (aij->N)*sizeof(int) );
141478b31e54SBarry Smith     CHKPTRQ(aij->colmap);
1415464493b3SBarry Smith     PLogObjectMemory(mat,(aij->N)*sizeof(int));
141678b31e54SBarry Smith     PETSCMEMCPY(aij->colmap,oldmat->colmap,(aij->N)*sizeof(int));
14172ee70a88SLois Curfman McInnes   } else aij->colmap = 0;
1418ec8511deSBarry Smith   if (oldmat->garray && (len = ((Mat_SeqAIJ *) (oldmat->B->data))->n)) {
141978b31e54SBarry Smith     aij->garray      = (int *) PETSCMALLOC(len*sizeof(int) ); CHKPTRQ(aij->garray);
1420464493b3SBarry Smith     PLogObjectMemory(mat,len*sizeof(int));
142178b31e54SBarry Smith     PETSCMEMCPY(aij->garray,oldmat->garray,len*sizeof(int));
14222ee70a88SLois Curfman McInnes   } else aij->garray = 0;
1423d6dfbf8fSBarry Smith 
142478b31e54SBarry Smith   ierr =  VecDuplicate(oldmat->lvec,&aij->lvec); CHKERRQ(ierr);
1425a5a9c739SBarry Smith   PLogObjectParent(mat,aij->lvec);
142678b31e54SBarry Smith   ierr =  VecScatterCtxCopy(oldmat->Mvctx,&aij->Mvctx); CHKERRQ(ierr);
1427a5a9c739SBarry Smith   PLogObjectParent(mat,aij->Mvctx);
142878b31e54SBarry Smith   ierr =  MatConvert(oldmat->A,MATSAME,&aij->A); CHKERRQ(ierr);
1429a5a9c739SBarry Smith   PLogObjectParent(mat,aij->A);
143078b31e54SBarry Smith   ierr =  MatConvert(oldmat->B,MATSAME,&aij->B); CHKERRQ(ierr);
1431a5a9c739SBarry Smith   PLogObjectParent(mat,aij->B);
14328a729477SBarry Smith   *newmat = mat;
14338a729477SBarry Smith   return 0;
14348a729477SBarry Smith }
1435