xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 1ca473b0f7c3907bef23e1dfbf3df215de8cdc7f)
1cb512458SBarry Smith #ifndef lint
2*1ca473b0SSatish Balay static char vcid[] = "$Id: mpiaij.c,v 1.149 1996/07/02 18:06:22 bsmith Exp balay $";
3cb512458SBarry Smith #endif
48a729477SBarry Smith 
51eb62cbbSBarry Smith #include "mpiaij.h"
6f5eb4b81SSatish Balay #include "src/vec/vecimpl.h"
7d9942c19SSatish Balay #include "src/inline/spops.h"
88a729477SBarry Smith 
99e25ed09SBarry Smith /* local utility routine that creates a mapping from the global column
109e25ed09SBarry Smith number to the local number in the off-diagonal part of the local
119e25ed09SBarry Smith storage of the matrix.  This is done in a non scable way since the
129e25ed09SBarry Smith length of colmap equals the global matrix length.
139e25ed09SBarry Smith */
14b7c46309SBarry Smith static int CreateColmap_Private(Mat mat)
159e25ed09SBarry Smith {
1644a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
17ec8511deSBarry Smith   Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
18416022c9SBarry Smith   int        n = B->n,i,shift = B->indexshift;
19dbb450caSBarry Smith 
200452661fSBarry Smith   aij->colmap = (int *) PetscMalloc(aij->N*sizeof(int));CHKPTRQ(aij->colmap);
21464493b3SBarry Smith   PLogObjectMemory(mat,aij->N*sizeof(int));
22cddf8d76SBarry Smith   PetscMemzero(aij->colmap,aij->N*sizeof(int));
23dbb450caSBarry Smith   for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i-shift;
249e25ed09SBarry Smith   return 0;
259e25ed09SBarry Smith }
269e25ed09SBarry Smith 
272493cbb0SBarry Smith extern int DisAssemble_MPIAIJ(Mat);
282493cbb0SBarry Smith 
2970423df4SBarry Smith static int MatGetReordering_MPIAIJ(Mat mat,MatReordering type,IS *rperm,IS *cperm)
30299609e3SLois Curfman McInnes {
31299609e3SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
32299609e3SLois Curfman McInnes   int        ierr;
3317699dbbSLois Curfman McInnes   if (aij->size == 1) {
3451c98ccdSLois Curfman McInnes     ierr = MatGetReordering(aij->A,type,rperm,cperm); CHKERRQ(ierr);
3548d91487SBarry Smith   } else SETERRQ(1,"MatGetReordering_MPIAIJ:not supported in parallel");
36299609e3SLois Curfman McInnes   return 0;
37299609e3SLois Curfman McInnes }
38299609e3SLois Curfman McInnes 
394b0e389bSBarry Smith static int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
408a729477SBarry Smith {
4144a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
42dbb450caSBarry Smith   Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data;
434b0e389bSBarry Smith   Scalar     value;
441eb62cbbSBarry Smith   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
451eb62cbbSBarry Smith   int        cstart = aij->cstart, cend = aij->cend,row,col;
464b0e389bSBarry Smith   int        shift = C->indexshift,roworiented = aij->roworiented;
478a729477SBarry Smith 
4864119d58SLois Curfman McInnes   if (aij->insertmode != NOT_SET_VALUES && aij->insertmode != addv) {
4948d91487SBarry Smith     SETERRQ(1,"MatSetValues_MPIAIJ:Cannot mix inserts and adds");
508a729477SBarry Smith   }
511eb62cbbSBarry Smith   aij->insertmode = addv;
528a729477SBarry Smith   for ( i=0; i<m; i++ ) {
534b0e389bSBarry Smith     if (im[i] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative row");
544b0e389bSBarry Smith     if (im[i] >= aij->M) SETERRQ(1,"MatSetValues_MPIAIJ:Row too large");
554b0e389bSBarry Smith     if (im[i] >= rstart && im[i] < rend) {
564b0e389bSBarry Smith       row = im[i] - rstart;
571eb62cbbSBarry Smith       for ( j=0; j<n; j++ ) {
584b0e389bSBarry Smith         if (in[j] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative column");
594b0e389bSBarry Smith         if (in[j] >= aij->N) SETERRQ(1,"MatSetValues_MPIAIJ:Col too large");
604b0e389bSBarry Smith         if (in[j] >= cstart && in[j] < cend){
614b0e389bSBarry Smith           col = in[j] - cstart;
624b0e389bSBarry Smith           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
634b0e389bSBarry Smith           ierr = MatSetValues(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
641eb62cbbSBarry Smith         }
651eb62cbbSBarry Smith         else {
66227d817aSBarry Smith           if (mat->was_assembled) {
67b7c46309SBarry Smith             if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
684b0e389bSBarry Smith             col = aij->colmap[in[j]] + shift;
69ec8511deSBarry Smith             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
702493cbb0SBarry Smith               ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
714b0e389bSBarry Smith               col =  in[j];
72d6dfbf8fSBarry Smith             }
739e25ed09SBarry Smith           }
744b0e389bSBarry Smith           else col = in[j];
754b0e389bSBarry Smith           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
764b0e389bSBarry Smith           ierr = MatSetValues(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
771eb62cbbSBarry Smith         }
781eb62cbbSBarry Smith       }
791eb62cbbSBarry Smith     }
801eb62cbbSBarry Smith     else {
814b0e389bSBarry Smith       if (roworiented) {
824b0e389bSBarry Smith         ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
834b0e389bSBarry Smith       }
844b0e389bSBarry Smith       else {
854b0e389bSBarry Smith         row = im[i];
864b0e389bSBarry Smith         for ( j=0; j<n; j++ ) {
874b0e389bSBarry Smith           ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
884b0e389bSBarry Smith         }
894b0e389bSBarry Smith       }
901eb62cbbSBarry Smith     }
918a729477SBarry Smith   }
928a729477SBarry Smith   return 0;
938a729477SBarry Smith }
948a729477SBarry Smith 
95b49de8d1SLois Curfman McInnes static int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
96b49de8d1SLois Curfman McInnes {
97b49de8d1SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
98b49de8d1SLois Curfman McInnes   Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data;
99b49de8d1SLois Curfman McInnes   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
100b49de8d1SLois Curfman McInnes   int        cstart = aij->cstart, cend = aij->cend,row,col;
101b49de8d1SLois Curfman McInnes   int        shift = C->indexshift;
102b49de8d1SLois Curfman McInnes 
103b49de8d1SLois Curfman McInnes   for ( i=0; i<m; i++ ) {
104b49de8d1SLois Curfman McInnes     if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative row");
105b49de8d1SLois Curfman McInnes     if (idxm[i] >= aij->M) SETERRQ(1,"MatGetValues_MPIAIJ:Row too large");
106b49de8d1SLois Curfman McInnes     if (idxm[i] >= rstart && idxm[i] < rend) {
107b49de8d1SLois Curfman McInnes       row = idxm[i] - rstart;
108b49de8d1SLois Curfman McInnes       for ( j=0; j<n; j++ ) {
109b49de8d1SLois Curfman McInnes         if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative column");
110b49de8d1SLois Curfman McInnes         if (idxn[j] >= aij->N) SETERRQ(1,"MatGetValues_MPIAIJ:Col too large");
111b49de8d1SLois Curfman McInnes         if (idxn[j] >= cstart && idxn[j] < cend){
112b49de8d1SLois Curfman McInnes           col = idxn[j] - cstart;
113b49de8d1SLois Curfman McInnes           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
114b49de8d1SLois Curfman McInnes         }
115b49de8d1SLois Curfman McInnes         else {
11670423df4SBarry Smith           if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
117b49de8d1SLois Curfman McInnes           col = aij->colmap[idxn[j]] + shift;
118d9d09a02SSatish Balay           if ( aij->garray[col] != idxn[j] ) *(v+i*n+j) = 0.0;
119d9d09a02SSatish Balay           else {
120b49de8d1SLois Curfman McInnes             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
121b49de8d1SLois Curfman McInnes           }
122b49de8d1SLois Curfman McInnes         }
123b49de8d1SLois Curfman McInnes       }
124d9d09a02SSatish Balay     }
125b49de8d1SLois Curfman McInnes     else {
126b49de8d1SLois Curfman McInnes       SETERRQ(1,"MatGetValues_MPIAIJ:Only local values currently supported");
127b49de8d1SLois Curfman McInnes     }
128b49de8d1SLois Curfman McInnes   }
129b49de8d1SLois Curfman McInnes   return 0;
130b49de8d1SLois Curfman McInnes }
131b49de8d1SLois Curfman McInnes 
132fc76ce87SLois Curfman McInnes static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
1338a729477SBarry Smith {
13444a69424SLois Curfman McInnes   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
135d6dfbf8fSBarry Smith   MPI_Comm    comm = mat->comm;
13617699dbbSLois Curfman McInnes   int         size = aij->size, *owners = aij->rowners;
13717699dbbSLois Curfman McInnes   int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
1381eb62cbbSBarry Smith   MPI_Request *send_waits,*recv_waits;
1396abc6512SBarry Smith   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
1401eb62cbbSBarry Smith   InsertMode  addv;
1411eb62cbbSBarry Smith   Scalar      *rvalues,*svalues;
1421eb62cbbSBarry Smith 
1431eb62cbbSBarry Smith   /* make sure all processors are either in INSERTMODE or ADDMODE */
144d65a2f8fSBarry Smith   MPI_Allreduce(&aij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
145dbb450caSBarry Smith   if (addv == (ADD_VALUES|INSERT_VALUES)) {
146bbb6d6a8SBarry Smith     SETERRQ(1,"MatAssemblyBegin_MPIAIJ:Some processors inserted others added");
1471eb62cbbSBarry Smith   }
1481eb62cbbSBarry Smith   aij->insertmode = addv; /* in case this processor had no cache */
1491eb62cbbSBarry Smith 
1501eb62cbbSBarry Smith   /*  first count number of contributors to each processor */
1510452661fSBarry Smith   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
152cddf8d76SBarry Smith   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
1530452661fSBarry Smith   owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
1541eb62cbbSBarry Smith   for ( i=0; i<aij->stash.n; i++ ) {
1551eb62cbbSBarry Smith     idx = aij->stash.idx[i];
15617699dbbSLois Curfman McInnes     for ( j=0; j<size; j++ ) {
1571eb62cbbSBarry Smith       if (idx >= owners[j] && idx < owners[j+1]) {
1581eb62cbbSBarry Smith         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
1598a729477SBarry Smith       }
1608a729477SBarry Smith     }
1618a729477SBarry Smith   }
16217699dbbSLois Curfman McInnes   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
1631eb62cbbSBarry Smith 
1641eb62cbbSBarry Smith   /* inform other processors of number of messages and max length*/
1650452661fSBarry Smith   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
16617699dbbSLois Curfman McInnes   MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);
16717699dbbSLois Curfman McInnes   nreceives = work[rank];
16817699dbbSLois Curfman McInnes   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
16917699dbbSLois Curfman McInnes   nmax = work[rank];
1700452661fSBarry Smith   PetscFree(work);
1711eb62cbbSBarry Smith 
1721eb62cbbSBarry Smith   /* post receives:
1731eb62cbbSBarry Smith        1) each message will consist of ordered pairs
1741eb62cbbSBarry Smith      (global index,value) we store the global index as a double
175d6dfbf8fSBarry Smith      to simplify the message passing.
1761eb62cbbSBarry Smith        2) since we don't know how long each individual message is we
1771eb62cbbSBarry Smith      allocate the largest needed buffer for each receive. Potentially
1781eb62cbbSBarry Smith      this is a lot of wasted space.
1791eb62cbbSBarry Smith 
1801eb62cbbSBarry Smith 
1811eb62cbbSBarry Smith        This could be done better.
1821eb62cbbSBarry Smith   */
1830452661fSBarry Smith   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
18478b31e54SBarry Smith   CHKPTRQ(rvalues);
1850452661fSBarry Smith   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
18678b31e54SBarry Smith   CHKPTRQ(recv_waits);
1871eb62cbbSBarry Smith   for ( i=0; i<nreceives; i++ ) {
188416022c9SBarry Smith     MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
1891eb62cbbSBarry Smith               comm,recv_waits+i);
1901eb62cbbSBarry Smith   }
1911eb62cbbSBarry Smith 
1921eb62cbbSBarry Smith   /* do sends:
1931eb62cbbSBarry Smith       1) starts[i] gives the starting index in svalues for stuff going to
1941eb62cbbSBarry Smith          the ith processor
1951eb62cbbSBarry Smith   */
1960452661fSBarry Smith   svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
1970452661fSBarry Smith   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
19878b31e54SBarry Smith   CHKPTRQ(send_waits);
1990452661fSBarry Smith   starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
2001eb62cbbSBarry Smith   starts[0] = 0;
20117699dbbSLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
2021eb62cbbSBarry Smith   for ( i=0; i<aij->stash.n; i++ ) {
2031eb62cbbSBarry Smith     svalues[3*starts[owner[i]]]       = (Scalar)  aij->stash.idx[i];
2041eb62cbbSBarry Smith     svalues[3*starts[owner[i]]+1]     = (Scalar)  aij->stash.idy[i];
2051eb62cbbSBarry Smith     svalues[3*(starts[owner[i]]++)+2] =  aij->stash.array[i];
2061eb62cbbSBarry Smith   }
2070452661fSBarry Smith   PetscFree(owner);
2081eb62cbbSBarry Smith   starts[0] = 0;
20917699dbbSLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
2101eb62cbbSBarry Smith   count = 0;
21117699dbbSLois Curfman McInnes   for ( i=0; i<size; i++ ) {
2121eb62cbbSBarry Smith     if (procs[i]) {
213416022c9SBarry Smith       MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
2141eb62cbbSBarry Smith                 comm,send_waits+count++);
2151eb62cbbSBarry Smith     }
2161eb62cbbSBarry Smith   }
2170452661fSBarry Smith   PetscFree(starts); PetscFree(nprocs);
2181eb62cbbSBarry Smith 
2191eb62cbbSBarry Smith   /* Free cache space */
2202191d07cSBarry Smith   PLogInfo(0,"[%d]MatAssemblyBegin_MPIAIJ:Number of off processor values %d\n",rank,aij->stash.n);
22178b31e54SBarry Smith   ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
2221eb62cbbSBarry Smith 
2231eb62cbbSBarry Smith   aij->svalues    = svalues;    aij->rvalues    = rvalues;
2241eb62cbbSBarry Smith   aij->nsends     = nsends;     aij->nrecvs     = nreceives;
2251eb62cbbSBarry Smith   aij->send_waits = send_waits; aij->recv_waits = recv_waits;
2261eb62cbbSBarry Smith   aij->rmax       = nmax;
2271eb62cbbSBarry Smith 
2281eb62cbbSBarry Smith   return 0;
2291eb62cbbSBarry Smith }
23044a69424SLois Curfman McInnes extern int MatSetUpMultiply_MPIAIJ(Mat);
2311eb62cbbSBarry Smith 
232fc76ce87SLois Curfman McInnes static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
2331eb62cbbSBarry Smith {
23444a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
235dbb450caSBarry Smith   Mat_SeqAIJ *C = (Mat_SeqAIJ *) aij->A->data;
2361eb62cbbSBarry Smith   MPI_Status  *send_status,recv_status;
237416022c9SBarry Smith   int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
238416022c9SBarry Smith   int         row,col,other_disassembled,shift = C->indexshift;
2391eb62cbbSBarry Smith   Scalar      *values,val;
2401eb62cbbSBarry Smith   InsertMode  addv = aij->insertmode;
2411eb62cbbSBarry Smith 
2421eb62cbbSBarry Smith   /*  wait on receives */
2431eb62cbbSBarry Smith   while (count) {
244d6dfbf8fSBarry Smith     MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
2451eb62cbbSBarry Smith     /* unpack receives into our local space */
246d6dfbf8fSBarry Smith     values = aij->rvalues + 3*imdex*aij->rmax;
247c60448a5SBarry Smith     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
2481eb62cbbSBarry Smith     n = n/3;
2491eb62cbbSBarry Smith     for ( i=0; i<n; i++ ) {
250227d817aSBarry Smith       row = (int) PetscReal(values[3*i]) - aij->rstart;
251227d817aSBarry Smith       col = (int) PetscReal(values[3*i+1]);
2521eb62cbbSBarry Smith       val = values[3*i+2];
2531eb62cbbSBarry Smith       if (col >= aij->cstart && col < aij->cend) {
2541eb62cbbSBarry Smith         col -= aij->cstart;
2551eb62cbbSBarry Smith         MatSetValues(aij->A,1,&row,1,&col,&val,addv);
2561eb62cbbSBarry Smith       }
2571eb62cbbSBarry Smith       else {
258227d817aSBarry Smith         if (mat->was_assembled) {
259b7c46309SBarry Smith           if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
260dbb450caSBarry Smith           col = aij->colmap[col] + shift;
261ec8511deSBarry Smith           if (col < 0  && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
2622493cbb0SBarry Smith             ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
263227d817aSBarry Smith             col = (int) PetscReal(values[3*i+1]);
264d6dfbf8fSBarry Smith           }
2659e25ed09SBarry Smith         }
2661eb62cbbSBarry Smith         MatSetValues(aij->B,1,&row,1,&col,&val,addv);
2671eb62cbbSBarry Smith       }
2681eb62cbbSBarry Smith     }
2691eb62cbbSBarry Smith     count--;
2701eb62cbbSBarry Smith   }
2710452661fSBarry Smith   PetscFree(aij->recv_waits); PetscFree(aij->rvalues);
2721eb62cbbSBarry Smith 
2731eb62cbbSBarry Smith   /* wait on sends */
2741eb62cbbSBarry Smith   if (aij->nsends) {
2750452661fSBarry Smith     send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));
27678b31e54SBarry Smith     CHKPTRQ(send_status);
2771eb62cbbSBarry Smith     MPI_Waitall(aij->nsends,aij->send_waits,send_status);
2780452661fSBarry Smith     PetscFree(send_status);
2791eb62cbbSBarry Smith   }
2800452661fSBarry Smith   PetscFree(aij->send_waits); PetscFree(aij->svalues);
2811eb62cbbSBarry Smith 
28264119d58SLois Curfman McInnes   aij->insertmode = NOT_SET_VALUES;
28378b31e54SBarry Smith   ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
28478b31e54SBarry Smith   ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);
2851eb62cbbSBarry Smith 
2862493cbb0SBarry Smith   /* determine if any processor has disassembled, if so we must
2872493cbb0SBarry Smith      also disassemble ourselfs, in order that we may reassemble. */
288227d817aSBarry Smith   MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
289227d817aSBarry Smith   if (mat->was_assembled && !other_disassembled) {
2902493cbb0SBarry Smith     ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
2912493cbb0SBarry Smith   }
2922493cbb0SBarry Smith 
293227d817aSBarry Smith   if (!mat->was_assembled && mode == FINAL_ASSEMBLY) {
29478b31e54SBarry Smith     ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
2955e42470aSBarry Smith   }
29678b31e54SBarry Smith   ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
29778b31e54SBarry Smith   ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);
2985e42470aSBarry Smith 
2997a0afa10SBarry Smith   if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;}
3008a729477SBarry Smith   return 0;
3018a729477SBarry Smith }
3028a729477SBarry Smith 
3032ee70a88SLois Curfman McInnes static int MatZeroEntries_MPIAIJ(Mat A)
3041eb62cbbSBarry Smith {
30544a69424SLois Curfman McInnes   Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
306dbd7a890SLois Curfman McInnes   int        ierr;
30778b31e54SBarry Smith   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
30878b31e54SBarry Smith   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
3091eb62cbbSBarry Smith   return 0;
3101eb62cbbSBarry Smith }
3111eb62cbbSBarry Smith 
3121eb62cbbSBarry Smith /* the code does not do the diagonal entries correctly unless the
3131eb62cbbSBarry Smith    matrix is square and the column and row owerships are identical.
3141eb62cbbSBarry Smith    This is a BUG. The only way to fix it seems to be to access
3151eb62cbbSBarry Smith    aij->A and aij->B directly and not through the MatZeroRows()
3161eb62cbbSBarry Smith    routine.
3171eb62cbbSBarry Smith */
31844a69424SLois Curfman McInnes static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
3191eb62cbbSBarry Smith {
32044a69424SLois Curfman McInnes   Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
32117699dbbSLois Curfman McInnes   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
3226abc6512SBarry Smith   int            *procs,*nprocs,j,found,idx,nsends,*work;
32317699dbbSLois Curfman McInnes   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
3245392566eSBarry Smith   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
325d6dfbf8fSBarry Smith   int            *lens,imdex,*lrows,*values;
326d6dfbf8fSBarry Smith   MPI_Comm       comm = A->comm;
3271eb62cbbSBarry Smith   MPI_Request    *send_waits,*recv_waits;
3281eb62cbbSBarry Smith   MPI_Status     recv_status,*send_status;
3291eb62cbbSBarry Smith   IS             istmp;
3301eb62cbbSBarry Smith 
33177c4ece6SBarry Smith   ierr = ISGetSize(is,&N); CHKERRQ(ierr);
33278b31e54SBarry Smith   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
3331eb62cbbSBarry Smith 
3341eb62cbbSBarry Smith   /*  first count number of contributors to each processor */
3350452661fSBarry Smith   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
336cddf8d76SBarry Smith   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
3370452661fSBarry Smith   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
3381eb62cbbSBarry Smith   for ( i=0; i<N; i++ ) {
3391eb62cbbSBarry Smith     idx = rows[i];
3401eb62cbbSBarry Smith     found = 0;
34117699dbbSLois Curfman McInnes     for ( j=0; j<size; j++ ) {
3421eb62cbbSBarry Smith       if (idx >= owners[j] && idx < owners[j+1]) {
3431eb62cbbSBarry Smith         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
3441eb62cbbSBarry Smith       }
3451eb62cbbSBarry Smith     }
346bbb6d6a8SBarry Smith     if (!found) SETERRQ(1,"MatZeroRows_MPIAIJ:Index out of range");
3471eb62cbbSBarry Smith   }
34817699dbbSLois Curfman McInnes   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
3491eb62cbbSBarry Smith 
3501eb62cbbSBarry Smith   /* inform other processors of number of messages and max length*/
3510452661fSBarry Smith   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
35217699dbbSLois Curfman McInnes   MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
35317699dbbSLois Curfman McInnes   nrecvs = work[rank];
35417699dbbSLois Curfman McInnes   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
35517699dbbSLois Curfman McInnes   nmax = work[rank];
3560452661fSBarry Smith   PetscFree(work);
3571eb62cbbSBarry Smith 
3581eb62cbbSBarry Smith   /* post receives:   */
3590452661fSBarry Smith   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
36078b31e54SBarry Smith   CHKPTRQ(rvalues);
3610452661fSBarry Smith   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
36278b31e54SBarry Smith   CHKPTRQ(recv_waits);
3631eb62cbbSBarry Smith   for ( i=0; i<nrecvs; i++ ) {
364416022c9SBarry Smith     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
3651eb62cbbSBarry Smith   }
3661eb62cbbSBarry Smith 
3671eb62cbbSBarry Smith   /* do sends:
3681eb62cbbSBarry Smith       1) starts[i] gives the starting index in svalues for stuff going to
3691eb62cbbSBarry Smith          the ith processor
3701eb62cbbSBarry Smith   */
3710452661fSBarry Smith   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
3720452661fSBarry Smith   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
37378b31e54SBarry Smith   CHKPTRQ(send_waits);
3740452661fSBarry Smith   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
3751eb62cbbSBarry Smith   starts[0] = 0;
37617699dbbSLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
3771eb62cbbSBarry Smith   for ( i=0; i<N; i++ ) {
3781eb62cbbSBarry Smith     svalues[starts[owner[i]]++] = rows[i];
3791eb62cbbSBarry Smith   }
3801eb62cbbSBarry Smith   ISRestoreIndices(is,&rows);
3811eb62cbbSBarry Smith 
3821eb62cbbSBarry Smith   starts[0] = 0;
38317699dbbSLois Curfman McInnes   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
3841eb62cbbSBarry Smith   count = 0;
38517699dbbSLois Curfman McInnes   for ( i=0; i<size; i++ ) {
3861eb62cbbSBarry Smith     if (procs[i]) {
387416022c9SBarry Smith       MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
3881eb62cbbSBarry Smith     }
3891eb62cbbSBarry Smith   }
3900452661fSBarry Smith   PetscFree(starts);
3911eb62cbbSBarry Smith 
39217699dbbSLois Curfman McInnes   base = owners[rank];
3931eb62cbbSBarry Smith 
3941eb62cbbSBarry Smith   /*  wait on receives */
3950452661fSBarry Smith   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
3961eb62cbbSBarry Smith   source = lens + nrecvs;
3971eb62cbbSBarry Smith   count  = nrecvs; slen = 0;
3981eb62cbbSBarry Smith   while (count) {
399d6dfbf8fSBarry Smith     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
4001eb62cbbSBarry Smith     /* unpack receives into our local space */
4011eb62cbbSBarry Smith     MPI_Get_count(&recv_status,MPI_INT,&n);
402d6dfbf8fSBarry Smith     source[imdex]  = recv_status.MPI_SOURCE;
403d6dfbf8fSBarry Smith     lens[imdex]  = n;
4041eb62cbbSBarry Smith     slen += n;
4051eb62cbbSBarry Smith     count--;
4061eb62cbbSBarry Smith   }
4070452661fSBarry Smith   PetscFree(recv_waits);
4081eb62cbbSBarry Smith 
4091eb62cbbSBarry Smith   /* move the data into the send scatter */
4100452661fSBarry Smith   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
4111eb62cbbSBarry Smith   count = 0;
4121eb62cbbSBarry Smith   for ( i=0; i<nrecvs; i++ ) {
4131eb62cbbSBarry Smith     values = rvalues + i*nmax;
4141eb62cbbSBarry Smith     for ( j=0; j<lens[i]; j++ ) {
4151eb62cbbSBarry Smith       lrows[count++] = values[j] - base;
4161eb62cbbSBarry Smith     }
4171eb62cbbSBarry Smith   }
4180452661fSBarry Smith   PetscFree(rvalues); PetscFree(lens);
4190452661fSBarry Smith   PetscFree(owner); PetscFree(nprocs);
4201eb62cbbSBarry Smith 
4211eb62cbbSBarry Smith   /* actually zap the local rows */
422416022c9SBarry Smith   ierr = ISCreateSeq(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
423464493b3SBarry Smith   PLogObjectParent(A,istmp);
4240452661fSBarry Smith   PetscFree(lrows);
42578b31e54SBarry Smith   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
42678b31e54SBarry Smith   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
42778b31e54SBarry Smith   ierr = ISDestroy(istmp); CHKERRQ(ierr);
4281eb62cbbSBarry Smith 
4291eb62cbbSBarry Smith   /* wait on sends */
4301eb62cbbSBarry Smith   if (nsends) {
4310452661fSBarry Smith     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
43278b31e54SBarry Smith     CHKPTRQ(send_status);
4331eb62cbbSBarry Smith     MPI_Waitall(nsends,send_waits,send_status);
4340452661fSBarry Smith     PetscFree(send_status);
4351eb62cbbSBarry Smith   }
4360452661fSBarry Smith   PetscFree(send_waits); PetscFree(svalues);
4371eb62cbbSBarry Smith 
4381eb62cbbSBarry Smith   return 0;
4391eb62cbbSBarry Smith }
4401eb62cbbSBarry Smith 
441416022c9SBarry Smith static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
4421eb62cbbSBarry Smith {
443416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
444fbd6ef76SBarry Smith   int        ierr,nt;
445416022c9SBarry Smith 
446a2ce50c7SBarry Smith   ierr = VecGetLocalSize(xx,&nt);  CHKERRQ(ierr);
447fbd6ef76SBarry Smith   if (nt != a->n) {
448fbd6ef76SBarry Smith     SETERRQ(1,"MatMult_MPIAIJ:Incompatiable parition of A and xx");
449fbd6ef76SBarry Smith   }
450a2ce50c7SBarry Smith   ierr = VecGetLocalSize(yy,&nt);  CHKERRQ(ierr);
451fbd6ef76SBarry Smith   if (nt != a->m) {
452fbd6ef76SBarry Smith     SETERRQ(1,"MatMult_MPIAIJ:Incompatiable parition of A and yy");
453fbd6ef76SBarry Smith   }
45464119d58SLois Curfman McInnes   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx); CHKERRQ(ierr);
45538597bd4SSatish Balay   ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr);
45664119d58SLois Curfman McInnes   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx); CHKERRQ(ierr);
45738597bd4SSatish Balay   ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
4581eb62cbbSBarry Smith   return 0;
4591eb62cbbSBarry Smith }
4601eb62cbbSBarry Smith 
461416022c9SBarry Smith static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
462da3a660dSBarry Smith {
463416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
464da3a660dSBarry Smith   int        ierr;
46564119d58SLois Curfman McInnes   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
46638597bd4SSatish Balay   ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
46764119d58SLois Curfman McInnes   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
46838597bd4SSatish Balay   ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
469da3a660dSBarry Smith   return 0;
470da3a660dSBarry Smith }
471da3a660dSBarry Smith 
472416022c9SBarry Smith static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
473da3a660dSBarry Smith {
474416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
475da3a660dSBarry Smith   int        ierr;
476da3a660dSBarry Smith 
477da3a660dSBarry Smith   /* do nondiagonal part */
47838597bd4SSatish Balay   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
479da3a660dSBarry Smith   /* send it on its way */
480416022c9SBarry Smith   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,
48164119d58SLois Curfman McInnes                 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
482da3a660dSBarry Smith   /* do local part */
48338597bd4SSatish Balay   ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr);
484da3a660dSBarry Smith   /* receive remote parts: note this assumes the values are not actually */
485da3a660dSBarry Smith   /* inserted in yy until the next line, which is true for my implementation*/
486da3a660dSBarry Smith   /* but is not perhaps always true. */
487416022c9SBarry Smith   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,
48864119d58SLois Curfman McInnes                   (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
489da3a660dSBarry Smith   return 0;
490da3a660dSBarry Smith }
491da3a660dSBarry Smith 
492416022c9SBarry Smith static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
493da3a660dSBarry Smith {
494416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
495da3a660dSBarry Smith   int        ierr;
496da3a660dSBarry Smith 
497da3a660dSBarry Smith   /* do nondiagonal part */
49838597bd4SSatish Balay   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
499da3a660dSBarry Smith   /* send it on its way */
500416022c9SBarry Smith   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,
50164119d58SLois Curfman McInnes                  (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
502da3a660dSBarry Smith   /* do local part */
50338597bd4SSatish Balay   ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
504da3a660dSBarry Smith   /* receive remote parts: note this assumes the values are not actually */
505da3a660dSBarry Smith   /* inserted in yy until the next line, which is true for my implementation*/
506da3a660dSBarry Smith   /* but is not perhaps always true. */
507416022c9SBarry Smith   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,
50864119d58SLois Curfman McInnes                   (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
509da3a660dSBarry Smith   return 0;
510da3a660dSBarry Smith }
511da3a660dSBarry Smith 
5121eb62cbbSBarry Smith /*
5131eb62cbbSBarry Smith   This only works correctly for square matrices where the subblock A->A is the
5141eb62cbbSBarry Smith    diagonal block
5151eb62cbbSBarry Smith */
516416022c9SBarry Smith static int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
5171eb62cbbSBarry Smith {
518416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
51944cd7ae7SLois Curfman McInnes   if (a->M != a->N)
52044cd7ae7SLois Curfman McInnes     SETERRQ(1,"MatGetDiagonal_MPIAIJ:Supports only square matrix where A->A is diag block");
521416022c9SBarry Smith   return MatGetDiagonal(a->A,v);
5221eb62cbbSBarry Smith }
5231eb62cbbSBarry Smith 
524052efed2SBarry Smith static int MatScale_MPIAIJ(Scalar *aa,Mat A)
525052efed2SBarry Smith {
526052efed2SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
527052efed2SBarry Smith   int        ierr;
528052efed2SBarry Smith   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
529052efed2SBarry Smith   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
530052efed2SBarry Smith   return 0;
531052efed2SBarry Smith }
532052efed2SBarry Smith 
53344a69424SLois Curfman McInnes static int MatDestroy_MPIAIJ(PetscObject obj)
5341eb62cbbSBarry Smith {
5351eb62cbbSBarry Smith   Mat        mat = (Mat) obj;
53644a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
5371eb62cbbSBarry Smith   int        ierr;
538a5a9c739SBarry Smith #if defined(PETSC_LOG)
5396e35fa57SLois Curfman McInnes   PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N);
540a5a9c739SBarry Smith #endif
5410452661fSBarry Smith   PetscFree(aij->rowners);
54278b31e54SBarry Smith   ierr = MatDestroy(aij->A); CHKERRQ(ierr);
54378b31e54SBarry Smith   ierr = MatDestroy(aij->B); CHKERRQ(ierr);
5440452661fSBarry Smith   if (aij->colmap) PetscFree(aij->colmap);
5450452661fSBarry Smith   if (aij->garray) PetscFree(aij->garray);
5461eb62cbbSBarry Smith   if (aij->lvec)   VecDestroy(aij->lvec);
547a56f8943SBarry Smith   if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
5487a0afa10SBarry Smith   if (aij->rowvalues) PetscFree(aij->rowvalues);
5490452661fSBarry Smith   PetscFree(aij);
550a5a9c739SBarry Smith   PLogObjectDestroy(mat);
5510452661fSBarry Smith   PetscHeaderDestroy(mat);
5521eb62cbbSBarry Smith   return 0;
5531eb62cbbSBarry Smith }
5547857610eSBarry Smith #include "draw.h"
555b16a3bb1SBarry Smith #include "pinclude/pviewer.h"
556ee50ffe9SBarry Smith 
557416022c9SBarry Smith static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
5581eb62cbbSBarry Smith {
559416022c9SBarry Smith   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
560416022c9SBarry Smith   int         ierr;
561416022c9SBarry Smith 
56217699dbbSLois Curfman McInnes   if (aij->size == 1) {
563416022c9SBarry Smith     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
564416022c9SBarry Smith   }
565a523beb4SLois Curfman McInnes   else SETERRQ(1,"MatView_MPIAIJ_Binary:Only uniprocessor output supported");
566416022c9SBarry Smith   return 0;
567416022c9SBarry Smith }
568416022c9SBarry Smith 
569d7e8b826SBarry Smith static int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
570416022c9SBarry Smith {
57144a69424SLois Curfman McInnes   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
572dbb450caSBarry Smith   Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
573a56f8943SBarry Smith   int         ierr, format,shift = C->indexshift,rank;
574d636dbe3SBarry Smith   FILE        *fd;
57519bcc07fSBarry Smith   ViewerType  vtype;
576416022c9SBarry Smith 
57719bcc07fSBarry Smith   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
57819bcc07fSBarry Smith   if (vtype  == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
57990ace30eSBarry Smith     ierr = ViewerGetFormat(viewer,&format);
58090ace30eSBarry Smith     if (format == ASCII_FORMAT_INFO_DETAILED) {
58195e01e2fSLois Curfman McInnes       int nz, nzalloc, mem, flg;
582a56f8943SBarry Smith       MPI_Comm_rank(mat->comm,&rank);
58390ace30eSBarry Smith       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
584a56f8943SBarry Smith       ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem);
58595e01e2fSLois Curfman McInnes       ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr);
58677c4ece6SBarry Smith       PetscSequentialPhaseBegin(mat->comm,1);
58795e01e2fSLois Curfman McInnes       if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
58895e01e2fSLois Curfman McInnes          rank,aij->m,nz,nzalloc,mem);
58995e01e2fSLois Curfman McInnes       else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
59095e01e2fSLois Curfman McInnes          rank,aij->m,nz,nzalloc,mem);
59108480c60SBarry Smith       ierr = MatGetInfo(aij->A,MAT_LOCAL,&nz,&nzalloc,&mem);
59295e01e2fSLois Curfman McInnes       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,nz);
59308480c60SBarry Smith       ierr = MatGetInfo(aij->B,MAT_LOCAL,&nz,&nzalloc,&mem);
59495e01e2fSLois Curfman McInnes       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,nz);
595a56f8943SBarry Smith       fflush(fd);
59677c4ece6SBarry Smith       PetscSequentialPhaseEnd(mat->comm,1);
597a40aa06bSLois Curfman McInnes       ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr);
598416022c9SBarry Smith       return 0;
599416022c9SBarry Smith     }
60090ace30eSBarry Smith     else if (format == ASCII_FORMAT_INFO) {
60108480c60SBarry Smith       return 0;
60208480c60SBarry Smith     }
603416022c9SBarry Smith   }
604416022c9SBarry Smith 
60519bcc07fSBarry Smith   if (vtype == DRAW_VIEWER) {
60619bcc07fSBarry Smith     Draw       draw;
60719bcc07fSBarry Smith     PetscTruth isnull;
60819bcc07fSBarry Smith     ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
60919bcc07fSBarry Smith     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
61019bcc07fSBarry Smith   }
61119bcc07fSBarry Smith 
61219bcc07fSBarry Smith   if (vtype == ASCII_FILE_VIEWER) {
61390ace30eSBarry Smith     ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
61477c4ece6SBarry Smith     PetscSequentialPhaseBegin(mat->comm,1);
615d13ab20cSBarry Smith     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
61617699dbbSLois Curfman McInnes            aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
6171eb62cbbSBarry Smith            aij->cend);
61878b31e54SBarry Smith     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
61978b31e54SBarry Smith     ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
620d13ab20cSBarry Smith     fflush(fd);
62177c4ece6SBarry Smith     PetscSequentialPhaseEnd(mat->comm,1);
622d13ab20cSBarry Smith   }
623416022c9SBarry Smith   else {
624a56f8943SBarry Smith     int size = aij->size;
625a56f8943SBarry Smith     rank = aij->rank;
62617699dbbSLois Curfman McInnes     if (size == 1) {
62778b31e54SBarry Smith       ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
62895373324SBarry Smith     }
62995373324SBarry Smith     else {
63095373324SBarry Smith       /* assemble the entire matrix onto first processor. */
63195373324SBarry Smith       Mat         A;
632ec8511deSBarry Smith       Mat_SeqAIJ *Aloc;
6332eb8c8abSBarry Smith       int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
63495373324SBarry Smith       Scalar      *a;
6352ee70a88SLois Curfman McInnes 
63617699dbbSLois Curfman McInnes       if (!rank) {
637b4fd4287SBarry Smith         ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
638c451ab25SLois Curfman McInnes                CHKERRQ(ierr);
63995373324SBarry Smith       }
64095373324SBarry Smith       else {
641b4fd4287SBarry Smith         ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
642c451ab25SLois Curfman McInnes                CHKERRQ(ierr);
64395373324SBarry Smith       }
644464493b3SBarry Smith       PLogObjectParent(mat,A);
645416022c9SBarry Smith 
64695373324SBarry Smith       /* copy over the A part */
647ec8511deSBarry Smith       Aloc = (Mat_SeqAIJ*) aij->A->data;
6482ee70a88SLois Curfman McInnes       m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
64995373324SBarry Smith       row = aij->rstart;
650dbb450caSBarry Smith       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
65195373324SBarry Smith       for ( i=0; i<m; i++ ) {
652416022c9SBarry Smith         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
65395373324SBarry Smith         row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
65495373324SBarry Smith       }
6552ee70a88SLois Curfman McInnes       aj = Aloc->j;
656dbb450caSBarry Smith       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}
65795373324SBarry Smith 
65895373324SBarry Smith       /* copy over the B part */
659ec8511deSBarry Smith       Aloc = (Mat_SeqAIJ*) aij->B->data;
6602ee70a88SLois Curfman McInnes       m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
66195373324SBarry Smith       row = aij->rstart;
6620452661fSBarry Smith       ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
663dbb450caSBarry Smith       for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
66495373324SBarry Smith       for ( i=0; i<m; i++ ) {
665416022c9SBarry Smith         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
66695373324SBarry Smith         row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
66795373324SBarry Smith       }
6680452661fSBarry Smith       PetscFree(ct);
66978b31e54SBarry Smith       ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
67078b31e54SBarry Smith       ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
67117699dbbSLois Curfman McInnes       if (!rank) {
67278b31e54SBarry Smith         ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
67395373324SBarry Smith       }
67478b31e54SBarry Smith       ierr = MatDestroy(A); CHKERRQ(ierr);
67595373324SBarry Smith     }
67695373324SBarry Smith   }
6771eb62cbbSBarry Smith   return 0;
6781eb62cbbSBarry Smith }
6791eb62cbbSBarry Smith 
680416022c9SBarry Smith static int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
681416022c9SBarry Smith {
682416022c9SBarry Smith   Mat         mat = (Mat) obj;
683416022c9SBarry Smith   int         ierr;
68419bcc07fSBarry Smith   ViewerType  vtype;
685416022c9SBarry Smith 
686416022c9SBarry Smith   if (!viewer) {
68719bcc07fSBarry Smith     viewer = STDOUT_VIEWER_SELF;
688416022c9SBarry Smith   }
68919bcc07fSBarry Smith   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
69019bcc07fSBarry Smith   if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
69119bcc07fSBarry Smith       vtype == DRAW_VIEWER       || vtype == MATLAB_VIEWER) {
692d7e8b826SBarry Smith     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
693416022c9SBarry Smith   }
69419bcc07fSBarry Smith   else if (vtype == BINARY_FILE_VIEWER) {
695416022c9SBarry Smith     return MatView_MPIAIJ_Binary(mat,viewer);
696416022c9SBarry Smith   }
697416022c9SBarry Smith   return 0;
698416022c9SBarry Smith }
699416022c9SBarry Smith 
7001eb62cbbSBarry Smith /*
7011eb62cbbSBarry Smith     This has to provide several versions.
7021eb62cbbSBarry Smith 
7031eb62cbbSBarry Smith      1) per sequential
7041eb62cbbSBarry Smith      2) a) use only local smoothing updating outer values only once.
7051eb62cbbSBarry Smith         b) local smoothing updating outer values each inner iteration
706d6dfbf8fSBarry Smith      3) color updating out values betwen colors.
7071eb62cbbSBarry Smith */
708fc76ce87SLois Curfman McInnes static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
709dbb450caSBarry Smith                            double fshift,int its,Vec xx)
7108a729477SBarry Smith {
71144a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
712d6dfbf8fSBarry Smith   Mat        AA = mat->A, BB = mat->B;
713ec8511deSBarry Smith   Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
7146abc6512SBarry Smith   Scalar     zero = 0.0,*b,*x,*xs,*ls,d,*v,sum,scale,*t,*ts;
7156abc6512SBarry Smith   int        ierr,*idx, *diag;
716416022c9SBarry Smith   int        n = mat->n, m = mat->m, i,shift = A->indexshift;
717da3a660dSBarry Smith   Vec        tt;
7188a729477SBarry Smith 
719d6dfbf8fSBarry Smith   VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
720dbb450caSBarry Smith   xs = x + shift; /* shift by one for index start of 1 */
721dbb450caSBarry Smith   ls = ls + shift;
722ec8511deSBarry Smith   if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;}
723d6dfbf8fSBarry Smith   diag = A->diag;
724acb40c82SBarry Smith   if (flag == SOR_APPLY_UPPER || flag == SOR_APPLY_LOWER) {
72548d91487SBarry Smith     SETERRQ(1,"MatRelax_MPIAIJ:Option not supported");
726acb40c82SBarry Smith   }
727da3a660dSBarry Smith   if (flag & SOR_EISENSTAT) {
728da3a660dSBarry Smith     /* Let  A = L + U + D; where L is lower trianglar,
729da3a660dSBarry Smith     U is upper triangular, E is diagonal; This routine applies
730da3a660dSBarry Smith 
731da3a660dSBarry Smith             (L + E)^{-1} A (U + E)^{-1}
732da3a660dSBarry Smith 
733da3a660dSBarry Smith     to a vector efficiently using Eisenstat's trick. This is for
734da3a660dSBarry Smith     the case of SSOR preconditioner, so E is D/omega where omega
735da3a660dSBarry Smith     is the relaxation factor.
736da3a660dSBarry Smith     */
73778b31e54SBarry Smith     ierr = VecDuplicate(xx,&tt); CHKERRQ(ierr);
738464493b3SBarry Smith     PLogObjectParent(matin,tt);
739da3a660dSBarry Smith     VecGetArray(tt,&t);
740da3a660dSBarry Smith     scale = (2.0/omega) - 1.0;
741da3a660dSBarry Smith     /*  x = (E + U)^{-1} b */
742da3a660dSBarry Smith     VecSet(&zero,mat->lvec);
74364119d58SLois Curfman McInnes     ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
74478b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
745da3a660dSBarry Smith     for ( i=m-1; i>-1; i-- ) {
746da3a660dSBarry Smith       n    = A->i[i+1] - diag[i] - 1;
747dbb450caSBarry Smith       idx  = A->j + diag[i] + !shift;
748dbb450caSBarry Smith       v    = A->a + diag[i] + !shift;
749da3a660dSBarry Smith       sum  = b[i];
750da3a660dSBarry Smith       SPARSEDENSEMDOT(sum,xs,v,idx,n);
751dbb450caSBarry Smith       d    = fshift + A->a[diag[i]+shift];
752da3a660dSBarry Smith       n    = B->i[i+1] - B->i[i];
753dbb450caSBarry Smith       idx  = B->j + B->i[i] + shift;
754dbb450caSBarry Smith       v    = B->a + B->i[i] + shift;
755da3a660dSBarry Smith       SPARSEDENSEMDOT(sum,ls,v,idx,n);
756da3a660dSBarry Smith       x[i] = omega*(sum/d);
757da3a660dSBarry Smith     }
75864119d58SLois Curfman McInnes     ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
75978b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
760da3a660dSBarry Smith 
761da3a660dSBarry Smith     /*  t = b - (2*E - D)x */
762da3a660dSBarry Smith     v = A->a;
763dbb450caSBarry Smith     for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ + shift])*x[i]; }
764da3a660dSBarry Smith 
765da3a660dSBarry Smith     /*  t = (E + L)^{-1}t */
766dbb450caSBarry Smith     ts = t + shift; /* shifted by one for index start of a or mat->j*/
767da3a660dSBarry Smith     diag = A->diag;
768da3a660dSBarry Smith     VecSet(&zero,mat->lvec);
76964119d58SLois Curfman McInnes     ierr = VecPipelineBegin(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
77078b31e54SBarry Smith                                                  mat->Mvctx); CHKERRQ(ierr);
771da3a660dSBarry Smith     for ( i=0; i<m; i++ ) {
772da3a660dSBarry Smith       n    = diag[i] - A->i[i];
773dbb450caSBarry Smith       idx  = A->j + A->i[i] + shift;
774dbb450caSBarry Smith       v    = A->a + A->i[i] + shift;
775da3a660dSBarry Smith       sum  = t[i];
776da3a660dSBarry Smith       SPARSEDENSEMDOT(sum,ts,v,idx,n);
777dbb450caSBarry Smith       d    = fshift + A->a[diag[i]+shift];
778da3a660dSBarry Smith       n    = B->i[i+1] - B->i[i];
779dbb450caSBarry Smith       idx  = B->j + B->i[i] + shift;
780dbb450caSBarry Smith       v    = B->a + B->i[i] + shift;
781da3a660dSBarry Smith       SPARSEDENSEMDOT(sum,ls,v,idx,n);
782da3a660dSBarry Smith       t[i] = omega*(sum/d);
783da3a660dSBarry Smith     }
78464119d58SLois Curfman McInnes     ierr = VecPipelineEnd(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
78578b31e54SBarry Smith                                                     mat->Mvctx); CHKERRQ(ierr);
786da3a660dSBarry Smith     /*  x = x + t */
787da3a660dSBarry Smith     for ( i=0; i<m; i++ ) { x[i] += t[i]; }
788da3a660dSBarry Smith     VecDestroy(tt);
789da3a660dSBarry Smith     return 0;
790da3a660dSBarry Smith   }
791da3a660dSBarry Smith 
7921eb62cbbSBarry Smith 
793d6dfbf8fSBarry Smith   if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){
794da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
795da3a660dSBarry Smith       VecSet(&zero,mat->lvec); VecSet(&zero,xx);
796da3a660dSBarry Smith     }
797da3a660dSBarry Smith     else {
79864119d58SLois Curfman McInnes       ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
79978b31e54SBarry Smith       CHKERRQ(ierr);
80064119d58SLois Curfman McInnes       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
80178b31e54SBarry Smith       CHKERRQ(ierr);
802da3a660dSBarry Smith     }
803d6dfbf8fSBarry Smith     while (its--) {
804d6dfbf8fSBarry Smith       /* go down through the rows */
80564119d58SLois Curfman McInnes       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
80678b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
807d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
808d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
809dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
810dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
811d6dfbf8fSBarry Smith         sum  = b[i];
812d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
813dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
814d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
815dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
816dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
817d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
818d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
819d6dfbf8fSBarry Smith       }
82064119d58SLois Curfman McInnes       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
82178b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
822d6dfbf8fSBarry Smith       /* come up through the rows */
82364119d58SLois Curfman McInnes       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
82478b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
825d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
826d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
827dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
828dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
829d6dfbf8fSBarry Smith         sum  = b[i];
830d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
831dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
832d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
833dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
834dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
835d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
836d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
837d6dfbf8fSBarry Smith       }
83864119d58SLois Curfman McInnes       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
83978b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
840d6dfbf8fSBarry Smith     }
841d6dfbf8fSBarry Smith   }
842d6dfbf8fSBarry Smith   else if (flag & SOR_FORWARD_SWEEP){
843da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
844da3a660dSBarry Smith       VecSet(&zero,mat->lvec);
84564119d58SLois Curfman McInnes       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
84678b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
847da3a660dSBarry Smith       for ( i=0; i<m; i++ ) {
848da3a660dSBarry Smith         n    = diag[i] - A->i[i];
849dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
850dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
851da3a660dSBarry Smith         sum  = b[i];
852da3a660dSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
853dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
854da3a660dSBarry Smith         n    = B->i[i+1] - B->i[i];
855dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
856dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
857da3a660dSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
858da3a660dSBarry Smith         x[i] = omega*(sum/d);
859da3a660dSBarry Smith       }
86064119d58SLois Curfman McInnes       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
86178b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
862da3a660dSBarry Smith       its--;
863da3a660dSBarry Smith     }
864d6dfbf8fSBarry Smith     while (its--) {
86564119d58SLois Curfman McInnes       ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
86678b31e54SBarry Smith       CHKERRQ(ierr);
86764119d58SLois Curfman McInnes       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
86878b31e54SBarry Smith       CHKERRQ(ierr);
86964119d58SLois Curfman McInnes       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
87078b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
871d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
872d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
873dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
874dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
875d6dfbf8fSBarry Smith         sum  = b[i];
876d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
877dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
878d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
879dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
880dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
881d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
882d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
883d6dfbf8fSBarry Smith       }
88464119d58SLois Curfman McInnes       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
88578b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
886d6dfbf8fSBarry Smith     }
887d6dfbf8fSBarry Smith   }
888d6dfbf8fSBarry Smith   else if (flag & SOR_BACKWARD_SWEEP){
889da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
890da3a660dSBarry Smith       VecSet(&zero,mat->lvec);
89164119d58SLois Curfman McInnes       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
89278b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
893da3a660dSBarry Smith       for ( i=m-1; i>-1; i-- ) {
894da3a660dSBarry Smith         n    = A->i[i+1] - diag[i] - 1;
895dbb450caSBarry Smith         idx  = A->j + diag[i] + !shift;
896dbb450caSBarry Smith         v    = A->a + diag[i] + !shift;
897da3a660dSBarry Smith         sum  = b[i];
898da3a660dSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
899dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
900da3a660dSBarry Smith         n    = B->i[i+1] - B->i[i];
901dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
902dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
903da3a660dSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
904da3a660dSBarry Smith         x[i] = omega*(sum/d);
905da3a660dSBarry Smith       }
90664119d58SLois Curfman McInnes       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
90778b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
908da3a660dSBarry Smith       its--;
909da3a660dSBarry Smith     }
910d6dfbf8fSBarry Smith     while (its--) {
91164119d58SLois Curfman McInnes       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN,
91278b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
91364119d58SLois Curfman McInnes       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN,
91478b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
91564119d58SLois Curfman McInnes       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
91678b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
917d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
918d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
919dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
920dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
921d6dfbf8fSBarry Smith         sum  = b[i];
922d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
923dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
924d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
925dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
926dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
927d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
928d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
929d6dfbf8fSBarry Smith       }
93064119d58SLois Curfman McInnes       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
93178b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
932d6dfbf8fSBarry Smith     }
933d6dfbf8fSBarry Smith   }
934d6dfbf8fSBarry Smith   else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
935da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
93638597bd4SSatish Balay       return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
937da3a660dSBarry Smith     }
93864119d58SLois Curfman McInnes     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
93978b31e54SBarry Smith     CHKERRQ(ierr);
94064119d58SLois Curfman McInnes     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
94178b31e54SBarry Smith     CHKERRQ(ierr);
942d6dfbf8fSBarry Smith     while (its--) {
943d6dfbf8fSBarry Smith       /* go down through the rows */
944d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
945d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
946dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
947dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
948d6dfbf8fSBarry Smith         sum  = b[i];
949d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
950dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
951d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
952dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
953dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
954d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
955d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
956d6dfbf8fSBarry Smith       }
957d6dfbf8fSBarry Smith       /* come up through the rows */
958d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
959d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
960dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
961dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
962d6dfbf8fSBarry Smith         sum  = b[i];
963d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
964dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
965d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
966dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
967dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
968d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
969d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
970d6dfbf8fSBarry Smith       }
971d6dfbf8fSBarry Smith     }
972d6dfbf8fSBarry Smith   }
973d6dfbf8fSBarry Smith   else if (flag & SOR_LOCAL_FORWARD_SWEEP){
974da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
97538597bd4SSatish Balay       return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
976da3a660dSBarry Smith     }
97764119d58SLois Curfman McInnes     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
97878b31e54SBarry Smith     CHKERRQ(ierr);
97964119d58SLois Curfman McInnes     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
98078b31e54SBarry Smith     CHKERRQ(ierr);
981d6dfbf8fSBarry Smith     while (its--) {
982d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
983d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
984dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
985dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
986d6dfbf8fSBarry Smith         sum  = b[i];
987d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
988dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
989d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
990dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
991dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
992d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
993d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
994d6dfbf8fSBarry Smith       }
995d6dfbf8fSBarry Smith     }
996d6dfbf8fSBarry Smith   }
997d6dfbf8fSBarry Smith   else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
998da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
99938597bd4SSatish Balay       return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
1000da3a660dSBarry Smith     }
100164119d58SLois Curfman McInnes     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,
100278b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
100364119d58SLois Curfman McInnes     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,
100478b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
1005d6dfbf8fSBarry Smith     while (its--) {
1006d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
1007d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
1008dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
1009dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
1010d6dfbf8fSBarry Smith         sum  = b[i];
1011d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
1012dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
1013d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
1014dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
1015dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
1016d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
1017d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
1018d6dfbf8fSBarry Smith       }
1019d6dfbf8fSBarry Smith     }
1020d6dfbf8fSBarry Smith   }
10218a729477SBarry Smith   return 0;
10228a729477SBarry Smith }
1023a66be287SLois Curfman McInnes 
1024fc76ce87SLois Curfman McInnes static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,int *nz,
1025fc76ce87SLois Curfman McInnes                              int *nzalloc,int *mem)
1026a66be287SLois Curfman McInnes {
1027a66be287SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1028a66be287SLois Curfman McInnes   Mat        A = mat->A, B = mat->B;
1029a66be287SLois Curfman McInnes   int        ierr, isend[3], irecv[3], nzA, nzallocA, memA;
1030a66be287SLois Curfman McInnes 
103178b31e54SBarry Smith   ierr = MatGetInfo(A,MAT_LOCAL,&nzA,&nzallocA,&memA); CHKERRQ(ierr);
103278b31e54SBarry Smith   ierr = MatGetInfo(B,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr);
1033a66be287SLois Curfman McInnes   isend[0] += nzA; isend[1] += nzallocA; isend[2] += memA;
1034a66be287SLois Curfman McInnes   if (flag == MAT_LOCAL) {
1035bcd2baecSBarry Smith     if (nz)       *nz      = isend[0];
1036bcd2baecSBarry Smith     if (nzalloc)  *nzalloc = isend[1];
1037bcd2baecSBarry Smith     if (mem)      *mem     = isend[2];
1038a66be287SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_MAX) {
1039d65a2f8fSBarry Smith     MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_MAX,matin->comm);
1040bcd2baecSBarry Smith     if (nz)      *nz      = irecv[0];
1041bcd2baecSBarry Smith     if (nzalloc) *nzalloc = irecv[1];
1042bcd2baecSBarry Smith     if (mem)     *mem     = irecv[2];
1043a66be287SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_SUM) {
1044d65a2f8fSBarry Smith     MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_SUM,matin->comm);
1045bcd2baecSBarry Smith     if (nz)      *nz      = irecv[0];
1046bcd2baecSBarry Smith     if (nzalloc) *nzalloc = irecv[1];
1047bcd2baecSBarry Smith     if (mem)     *mem     = irecv[2];
1048a66be287SLois Curfman McInnes   }
1049a66be287SLois Curfman McInnes   return 0;
1050a66be287SLois Curfman McInnes }
1051a66be287SLois Curfman McInnes 
1052299609e3SLois Curfman McInnes extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*);
1053299609e3SLois Curfman McInnes extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*);
1054299609e3SLois Curfman McInnes extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double);
1055299609e3SLois Curfman McInnes extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *);
1056299609e3SLois Curfman McInnes extern int MatSolve_MPIAIJ(Mat,Vec,Vec);
1057299609e3SLois Curfman McInnes extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec);
1058299609e3SLois Curfman McInnes extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec);
1059299609e3SLois Curfman McInnes extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec);
1060299609e3SLois Curfman McInnes 
1061416022c9SBarry Smith static int MatSetOption_MPIAIJ(Mat A,MatOption op)
1062c74985f6SBarry Smith {
1063c0bbcb79SLois Curfman McInnes   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1064c74985f6SBarry Smith 
1065c0bbcb79SLois Curfman McInnes   if (op == NO_NEW_NONZERO_LOCATIONS ||
1066c0bbcb79SLois Curfman McInnes       op == YES_NEW_NONZERO_LOCATIONS ||
1067c0bbcb79SLois Curfman McInnes       op == COLUMNS_SORTED ||
1068c0bbcb79SLois Curfman McInnes       op == ROW_ORIENTED) {
1069c0bbcb79SLois Curfman McInnes         MatSetOption(a->A,op);
1070c0bbcb79SLois Curfman McInnes         MatSetOption(a->B,op);
1071c74985f6SBarry Smith   }
1072c0bbcb79SLois Curfman McInnes   else if (op == ROWS_SORTED ||
1073c0bbcb79SLois Curfman McInnes            op == SYMMETRIC_MATRIX ||
1074c0bbcb79SLois Curfman McInnes            op == STRUCTURALLY_SYMMETRIC_MATRIX ||
1075c0bbcb79SLois Curfman McInnes            op == YES_NEW_DIAGONALS)
107694a424c1SBarry Smith     PLogInfo(A,"Info:MatSetOption_MPIAIJ:Option ignored\n");
10774b0e389bSBarry Smith   else if (op == COLUMN_ORIENTED) {
10784b0e389bSBarry Smith     a->roworiented = 0;
10794b0e389bSBarry Smith     MatSetOption(a->A,op);
10804b0e389bSBarry Smith     MatSetOption(a->B,op);
10814b0e389bSBarry Smith   }
1082c0bbcb79SLois Curfman McInnes   else if (op == NO_NEW_DIAGONALS)
10834d39ef84SLois Curfman McInnes     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:NO_NEW_DIAGONALS");}
1084c0bbcb79SLois Curfman McInnes   else
10854d39ef84SLois Curfman McInnes     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:unknown option");}
1086c74985f6SBarry Smith   return 0;
1087c74985f6SBarry Smith }
1088c74985f6SBarry Smith 
1089d1710a03SLois Curfman McInnes static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
1090c74985f6SBarry Smith {
109144a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1092c74985f6SBarry Smith   *m = mat->M; *n = mat->N;
1093c74985f6SBarry Smith   return 0;
1094c74985f6SBarry Smith }
1095c74985f6SBarry Smith 
1096d1710a03SLois Curfman McInnes static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
1097c74985f6SBarry Smith {
109844a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1099b7c46309SBarry Smith   *m = mat->m; *n = mat->N;
1100c74985f6SBarry Smith   return 0;
1101c74985f6SBarry Smith }
1102c74985f6SBarry Smith 
1103d1710a03SLois Curfman McInnes static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
1104c74985f6SBarry Smith {
110544a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1106c74985f6SBarry Smith   *m = mat->rstart; *n = mat->rend;
1107c74985f6SBarry Smith   return 0;
1108c74985f6SBarry Smith }
1109c74985f6SBarry Smith 
11106d84be18SBarry Smith extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
11116d84be18SBarry Smith extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
11126d84be18SBarry Smith 
11136d84be18SBarry Smith int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
111439e00950SLois Curfman McInnes {
1115154123eaSLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
111670f0671dSBarry Smith   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
1117154123eaSLois Curfman McInnes   int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
1118154123eaSLois Curfman McInnes   int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
111970f0671dSBarry Smith   int        *cmap, *idx_p;
112039e00950SLois Curfman McInnes 
11217a0afa10SBarry Smith   if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,"MatGetRow_MPIAIJ:Already active");
11227a0afa10SBarry Smith   mat->getrowactive = PETSC_TRUE;
11237a0afa10SBarry Smith 
112470f0671dSBarry Smith   if (!mat->rowvalues && (idx || v)) {
11257a0afa10SBarry Smith     /*
11267a0afa10SBarry Smith         allocate enough space to hold information from the longest row.
11277a0afa10SBarry Smith     */
11287a0afa10SBarry Smith     Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data;
11297a0afa10SBarry Smith     int     max = 1,n = mat->n,tmp;
11307a0afa10SBarry Smith     for ( i=0; i<n; i++ ) {
11317a0afa10SBarry Smith       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
11327a0afa10SBarry Smith       if (max < tmp) { max = tmp; }
11337a0afa10SBarry Smith     }
11347a0afa10SBarry Smith     mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar)));
11357a0afa10SBarry Smith     CHKPTRQ(mat->rowvalues);
11367a0afa10SBarry Smith     mat->rowindices = (int *) (mat->rowvalues + max);
11377a0afa10SBarry Smith   }
11387a0afa10SBarry Smith 
11397a0afa10SBarry Smith 
1140b49de8d1SLois Curfman McInnes   if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIAIJ:Only local rows")
1141abc0e9e4SLois Curfman McInnes   lrow = row - rstart;
114239e00950SLois Curfman McInnes 
1143154123eaSLois Curfman McInnes   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1144154123eaSLois Curfman McInnes   if (!v)   {pvA = 0; pvB = 0;}
1145154123eaSLois Curfman McInnes   if (!idx) {pcA = 0; if (!v) pcB = 0;}
114638597bd4SSatish Balay   ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
114738597bd4SSatish Balay   ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1148154123eaSLois Curfman McInnes   nztot = nzA + nzB;
1149154123eaSLois Curfman McInnes 
115070f0671dSBarry Smith   cmap  = mat->garray;
1151154123eaSLois Curfman McInnes   if (v  || idx) {
1152154123eaSLois Curfman McInnes     if (nztot) {
1153154123eaSLois Curfman McInnes       /* Sort by increasing column numbers, assuming A and B already sorted */
115470f0671dSBarry Smith       int imark = -1;
1155154123eaSLois Curfman McInnes       if (v) {
115670f0671dSBarry Smith         *v = v_p = mat->rowvalues;
115739e00950SLois Curfman McInnes         for ( i=0; i<nzB; i++ ) {
115870f0671dSBarry Smith           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1159154123eaSLois Curfman McInnes           else break;
1160154123eaSLois Curfman McInnes         }
1161154123eaSLois Curfman McInnes         imark = i;
116270f0671dSBarry Smith         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
116370f0671dSBarry Smith         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
1164154123eaSLois Curfman McInnes       }
1165154123eaSLois Curfman McInnes       if (idx) {
116670f0671dSBarry Smith         *idx = idx_p = mat->rowindices;
116770f0671dSBarry Smith         if (imark > -1) {
116870f0671dSBarry Smith           for ( i=0; i<imark; i++ ) {
116970f0671dSBarry Smith             idx_p[i] = cmap[cworkB[i]];
117070f0671dSBarry Smith           }
117170f0671dSBarry Smith         } else {
1172154123eaSLois Curfman McInnes           for ( i=0; i<nzB; i++ ) {
117370f0671dSBarry Smith             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1174154123eaSLois Curfman McInnes             else break;
1175154123eaSLois Curfman McInnes           }
1176154123eaSLois Curfman McInnes           imark = i;
117770f0671dSBarry Smith         }
117870f0671dSBarry Smith         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart + cworkA[i];
117970f0671dSBarry Smith         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]];
118039e00950SLois Curfman McInnes       }
118139e00950SLois Curfman McInnes     }
1182*1ca473b0SSatish Balay     else {
1183*1ca473b0SSatish Balay       if (idx) *idx = 0;
1184*1ca473b0SSatish Balay       if (v)   *v   = 0;
1185*1ca473b0SSatish Balay     }
1186154123eaSLois Curfman McInnes   }
118739e00950SLois Curfman McInnes   *nz = nztot;
118838597bd4SSatish Balay   ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
118938597bd4SSatish Balay   ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
119039e00950SLois Curfman McInnes   return 0;
119139e00950SLois Curfman McInnes }
119239e00950SLois Curfman McInnes 
11936d84be18SBarry Smith int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
119439e00950SLois Curfman McInnes {
11957a0afa10SBarry Smith   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
11967a0afa10SBarry Smith   if (aij->getrowactive == PETSC_FALSE) {
11977a0afa10SBarry Smith     SETERRQ(1,"MatRestoreRow_MPIAIJ:MatGetRow not called");
11987a0afa10SBarry Smith   }
11997a0afa10SBarry Smith   aij->getrowactive = PETSC_FALSE;
120039e00950SLois Curfman McInnes   return 0;
120139e00950SLois Curfman McInnes }
120239e00950SLois Curfman McInnes 
1203cddf8d76SBarry Smith static int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
1204855ac2c5SLois Curfman McInnes {
1205855ac2c5SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1206ec8511deSBarry Smith   Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
1207416022c9SBarry Smith   int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
1208416022c9SBarry Smith   double     sum = 0.0;
120904ca555eSLois Curfman McInnes   Scalar     *v;
121004ca555eSLois Curfman McInnes 
121117699dbbSLois Curfman McInnes   if (aij->size == 1) {
121214183eadSLois Curfman McInnes     ierr =  MatNorm(aij->A,type,norm); CHKERRQ(ierr);
121337fa93a5SLois Curfman McInnes   } else {
121404ca555eSLois Curfman McInnes     if (type == NORM_FROBENIUS) {
121504ca555eSLois Curfman McInnes       v = amat->a;
121604ca555eSLois Curfman McInnes       for (i=0; i<amat->nz; i++ ) {
121704ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX)
121804ca555eSLois Curfman McInnes         sum += real(conj(*v)*(*v)); v++;
121904ca555eSLois Curfman McInnes #else
122004ca555eSLois Curfman McInnes         sum += (*v)*(*v); v++;
122104ca555eSLois Curfman McInnes #endif
122204ca555eSLois Curfman McInnes       }
122304ca555eSLois Curfman McInnes       v = bmat->a;
122404ca555eSLois Curfman McInnes       for (i=0; i<bmat->nz; i++ ) {
122504ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX)
122604ca555eSLois Curfman McInnes         sum += real(conj(*v)*(*v)); v++;
122704ca555eSLois Curfman McInnes #else
122804ca555eSLois Curfman McInnes         sum += (*v)*(*v); v++;
122904ca555eSLois Curfman McInnes #endif
123004ca555eSLois Curfman McInnes       }
12316d84be18SBarry Smith       MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
123204ca555eSLois Curfman McInnes       *norm = sqrt(*norm);
123304ca555eSLois Curfman McInnes     }
123404ca555eSLois Curfman McInnes     else if (type == NORM_1) { /* max column norm */
123504ca555eSLois Curfman McInnes       double *tmp, *tmp2;
123604ca555eSLois Curfman McInnes       int    *jj, *garray = aij->garray;
12370452661fSBarry Smith       tmp  = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp);
12380452661fSBarry Smith       tmp2 = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp2);
1239cddf8d76SBarry Smith       PetscMemzero(tmp,aij->N*sizeof(double));
124004ca555eSLois Curfman McInnes       *norm = 0.0;
124104ca555eSLois Curfman McInnes       v = amat->a; jj = amat->j;
124204ca555eSLois Curfman McInnes       for ( j=0; j<amat->nz; j++ ) {
1243579c6b6fSBarry Smith         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
124404ca555eSLois Curfman McInnes       }
124504ca555eSLois Curfman McInnes       v = bmat->a; jj = bmat->j;
124604ca555eSLois Curfman McInnes       for ( j=0; j<bmat->nz; j++ ) {
1247579c6b6fSBarry Smith         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
124804ca555eSLois Curfman McInnes       }
12496d84be18SBarry Smith       MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);
125004ca555eSLois Curfman McInnes       for ( j=0; j<aij->N; j++ ) {
125104ca555eSLois Curfman McInnes         if (tmp2[j] > *norm) *norm = tmp2[j];
125204ca555eSLois Curfman McInnes       }
12530452661fSBarry Smith       PetscFree(tmp); PetscFree(tmp2);
125404ca555eSLois Curfman McInnes     }
125504ca555eSLois Curfman McInnes     else if (type == NORM_INFINITY) { /* max row norm */
1256515d9167SLois Curfman McInnes       double ntemp = 0.0;
125704ca555eSLois Curfman McInnes       for ( j=0; j<amat->m; j++ ) {
1258dbb450caSBarry Smith         v = amat->a + amat->i[j] + shift;
125904ca555eSLois Curfman McInnes         sum = 0.0;
126004ca555eSLois Curfman McInnes         for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
1261cddf8d76SBarry Smith           sum += PetscAbsScalar(*v); v++;
126204ca555eSLois Curfman McInnes         }
1263dbb450caSBarry Smith         v = bmat->a + bmat->i[j] + shift;
126404ca555eSLois Curfman McInnes         for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
1265cddf8d76SBarry Smith           sum += PetscAbsScalar(*v); v++;
126604ca555eSLois Curfman McInnes         }
1267515d9167SLois Curfman McInnes         if (sum > ntemp) ntemp = sum;
126804ca555eSLois Curfman McInnes       }
12696d84be18SBarry Smith       MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);
127004ca555eSLois Curfman McInnes     }
127104ca555eSLois Curfman McInnes     else {
1272515d9167SLois Curfman McInnes       SETERRQ(1,"MatNorm_MPIAIJ:No support for two norm");
127304ca555eSLois Curfman McInnes     }
127437fa93a5SLois Curfman McInnes   }
1275855ac2c5SLois Curfman McInnes   return 0;
1276855ac2c5SLois Curfman McInnes }
1277855ac2c5SLois Curfman McInnes 
12780de55854SLois Curfman McInnes static int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1279b7c46309SBarry Smith {
1280b7c46309SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1281dbb450caSBarry Smith   Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
1282416022c9SBarry Smith   int        ierr,shift = Aloc->indexshift;
1283416022c9SBarry Smith   Mat        B;
1284b7c46309SBarry Smith   int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
1285b7c46309SBarry Smith   Scalar     *array;
1286b7c46309SBarry Smith 
12873638b69dSLois Curfman McInnes   if (matout == PETSC_NULL && M != N)
12883638b69dSLois Curfman McInnes     SETERRQ(1,"MatTranspose_MPIAIJ:Square matrix only for in-place");
1289b4fd4287SBarry Smith   ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,
1290b4fd4287SBarry Smith          PETSC_NULL,&B); CHKERRQ(ierr);
1291b7c46309SBarry Smith 
1292b7c46309SBarry Smith   /* copy over the A part */
1293ec8511deSBarry Smith   Aloc = (Mat_SeqAIJ*) a->A->data;
1294b7c46309SBarry Smith   m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1295b7c46309SBarry Smith   row = a->rstart;
1296dbb450caSBarry Smith   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
1297b7c46309SBarry Smith   for ( i=0; i<m; i++ ) {
1298416022c9SBarry Smith     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1299b7c46309SBarry Smith     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1300b7c46309SBarry Smith   }
1301b7c46309SBarry Smith   aj = Aloc->j;
13024af08d9eSBarry Smith   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}
1303b7c46309SBarry Smith 
1304b7c46309SBarry Smith   /* copy over the B part */
1305ec8511deSBarry Smith   Aloc = (Mat_SeqAIJ*) a->B->data;
1306b7c46309SBarry Smith   m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1307b7c46309SBarry Smith   row = a->rstart;
13080452661fSBarry Smith   ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
1309dbb450caSBarry Smith   for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
1310b7c46309SBarry Smith   for ( i=0; i<m; i++ ) {
1311416022c9SBarry Smith     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1312b7c46309SBarry Smith     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1313b7c46309SBarry Smith   }
13140452661fSBarry Smith   PetscFree(ct);
1315b7c46309SBarry Smith   ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
1316b7c46309SBarry Smith   ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
13173638b69dSLois Curfman McInnes   if (matout != PETSC_NULL) {
13180de55854SLois Curfman McInnes     *matout = B;
13190de55854SLois Curfman McInnes   } else {
13200de55854SLois Curfman McInnes     /* This isn't really an in-place transpose .... but free data structures from a */
13210452661fSBarry Smith     PetscFree(a->rowners);
13220de55854SLois Curfman McInnes     ierr = MatDestroy(a->A); CHKERRQ(ierr);
13230de55854SLois Curfman McInnes     ierr = MatDestroy(a->B); CHKERRQ(ierr);
13240452661fSBarry Smith     if (a->colmap) PetscFree(a->colmap);
13250452661fSBarry Smith     if (a->garray) PetscFree(a->garray);
13260de55854SLois Curfman McInnes     if (a->lvec) VecDestroy(a->lvec);
1327a56f8943SBarry Smith     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
13280452661fSBarry Smith     PetscFree(a);
1329416022c9SBarry Smith     PetscMemcpy(A,B,sizeof(struct _Mat));
13300452661fSBarry Smith     PetscHeaderDestroy(B);
13310de55854SLois Curfman McInnes   }
1332b7c46309SBarry Smith   return 0;
1333b7c46309SBarry Smith }
1334b7c46309SBarry Smith 
1335682d7d0cSBarry Smith extern int MatPrintHelp_SeqAIJ(Mat);
1336682d7d0cSBarry Smith static int MatPrintHelp_MPIAIJ(Mat A)
1337682d7d0cSBarry Smith {
1338682d7d0cSBarry Smith   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1339682d7d0cSBarry Smith 
1340682d7d0cSBarry Smith   if (!a->rank) return MatPrintHelp_SeqAIJ(a->A);
1341682d7d0cSBarry Smith   else return 0;
1342682d7d0cSBarry Smith }
1343682d7d0cSBarry Smith 
1344fc76ce87SLois Curfman McInnes extern int MatConvert_MPIAIJ(Mat,MatType,Mat *);
13453d1612f7SBarry Smith static int MatConvertSameType_MPIAIJ(Mat,Mat *,int);
13462f86bd48SSatish Balay extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int);
1347598137ffSSatish Balay int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **);
13488a729477SBarry Smith /* -------------------------------------------------------------------*/
13492ee70a88SLois Curfman McInnes static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
135039e00950SLois Curfman McInnes        MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
135144a69424SLois Curfman McInnes        MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
135244a69424SLois Curfman McInnes        MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
1353299609e3SLois Curfman McInnes        MatSolve_MPIAIJ,MatSolveAdd_MPIAIJ,
1354299609e3SLois Curfman McInnes        MatSolveTrans_MPIAIJ,MatSolveTransAdd_MPIAIJ,
1355299609e3SLois Curfman McInnes        MatLUFactor_MPIAIJ,0,
135644a69424SLois Curfman McInnes        MatRelax_MPIAIJ,
1357b7c46309SBarry Smith        MatTranspose_MPIAIJ,
1358a66be287SLois Curfman McInnes        MatGetInfo_MPIAIJ,0,
1359855ac2c5SLois Curfman McInnes        MatGetDiagonal_MPIAIJ,0,MatNorm_MPIAIJ,
1360ee50ffe9SBarry Smith        MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
13611eb62cbbSBarry Smith        0,
1362299609e3SLois Curfman McInnes        MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
1363299609e3SLois Curfman McInnes        MatGetReordering_MPIAIJ,
1364299609e3SLois Curfman McInnes        MatLUFactorSymbolic_MPIAIJ,MatLUFactorNumeric_MPIAIJ,0,0,
1365d1710a03SLois Curfman McInnes        MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
1366299609e3SLois Curfman McInnes        MatILUFactorSymbolic_MPIAIJ,0,
13673d1612f7SBarry Smith        0,0,MatConvert_MPIAIJ,0,0,MatConvertSameType_MPIAIJ,0,0,
1368b49de8d1SLois Curfman McInnes        0,0,0,
1369598137ffSSatish Balay        MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0,
1370052efed2SBarry Smith        MatPrintHelp_MPIAIJ,
1371052efed2SBarry Smith        MatScale_MPIAIJ};
13728a729477SBarry Smith 
13731987afe7SBarry Smith /*@C
1374ff756334SLois Curfman McInnes    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
13753a511b96SLois Curfman McInnes    (the default parallel PETSc format).  For good matrix assembly performance
13763a511b96SLois Curfman McInnes    the user should preallocate the matrix storage by setting the parameters
13773a511b96SLois Curfman McInnes    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
13783a511b96SLois Curfman McInnes    performance can be increased by more than a factor of 50.
13798a729477SBarry Smith 
13808a729477SBarry Smith    Input Parameters:
13811eb62cbbSBarry Smith .  comm - MPI communicator
13827d3e4905SLois Curfman McInnes .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1383fbd6ef76SBarry Smith .      This should be the same as the local size used in creating the
1384fbd6ef76SBarry Smith .      y vector in y = Ax
13857d3e4905SLois Curfman McInnes .  n - number of local columns (or PETSC_DECIDE to have calculated
13867d3e4905SLois Curfman McInnes            if N is given)
1387fbd6ef76SBarry Smith .      This should be the same as the local size used in creating the
1388fbd6ef76SBarry Smith .      x vector in y = Ax
13897d3e4905SLois Curfman McInnes .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
13907d3e4905SLois Curfman McInnes .  N - number of global columns (or PETSC_DECIDE to have calculated
13917d3e4905SLois Curfman McInnes            if n is given)
1392ab693e5aSLois Curfman McInnes .  d_nz - number of nonzeros per row in diagonal portion of local submatrix
1393ff756334SLois Curfman McInnes            (same for all local rows)
13942bd5e0b2SLois Curfman McInnes .  d_nzz - array containing the number of nonzeros in the various rows of the
13952bd5e0b2SLois Curfman McInnes            diagonal portion of local submatrix (possibly different for each row)
13962bd5e0b2SLois Curfman McInnes            or PETSC_NULL. You must leave room for the diagonal entry even if
13972bd5e0b2SLois Curfman McInnes            it is zero.
13982bd5e0b2SLois Curfman McInnes .  o_nz - number of nonzeros per row in the off-diagonal portion of local
1399ab693e5aSLois Curfman McInnes            submatrix (same for all local rows).
14002bd5e0b2SLois Curfman McInnes .  o_nzz - array containing the number of nonzeros in the various rows of the
14012bd5e0b2SLois Curfman McInnes            off-diagonal portion of the local submatrix (possibly different for
14022bd5e0b2SLois Curfman McInnes            each row) or PETSC_NULL.
14038a729477SBarry Smith 
1404ff756334SLois Curfman McInnes    Output Parameter:
140544cd7ae7SLois Curfman McInnes .  A - the matrix
14068a729477SBarry Smith 
1407ff756334SLois Curfman McInnes    Notes:
1408ff756334SLois Curfman McInnes    The AIJ format (also called the Yale sparse matrix format or
1409ff756334SLois Curfman McInnes    compressed row storage), is fully compatible with standard Fortran 77
14100002213bSLois Curfman McInnes    storage.  That is, the stored row and column indices can begin at
14110002213bSLois Curfman McInnes    either one (as in Fortran) or zero.  See the users manual for details.
1412ff756334SLois Curfman McInnes 
1413ff756334SLois Curfman McInnes    The user MUST specify either the local or global matrix dimensions
1414ff756334SLois Curfman McInnes    (possibly both).
1415ff756334SLois Curfman McInnes 
14165511cfe3SLois Curfman McInnes    By default, this format uses inodes (identical nodes) when possible.
14175511cfe3SLois Curfman McInnes    We search for consecutive rows with the same nonzero structure, thereby
14185511cfe3SLois Curfman McInnes    reusing matrix information to achieve increased efficiency.
14195511cfe3SLois Curfman McInnes 
14205511cfe3SLois Curfman McInnes    Options Database Keys:
14215511cfe3SLois Curfman McInnes $    -mat_aij_no_inode  - Do not use inodes
14226e62573dSLois Curfman McInnes $    -mat_aij_inode_limit <limit> - Set inode limit.
14236e62573dSLois Curfman McInnes $        (max limit=5)
14246e62573dSLois Curfman McInnes $    -mat_aij_oneindex - Internally use indexing starting at 1
14256e62573dSLois Curfman McInnes $        rather than 0.  Note: When calling MatSetValues(),
14266e62573dSLois Curfman McInnes $        the user still MUST index entries starting at 0!
14275511cfe3SLois Curfman McInnes 
1428e0245417SLois Curfman McInnes    Storage Information:
1429e0245417SLois Curfman McInnes    For a square global matrix we define each processor's diagonal portion
1430e0245417SLois Curfman McInnes    to be its local rows and the corresponding columns (a square submatrix);
1431e0245417SLois Curfman McInnes    each processor's off-diagonal portion encompasses the remainder of the
1432e0245417SLois Curfman McInnes    local matrix (a rectangular submatrix).
1433e0245417SLois Curfman McInnes 
1434e0245417SLois Curfman McInnes    The user can specify preallocated storage for the diagonal part of
14355ace5be8SLois Curfman McInnes    the local submatrix with either d_nz or d_nnz (not both).  Set
14365ace5be8SLois Curfman McInnes    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
14375ace5be8SLois Curfman McInnes    memory allocation.  Likewise, specify preallocated storage for the
14385ace5be8SLois Curfman McInnes    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1439ff756334SLois Curfman McInnes 
14405511cfe3SLois Curfman McInnes    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
14415511cfe3SLois Curfman McInnes    the figure below we depict these three local rows and all columns (0-11).
14422191d07cSBarry Smith 
1443b810aeb4SBarry Smith $          0 1 2 3 4 5 6 7 8 9 10 11
1444b810aeb4SBarry Smith $         -------------------
14455511cfe3SLois Curfman McInnes $  row 3  |  o o o d d d o o o o o o
14465511cfe3SLois Curfman McInnes $  row 4  |  o o o d d d o o o o o o
14475511cfe3SLois Curfman McInnes $  row 5  |  o o o d d d o o o o o o
14485511cfe3SLois Curfman McInnes $         -------------------
1449b810aeb4SBarry Smith $
1450b810aeb4SBarry Smith 
14515511cfe3SLois Curfman McInnes    Thus, any entries in the d locations are stored in the d (diagonal)
14525511cfe3SLois Curfman McInnes    submatrix, and any entries in the o locations are stored in the
14535511cfe3SLois Curfman McInnes    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
14545511cfe3SLois Curfman McInnes    stored simply in the MATSEQAIJ format for compressed row storage.
14555511cfe3SLois Curfman McInnes 
14565511cfe3SLois Curfman McInnes    Now d_nz should indicate the number of nonzeros per row in the d matrix,
14575511cfe3SLois Curfman McInnes    and o_nz should indicate the number of nonzeros per row in the o matrix.
14585511cfe3SLois Curfman McInnes    In general, for PDE problems in which most nonzeros are near the diagonal,
14593d323bbdSBarry Smith    one expects d_nz >> o_nz. For large problems you MUST preallocate memory
14603d323bbdSBarry Smith    or you will get TERRIBLE performance, see the users' manual chapter on
14613d323bbdSBarry Smith    matrices and the file $(PETSC_DIR)/Performance.
14623a511b96SLois Curfman McInnes 
1463dbd7a890SLois Curfman McInnes .keywords: matrix, aij, compressed row, sparse, parallel
1464ff756334SLois Curfman McInnes 
1465fafbff53SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
14668a729477SBarry Smith @*/
14671eb62cbbSBarry Smith int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
146844cd7ae7SLois Curfman McInnes                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
14698a729477SBarry Smith {
147044cd7ae7SLois Curfman McInnes   Mat          B;
147144cd7ae7SLois Curfman McInnes   Mat_MPIAIJ   *b;
14726abc6512SBarry Smith   int          ierr, i,sum[2],work[2];
1473416022c9SBarry Smith 
147444cd7ae7SLois Curfman McInnes   *A = 0;
147544cd7ae7SLois Curfman McInnes   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIAIJ,comm);
147644cd7ae7SLois Curfman McInnes   PLogObjectCreate(B);
147744cd7ae7SLois Curfman McInnes   B->data       = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b);
147844cd7ae7SLois Curfman McInnes   PetscMemzero(b,sizeof(Mat_MPIAIJ));
147944cd7ae7SLois Curfman McInnes   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
148044cd7ae7SLois Curfman McInnes   B->destroy    = MatDestroy_MPIAIJ;
148144cd7ae7SLois Curfman McInnes   B->view       = MatView_MPIAIJ;
148244cd7ae7SLois Curfman McInnes   B->factor     = 0;
148344cd7ae7SLois Curfman McInnes   B->assembled  = PETSC_FALSE;
1484d6dfbf8fSBarry Smith 
148544cd7ae7SLois Curfman McInnes   b->insertmode = NOT_SET_VALUES;
148644cd7ae7SLois Curfman McInnes   MPI_Comm_rank(comm,&b->rank);
148744cd7ae7SLois Curfman McInnes   MPI_Comm_size(comm,&b->size);
14881eb62cbbSBarry Smith 
1489b4fd4287SBarry Smith   if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
14902e0155e0SLois Curfman McInnes     SETERRQ(1,"MatCreateMPIAIJ:Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
14911987afe7SBarry Smith 
1492dbd7a890SLois Curfman McInnes   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
14931eb62cbbSBarry Smith     work[0] = m; work[1] = n;
1494d65a2f8fSBarry Smith     MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
1495dbd7a890SLois Curfman McInnes     if (M == PETSC_DECIDE) M = sum[0];
1496dbd7a890SLois Curfman McInnes     if (N == PETSC_DECIDE) N = sum[1];
14971eb62cbbSBarry Smith   }
149844cd7ae7SLois Curfman McInnes   if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);}
149944cd7ae7SLois Curfman McInnes   if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);}
150044cd7ae7SLois Curfman McInnes   b->m = m; B->m = m;
150144cd7ae7SLois Curfman McInnes   b->n = n; B->n = n;
150244cd7ae7SLois Curfman McInnes   b->N = N; B->N = N;
150344cd7ae7SLois Curfman McInnes   b->M = M; B->M = M;
15041eb62cbbSBarry Smith 
15051eb62cbbSBarry Smith   /* build local table of row and column ownerships */
150644cd7ae7SLois Curfman McInnes   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
150744cd7ae7SLois Curfman McInnes   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
150844cd7ae7SLois Curfman McInnes   b->cowners = b->rowners + b->size + 1;
150944cd7ae7SLois Curfman McInnes   MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);
151044cd7ae7SLois Curfman McInnes   b->rowners[0] = 0;
151144cd7ae7SLois Curfman McInnes   for ( i=2; i<=b->size; i++ ) {
151244cd7ae7SLois Curfman McInnes     b->rowners[i] += b->rowners[i-1];
15138a729477SBarry Smith   }
151444cd7ae7SLois Curfman McInnes   b->rstart = b->rowners[b->rank];
151544cd7ae7SLois Curfman McInnes   b->rend   = b->rowners[b->rank+1];
151644cd7ae7SLois Curfman McInnes   MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);
151744cd7ae7SLois Curfman McInnes   b->cowners[0] = 0;
151844cd7ae7SLois Curfman McInnes   for ( i=2; i<=b->size; i++ ) {
151944cd7ae7SLois Curfman McInnes     b->cowners[i] += b->cowners[i-1];
15208a729477SBarry Smith   }
152144cd7ae7SLois Curfman McInnes   b->cstart = b->cowners[b->rank];
152244cd7ae7SLois Curfman McInnes   b->cend   = b->cowners[b->rank+1];
15238a729477SBarry Smith 
15245ace5be8SLois Curfman McInnes   if (d_nz == PETSC_DEFAULT) d_nz = 5;
152544cd7ae7SLois Curfman McInnes   ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
152644cd7ae7SLois Curfman McInnes   PLogObjectParent(B,b->A);
15277b8455f0SLois Curfman McInnes   if (o_nz == PETSC_DEFAULT) o_nz = 0;
152844cd7ae7SLois Curfman McInnes   ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
152944cd7ae7SLois Curfman McInnes   PLogObjectParent(B,b->B);
15308a729477SBarry Smith 
15311eb62cbbSBarry Smith   /* build cache for off array entries formed */
153244cd7ae7SLois Curfman McInnes   ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
153344cd7ae7SLois Curfman McInnes   b->colmap      = 0;
153444cd7ae7SLois Curfman McInnes   b->garray      = 0;
153544cd7ae7SLois Curfman McInnes   b->roworiented = 1;
15368a729477SBarry Smith 
15371eb62cbbSBarry Smith   /* stuff used for matrix vector multiply */
153844cd7ae7SLois Curfman McInnes   b->lvec      = 0;
153944cd7ae7SLois Curfman McInnes   b->Mvctx     = 0;
15408a729477SBarry Smith 
15417a0afa10SBarry Smith   /* stuff for MatGetRow() */
154244cd7ae7SLois Curfman McInnes   b->rowindices   = 0;
154344cd7ae7SLois Curfman McInnes   b->rowvalues    = 0;
154444cd7ae7SLois Curfman McInnes   b->getrowactive = PETSC_FALSE;
15457a0afa10SBarry Smith 
154644cd7ae7SLois Curfman McInnes   *A = B;
1547d6dfbf8fSBarry Smith   return 0;
1548d6dfbf8fSBarry Smith }
1549c74985f6SBarry Smith 
15503d1612f7SBarry Smith static int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues)
1551d6dfbf8fSBarry Smith {
1552d6dfbf8fSBarry Smith   Mat        mat;
1553416022c9SBarry Smith   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
1554a1b97e82SLois Curfman McInnes   int        ierr, len=0, flg;
1555d6dfbf8fSBarry Smith 
1556416022c9SBarry Smith   *newmat       = 0;
15570452661fSBarry Smith   PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm);
1558a5a9c739SBarry Smith   PLogObjectCreate(mat);
15590452661fSBarry Smith   mat->data       = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
1560416022c9SBarry Smith   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
156144a69424SLois Curfman McInnes   mat->destroy    = MatDestroy_MPIAIJ;
156244a69424SLois Curfman McInnes   mat->view       = MatView_MPIAIJ;
1563d6dfbf8fSBarry Smith   mat->factor     = matin->factor;
1564c456f294SBarry Smith   mat->assembled  = PETSC_TRUE;
1565d6dfbf8fSBarry Smith 
156644cd7ae7SLois Curfman McInnes   a->m = mat->m   = oldmat->m;
156744cd7ae7SLois Curfman McInnes   a->n = mat->n   = oldmat->n;
156844cd7ae7SLois Curfman McInnes   a->M = mat->M   = oldmat->M;
156944cd7ae7SLois Curfman McInnes   a->N = mat->N   = oldmat->N;
1570d6dfbf8fSBarry Smith 
1571416022c9SBarry Smith   a->rstart       = oldmat->rstart;
1572416022c9SBarry Smith   a->rend         = oldmat->rend;
1573416022c9SBarry Smith   a->cstart       = oldmat->cstart;
1574416022c9SBarry Smith   a->cend         = oldmat->cend;
157517699dbbSLois Curfman McInnes   a->size         = oldmat->size;
157617699dbbSLois Curfman McInnes   a->rank         = oldmat->rank;
157764119d58SLois Curfman McInnes   a->insertmode   = NOT_SET_VALUES;
1578bcd2baecSBarry Smith   a->rowvalues    = 0;
1579bcd2baecSBarry Smith   a->getrowactive = PETSC_FALSE;
1580d6dfbf8fSBarry Smith 
15810452661fSBarry Smith   a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners);
158217699dbbSLois Curfman McInnes   PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
158317699dbbSLois Curfman McInnes   PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));
1584416022c9SBarry Smith   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
15852ee70a88SLois Curfman McInnes   if (oldmat->colmap) {
15860452661fSBarry Smith     a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
1587416022c9SBarry Smith     PLogObjectMemory(mat,(a->N)*sizeof(int));
1588416022c9SBarry Smith     PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
1589416022c9SBarry Smith   } else a->colmap = 0;
1590ec8511deSBarry Smith   if (oldmat->garray && (len = ((Mat_SeqAIJ *) (oldmat->B->data))->n)) {
15910452661fSBarry Smith     a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray);
1592464493b3SBarry Smith     PLogObjectMemory(mat,len*sizeof(int));
1593416022c9SBarry Smith     PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
1594416022c9SBarry Smith   } else a->garray = 0;
1595d6dfbf8fSBarry Smith 
1596416022c9SBarry Smith   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
1597416022c9SBarry Smith   PLogObjectParent(mat,a->lvec);
1598a56f8943SBarry Smith   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
1599416022c9SBarry Smith   PLogObjectParent(mat,a->Mvctx);
1600416022c9SBarry Smith   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
1601416022c9SBarry Smith   PLogObjectParent(mat,a->A);
1602416022c9SBarry Smith   ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
1603416022c9SBarry Smith   PLogObjectParent(mat,a->B);
16045dd7a6c7SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
160525cdf11fSBarry Smith   if (flg) {
1606682d7d0cSBarry Smith     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
1607682d7d0cSBarry Smith   }
16088a729477SBarry Smith   *newmat = mat;
16098a729477SBarry Smith   return 0;
16108a729477SBarry Smith }
1611416022c9SBarry Smith 
161277c4ece6SBarry Smith #include "sys.h"
1613416022c9SBarry Smith 
161419bcc07fSBarry Smith int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
1615416022c9SBarry Smith {
1616d65a2f8fSBarry Smith   Mat          A;
1617416022c9SBarry Smith   int          i, nz, ierr, j,rstart, rend, fd;
1618d65a2f8fSBarry Smith   Scalar       *vals,*svals;
161919bcc07fSBarry Smith   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1620416022c9SBarry Smith   MPI_Status   status;
162117699dbbSLois Curfman McInnes   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1622d65a2f8fSBarry Smith   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
162319bcc07fSBarry Smith   int          tag = ((PetscObject)viewer)->tag;
1624416022c9SBarry Smith 
162517699dbbSLois Curfman McInnes   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
162617699dbbSLois Curfman McInnes   if (!rank) {
162790ace30eSBarry Smith     ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
162877c4ece6SBarry Smith     ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr);
1629c456f294SBarry Smith     if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIAIJ:not matrix object");
1630416022c9SBarry Smith   }
1631416022c9SBarry Smith 
1632416022c9SBarry Smith   MPI_Bcast(header+1,3,MPI_INT,0,comm);
1633416022c9SBarry Smith   M = header[1]; N = header[2];
1634416022c9SBarry Smith   /* determine ownership of all rows */
163517699dbbSLois Curfman McInnes   m = M/size + ((M % size) > rank);
16360452661fSBarry Smith   rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
1637416022c9SBarry Smith   MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1638416022c9SBarry Smith   rowners[0] = 0;
163917699dbbSLois Curfman McInnes   for ( i=2; i<=size; i++ ) {
1640416022c9SBarry Smith     rowners[i] += rowners[i-1];
1641416022c9SBarry Smith   }
164217699dbbSLois Curfman McInnes   rstart = rowners[rank];
164317699dbbSLois Curfman McInnes   rend   = rowners[rank+1];
1644416022c9SBarry Smith 
1645416022c9SBarry Smith   /* distribute row lengths to all processors */
16460452661fSBarry Smith   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
1647416022c9SBarry Smith   offlens = ourlens + (rend-rstart);
164817699dbbSLois Curfman McInnes   if (!rank) {
16490452661fSBarry Smith     rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
165077c4ece6SBarry Smith     ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
16510452661fSBarry Smith     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
165217699dbbSLois Curfman McInnes     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
1653d65a2f8fSBarry Smith     MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
16540452661fSBarry Smith     PetscFree(sndcounts);
1655416022c9SBarry Smith   }
1656416022c9SBarry Smith   else {
1657416022c9SBarry Smith     MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
1658416022c9SBarry Smith   }
1659416022c9SBarry Smith 
166017699dbbSLois Curfman McInnes   if (!rank) {
1661416022c9SBarry Smith     /* calculate the number of nonzeros on each processor */
16620452661fSBarry Smith     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
1663cddf8d76SBarry Smith     PetscMemzero(procsnz,size*sizeof(int));
166417699dbbSLois Curfman McInnes     for ( i=0; i<size; i++ ) {
1665416022c9SBarry Smith       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
1666416022c9SBarry Smith         procsnz[i] += rowlengths[j];
1667416022c9SBarry Smith       }
1668416022c9SBarry Smith     }
16690452661fSBarry Smith     PetscFree(rowlengths);
1670416022c9SBarry Smith 
1671416022c9SBarry Smith     /* determine max buffer needed and allocate it */
1672416022c9SBarry Smith     maxnz = 0;
167317699dbbSLois Curfman McInnes     for ( i=0; i<size; i++ ) {
16740452661fSBarry Smith       maxnz = PetscMax(maxnz,procsnz[i]);
1675416022c9SBarry Smith     }
16760452661fSBarry Smith     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
1677416022c9SBarry Smith 
1678416022c9SBarry Smith     /* read in my part of the matrix column indices  */
1679416022c9SBarry Smith     nz = procsnz[0];
16800452661fSBarry Smith     mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
168177c4ece6SBarry Smith     ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr);
1682d65a2f8fSBarry Smith 
1683d65a2f8fSBarry Smith     /* read in every one elses and ship off */
168417699dbbSLois Curfman McInnes     for ( i=1; i<size; i++ ) {
1685d65a2f8fSBarry Smith       nz = procsnz[i];
168677c4ece6SBarry Smith       ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
1687d65a2f8fSBarry Smith       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
1688d65a2f8fSBarry Smith     }
16890452661fSBarry Smith     PetscFree(cols);
1690416022c9SBarry Smith   }
1691416022c9SBarry Smith   else {
1692416022c9SBarry Smith     /* determine buffer space needed for message */
1693416022c9SBarry Smith     nz = 0;
1694416022c9SBarry Smith     for ( i=0; i<m; i++ ) {
1695416022c9SBarry Smith       nz += ourlens[i];
1696416022c9SBarry Smith     }
16970452661fSBarry Smith     mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1698416022c9SBarry Smith 
1699416022c9SBarry Smith     /* receive message of column indices*/
1700d65a2f8fSBarry Smith     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
1701416022c9SBarry Smith     MPI_Get_count(&status,MPI_INT,&maxnz);
1702c456f294SBarry Smith     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file");
1703416022c9SBarry Smith   }
1704416022c9SBarry Smith 
1705416022c9SBarry Smith   /* loop over local rows, determining number of off diagonal entries */
1706cddf8d76SBarry Smith   PetscMemzero(offlens,m*sizeof(int));
1707416022c9SBarry Smith   jj = 0;
1708416022c9SBarry Smith   for ( i=0; i<m; i++ ) {
1709416022c9SBarry Smith     for ( j=0; j<ourlens[i]; j++ ) {
1710d65a2f8fSBarry Smith       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1711416022c9SBarry Smith       jj++;
1712416022c9SBarry Smith     }
1713416022c9SBarry Smith   }
1714d65a2f8fSBarry Smith 
1715d65a2f8fSBarry Smith   /* create our matrix */
1716416022c9SBarry Smith   for ( i=0; i<m; i++ ) {
1717416022c9SBarry Smith     ourlens[i] -= offlens[i];
1718416022c9SBarry Smith   }
1719d65a2f8fSBarry Smith   ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1720d65a2f8fSBarry Smith   A = *newmat;
1721d65a2f8fSBarry Smith   MatSetOption(A,COLUMNS_SORTED);
1722d65a2f8fSBarry Smith   for ( i=0; i<m; i++ ) {
1723d65a2f8fSBarry Smith     ourlens[i] += offlens[i];
1724d65a2f8fSBarry Smith   }
1725416022c9SBarry Smith 
172617699dbbSLois Curfman McInnes   if (!rank) {
17270452661fSBarry Smith     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);
1728416022c9SBarry Smith 
1729416022c9SBarry Smith     /* read in my part of the matrix numerical values  */
1730416022c9SBarry Smith     nz = procsnz[0];
173177c4ece6SBarry Smith     ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
1732d65a2f8fSBarry Smith 
1733d65a2f8fSBarry Smith     /* insert into matrix */
1734d65a2f8fSBarry Smith     jj      = rstart;
1735d65a2f8fSBarry Smith     smycols = mycols;
1736d65a2f8fSBarry Smith     svals   = vals;
1737d65a2f8fSBarry Smith     for ( i=0; i<m; i++ ) {
1738d65a2f8fSBarry Smith       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1739d65a2f8fSBarry Smith       smycols += ourlens[i];
1740d65a2f8fSBarry Smith       svals   += ourlens[i];
1741d65a2f8fSBarry Smith       jj++;
1742416022c9SBarry Smith     }
1743416022c9SBarry Smith 
1744d65a2f8fSBarry Smith     /* read in other processors and ship out */
174517699dbbSLois Curfman McInnes     for ( i=1; i<size; i++ ) {
1746416022c9SBarry Smith       nz = procsnz[i];
174777c4ece6SBarry Smith       ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
1748d65a2f8fSBarry Smith       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
1749416022c9SBarry Smith     }
17500452661fSBarry Smith     PetscFree(procsnz);
1751416022c9SBarry Smith   }
1752d65a2f8fSBarry Smith   else {
1753d65a2f8fSBarry Smith     /* receive numeric values */
17540452661fSBarry Smith     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);
1755416022c9SBarry Smith 
1756d65a2f8fSBarry Smith     /* receive message of values*/
1757d65a2f8fSBarry Smith     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
1758d65a2f8fSBarry Smith     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1759c456f294SBarry Smith     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file");
1760d65a2f8fSBarry Smith 
1761d65a2f8fSBarry Smith     /* insert into matrix */
1762d65a2f8fSBarry Smith     jj      = rstart;
1763d65a2f8fSBarry Smith     smycols = mycols;
1764d65a2f8fSBarry Smith     svals   = vals;
1765d65a2f8fSBarry Smith     for ( i=0; i<m; i++ ) {
1766d65a2f8fSBarry Smith       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1767d65a2f8fSBarry Smith       smycols += ourlens[i];
1768d65a2f8fSBarry Smith       svals   += ourlens[i];
1769d65a2f8fSBarry Smith       jj++;
1770d65a2f8fSBarry Smith     }
1771d65a2f8fSBarry Smith   }
17720452661fSBarry Smith   PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);
1773d65a2f8fSBarry Smith 
1774d65a2f8fSBarry Smith   ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
1775d65a2f8fSBarry Smith   ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
1776416022c9SBarry Smith   return 0;
1777416022c9SBarry Smith }
1778