xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision d9d09a02e1e5c0c12eda52f6d75965a49fd2c915)
1cb512458SBarry Smith #ifndef lint
2*d9d09a02SSatish Balay static char vcid[] = "$Id: mpiaij.c,v 1.146 1996/07/01 15:33:56 bsmith Exp balay $";
3cb512458SBarry Smith #endif
48a729477SBarry Smith 
51eb62cbbSBarry Smith #include "mpiaij.h"
6f5eb4b81SSatish Balay #include "src/vec/vecimpl.h"
7d9942c19SSatish Balay #include "src/inline/spops.h"
88a729477SBarry Smith 
99e25ed09SBarry Smith /* local utility routine that creates a mapping from the global column
109e25ed09SBarry Smith number to the local number in the off-diagonal part of the local
119e25ed09SBarry Smith storage of the matrix.  This is done in a non scable way since the
129e25ed09SBarry Smith length of colmap equals the global matrix length.
139e25ed09SBarry Smith */
14b7c46309SBarry Smith static int CreateColmap_Private(Mat mat)
159e25ed09SBarry Smith {
1644a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
17ec8511deSBarry Smith   Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
18416022c9SBarry Smith   int        n = B->n,i,shift = B->indexshift;
19dbb450caSBarry Smith 
200452661fSBarry Smith   aij->colmap = (int *) PetscMalloc(aij->N*sizeof(int));CHKPTRQ(aij->colmap);
21464493b3SBarry Smith   PLogObjectMemory(mat,aij->N*sizeof(int));
22cddf8d76SBarry Smith   PetscMemzero(aij->colmap,aij->N*sizeof(int));
23dbb450caSBarry Smith   for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i-shift;
249e25ed09SBarry Smith   return 0;
259e25ed09SBarry Smith }
269e25ed09SBarry Smith 
272493cbb0SBarry Smith extern int DisAssemble_MPIAIJ(Mat);
282493cbb0SBarry Smith 
2970423df4SBarry Smith static int MatGetReordering_MPIAIJ(Mat mat,MatReordering type,IS *rperm,IS *cperm)
30299609e3SLois Curfman McInnes {
31299609e3SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
32299609e3SLois Curfman McInnes   int        ierr;
3317699dbbSLois Curfman McInnes   if (aij->size == 1) {
3451c98ccdSLois Curfman McInnes     ierr = MatGetReordering(aij->A,type,rperm,cperm); CHKERRQ(ierr);
3548d91487SBarry Smith   } else SETERRQ(1,"MatGetReordering_MPIAIJ:not supported in parallel");
36299609e3SLois Curfman McInnes   return 0;
37299609e3SLois Curfman McInnes }
38299609e3SLois Curfman McInnes 
394b0e389bSBarry Smith static int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
408a729477SBarry Smith {
4144a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
42dbb450caSBarry Smith   Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data;
434b0e389bSBarry Smith   Scalar     value;
441eb62cbbSBarry Smith   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
451eb62cbbSBarry Smith   int        cstart = aij->cstart, cend = aij->cend,row,col;
464b0e389bSBarry Smith   int        shift = C->indexshift,roworiented = aij->roworiented;
478a729477SBarry Smith 
4864119d58SLois Curfman McInnes   if (aij->insertmode != NOT_SET_VALUES && aij->insertmode != addv) {
4948d91487SBarry Smith     SETERRQ(1,"MatSetValues_MPIAIJ:Cannot mix inserts and adds");
508a729477SBarry Smith   }
511eb62cbbSBarry Smith   aij->insertmode = addv;
528a729477SBarry Smith   for ( i=0; i<m; i++ ) {
534b0e389bSBarry Smith     if (im[i] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative row");
544b0e389bSBarry Smith     if (im[i] >= aij->M) SETERRQ(1,"MatSetValues_MPIAIJ:Row too large");
554b0e389bSBarry Smith     if (im[i] >= rstart && im[i] < rend) {
564b0e389bSBarry Smith       row = im[i] - rstart;
571eb62cbbSBarry Smith       for ( j=0; j<n; j++ ) {
584b0e389bSBarry Smith         if (in[j] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative column");
594b0e389bSBarry Smith         if (in[j] >= aij->N) SETERRQ(1,"MatSetValues_MPIAIJ:Col too large");
604b0e389bSBarry Smith         if (in[j] >= cstart && in[j] < cend){
614b0e389bSBarry Smith           col = in[j] - cstart;
624b0e389bSBarry Smith           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
634b0e389bSBarry Smith           ierr = MatSetValues(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
641eb62cbbSBarry Smith         }
651eb62cbbSBarry Smith         else {
66227d817aSBarry Smith           if (mat->was_assembled) {
67b7c46309SBarry Smith             if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
684b0e389bSBarry Smith             col = aij->colmap[in[j]] + shift;
69ec8511deSBarry Smith             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
702493cbb0SBarry Smith               ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
714b0e389bSBarry Smith               col =  in[j];
72d6dfbf8fSBarry Smith             }
739e25ed09SBarry Smith           }
744b0e389bSBarry Smith           else col = in[j];
754b0e389bSBarry Smith           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
764b0e389bSBarry Smith           ierr = MatSetValues(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
771eb62cbbSBarry Smith         }
781eb62cbbSBarry Smith       }
791eb62cbbSBarry Smith     }
801eb62cbbSBarry Smith     else {
814b0e389bSBarry Smith       if (roworiented) {
824b0e389bSBarry Smith         ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
834b0e389bSBarry Smith       }
844b0e389bSBarry Smith       else {
854b0e389bSBarry Smith         row = im[i];
864b0e389bSBarry Smith         for ( j=0; j<n; j++ ) {
874b0e389bSBarry Smith           ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
884b0e389bSBarry Smith         }
894b0e389bSBarry Smith       }
901eb62cbbSBarry Smith     }
918a729477SBarry Smith   }
928a729477SBarry Smith   return 0;
938a729477SBarry Smith }
948a729477SBarry Smith 
95b49de8d1SLois Curfman McInnes static int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
96b49de8d1SLois Curfman McInnes {
97b49de8d1SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
98b49de8d1SLois Curfman McInnes   Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data;
99b49de8d1SLois Curfman McInnes   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
100b49de8d1SLois Curfman McInnes   int        cstart = aij->cstart, cend = aij->cend,row,col;
101b49de8d1SLois Curfman McInnes   int        shift = C->indexshift;
102b49de8d1SLois Curfman McInnes 
103b49de8d1SLois Curfman McInnes   for ( i=0; i<m; i++ ) {
104b49de8d1SLois Curfman McInnes     if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative row");
105b49de8d1SLois Curfman McInnes     if (idxm[i] >= aij->M) SETERRQ(1,"MatGetValues_MPIAIJ:Row too large");
106b49de8d1SLois Curfman McInnes     if (idxm[i] >= rstart && idxm[i] < rend) {
107b49de8d1SLois Curfman McInnes       row = idxm[i] - rstart;
108b49de8d1SLois Curfman McInnes       for ( j=0; j<n; j++ ) {
109b49de8d1SLois Curfman McInnes         if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative column");
110b49de8d1SLois Curfman McInnes         if (idxn[j] >= aij->N) SETERRQ(1,"MatGetValues_MPIAIJ:Col too large");
111b49de8d1SLois Curfman McInnes         if (idxn[j] >= cstart && idxn[j] < cend){
112b49de8d1SLois Curfman McInnes           col = idxn[j] - cstart;
113b49de8d1SLois Curfman McInnes           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
114b49de8d1SLois Curfman McInnes         }
115b49de8d1SLois Curfman McInnes         else {
11670423df4SBarry Smith           if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
117b49de8d1SLois Curfman McInnes           col = aij->colmap[idxn[j]] + shift;
118*d9d09a02SSatish Balay           if ( aij->garray[col] != idxn[j] ) *(v+i*n+j) = 0.0;
119*d9d09a02SSatish Balay           else {
120b49de8d1SLois Curfman McInnes             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
121b49de8d1SLois Curfman McInnes           }
122b49de8d1SLois Curfman McInnes         }
123b49de8d1SLois Curfman McInnes       }
124*d9d09a02SSatish Balay     }
125b49de8d1SLois Curfman McInnes     else {
126b49de8d1SLois Curfman McInnes       SETERRQ(1,"MatGetValues_MPIAIJ:Only local values currently supported");
127b49de8d1SLois Curfman McInnes     }
128b49de8d1SLois Curfman McInnes   }
129b49de8d1SLois Curfman McInnes   return 0;
130b49de8d1SLois Curfman McInnes }
131b49de8d1SLois Curfman McInnes 
132fc76ce87SLois Curfman McInnes static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
1338a729477SBarry Smith {
13444a69424SLois Curfman McInnes   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
135d6dfbf8fSBarry Smith   MPI_Comm    comm = mat->comm;
13617699dbbSLois Curfman McInnes   int         size = aij->size, *owners = aij->rowners;
13717699dbbSLois Curfman McInnes   int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
1381eb62cbbSBarry Smith   MPI_Request *send_waits,*recv_waits;
1396abc6512SBarry Smith   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
1401eb62cbbSBarry Smith   InsertMode  addv;
1411eb62cbbSBarry Smith   Scalar      *rvalues,*svalues;
1421eb62cbbSBarry Smith 
1431eb62cbbSBarry Smith   /* make sure all processors are either in INSERTMODE or ADDMODE */
144d65a2f8fSBarry Smith   MPI_Allreduce(&aij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
145dbb450caSBarry Smith   if (addv == (ADD_VALUES|INSERT_VALUES)) {
146bbb6d6a8SBarry Smith     SETERRQ(1,"MatAssemblyBegin_MPIAIJ:Some processors inserted others added");
1471eb62cbbSBarry Smith   }
1481eb62cbbSBarry Smith   aij->insertmode = addv; /* in case this processor had no cache */
1491eb62cbbSBarry Smith 
1501eb62cbbSBarry Smith   /*  first count number of contributors to each processor */
1510452661fSBarry Smith   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
152cddf8d76SBarry Smith   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
1530452661fSBarry Smith   owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
1541eb62cbbSBarry Smith   for ( i=0; i<aij->stash.n; i++ ) {
1551eb62cbbSBarry Smith     idx = aij->stash.idx[i];
15617699dbbSLois Curfman McInnes     for ( j=0; j<size; j++ ) {
1571eb62cbbSBarry Smith       if (idx >= owners[j] && idx < owners[j+1]) {
1581eb62cbbSBarry Smith         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
1598a729477SBarry Smith       }
1608a729477SBarry Smith     }
1618a729477SBarry Smith   }
16217699dbbSLois Curfman McInnes   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
1631eb62cbbSBarry Smith 
1641eb62cbbSBarry Smith   /* inform other processors of number of messages and max length*/
1650452661fSBarry Smith   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
16617699dbbSLois Curfman McInnes   MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);
16717699dbbSLois Curfman McInnes   nreceives = work[rank];
16817699dbbSLois Curfman McInnes   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
16917699dbbSLois Curfman McInnes   nmax = work[rank];
1700452661fSBarry Smith   PetscFree(work);
1711eb62cbbSBarry Smith 
1721eb62cbbSBarry Smith   /* post receives:
1731eb62cbbSBarry Smith        1) each message will consist of ordered pairs
1741eb62cbbSBarry Smith      (global index,value) we store the global index as a double
175d6dfbf8fSBarry Smith      to simplify the message passing.
1761eb62cbbSBarry Smith        2) since we don't know how long each individual message is we
1771eb62cbbSBarry Smith      allocate the largest needed buffer for each receive. Potentially
1781eb62cbbSBarry Smith      this is a lot of wasted space.
1791eb62cbbSBarry Smith 
1801eb62cbbSBarry Smith 
1811eb62cbbSBarry Smith        This could be done better.
1821eb62cbbSBarry Smith   */
1830452661fSBarry Smith   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
18478b31e54SBarry Smith   CHKPTRQ(rvalues);
1850452661fSBarry Smith   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
18678b31e54SBarry Smith   CHKPTRQ(recv_waits);
1871eb62cbbSBarry Smith   for ( i=0; i<nreceives; i++ ) {
188416022c9SBarry Smith     MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
1891eb62cbbSBarry Smith               comm,recv_waits+i);
1901eb62cbbSBarry Smith   }
1911eb62cbbSBarry Smith 
1921eb62cbbSBarry Smith   /* do sends:
1931eb62cbbSBarry Smith       1) starts[i] gives the starting index in svalues for stuff going to
1941eb62cbbSBarry Smith          the ith processor
1951eb62cbbSBarry Smith   */
1960452661fSBarry Smith   svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
1970452661fSBarry Smith   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
19878b31e54SBarry Smith   CHKPTRQ(send_waits);
1990452661fSBarry Smith   starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
2001eb62cbbSBarry Smith   starts[0] = 0;
20117699dbbSLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
2021eb62cbbSBarry Smith   for ( i=0; i<aij->stash.n; i++ ) {
2031eb62cbbSBarry Smith     svalues[3*starts[owner[i]]]       = (Scalar)  aij->stash.idx[i];
2041eb62cbbSBarry Smith     svalues[3*starts[owner[i]]+1]     = (Scalar)  aij->stash.idy[i];
2051eb62cbbSBarry Smith     svalues[3*(starts[owner[i]]++)+2] =  aij->stash.array[i];
2061eb62cbbSBarry Smith   }
2070452661fSBarry Smith   PetscFree(owner);
2081eb62cbbSBarry Smith   starts[0] = 0;
20917699dbbSLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
2101eb62cbbSBarry Smith   count = 0;
21117699dbbSLois Curfman McInnes   for ( i=0; i<size; i++ ) {
2121eb62cbbSBarry Smith     if (procs[i]) {
213416022c9SBarry Smith       MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
2141eb62cbbSBarry Smith                 comm,send_waits+count++);
2151eb62cbbSBarry Smith     }
2161eb62cbbSBarry Smith   }
2170452661fSBarry Smith   PetscFree(starts); PetscFree(nprocs);
2181eb62cbbSBarry Smith 
2191eb62cbbSBarry Smith   /* Free cache space */
2202191d07cSBarry Smith   PLogInfo(0,"[%d]MatAssemblyBegin_MPIAIJ:Number of off processor values %d\n",rank,aij->stash.n);
22178b31e54SBarry Smith   ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
2221eb62cbbSBarry Smith 
2231eb62cbbSBarry Smith   aij->svalues    = svalues;    aij->rvalues    = rvalues;
2241eb62cbbSBarry Smith   aij->nsends     = nsends;     aij->nrecvs     = nreceives;
2251eb62cbbSBarry Smith   aij->send_waits = send_waits; aij->recv_waits = recv_waits;
2261eb62cbbSBarry Smith   aij->rmax       = nmax;
2271eb62cbbSBarry Smith 
2281eb62cbbSBarry Smith   return 0;
2291eb62cbbSBarry Smith }
23044a69424SLois Curfman McInnes extern int MatSetUpMultiply_MPIAIJ(Mat);
2311eb62cbbSBarry Smith 
232fc76ce87SLois Curfman McInnes static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
2331eb62cbbSBarry Smith {
23444a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
235dbb450caSBarry Smith   Mat_SeqAIJ *C = (Mat_SeqAIJ *) aij->A->data;
2361eb62cbbSBarry Smith   MPI_Status  *send_status,recv_status;
237416022c9SBarry Smith   int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
238416022c9SBarry Smith   int         row,col,other_disassembled,shift = C->indexshift;
2391eb62cbbSBarry Smith   Scalar      *values,val;
2401eb62cbbSBarry Smith   InsertMode  addv = aij->insertmode;
2411eb62cbbSBarry Smith 
2421eb62cbbSBarry Smith   /*  wait on receives */
2431eb62cbbSBarry Smith   while (count) {
244d6dfbf8fSBarry Smith     MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
2451eb62cbbSBarry Smith     /* unpack receives into our local space */
246d6dfbf8fSBarry Smith     values = aij->rvalues + 3*imdex*aij->rmax;
247c60448a5SBarry Smith     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
2481eb62cbbSBarry Smith     n = n/3;
2491eb62cbbSBarry Smith     for ( i=0; i<n; i++ ) {
250227d817aSBarry Smith       row = (int) PetscReal(values[3*i]) - aij->rstart;
251227d817aSBarry Smith       col = (int) PetscReal(values[3*i+1]);
2521eb62cbbSBarry Smith       val = values[3*i+2];
2531eb62cbbSBarry Smith       if (col >= aij->cstart && col < aij->cend) {
2541eb62cbbSBarry Smith         col -= aij->cstart;
2551eb62cbbSBarry Smith         MatSetValues(aij->A,1,&row,1,&col,&val,addv);
2561eb62cbbSBarry Smith       }
2571eb62cbbSBarry Smith       else {
258227d817aSBarry Smith         if (mat->was_assembled) {
259b7c46309SBarry Smith           if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
260dbb450caSBarry Smith           col = aij->colmap[col] + shift;
261ec8511deSBarry Smith           if (col < 0  && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
2622493cbb0SBarry Smith             ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
263227d817aSBarry Smith             col = (int) PetscReal(values[3*i+1]);
264d6dfbf8fSBarry Smith           }
2659e25ed09SBarry Smith         }
2661eb62cbbSBarry Smith         MatSetValues(aij->B,1,&row,1,&col,&val,addv);
2671eb62cbbSBarry Smith       }
2681eb62cbbSBarry Smith     }
2691eb62cbbSBarry Smith     count--;
2701eb62cbbSBarry Smith   }
2710452661fSBarry Smith   PetscFree(aij->recv_waits); PetscFree(aij->rvalues);
2721eb62cbbSBarry Smith 
2731eb62cbbSBarry Smith   /* wait on sends */
2741eb62cbbSBarry Smith   if (aij->nsends) {
2750452661fSBarry Smith     send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));
27678b31e54SBarry Smith     CHKPTRQ(send_status);
2771eb62cbbSBarry Smith     MPI_Waitall(aij->nsends,aij->send_waits,send_status);
2780452661fSBarry Smith     PetscFree(send_status);
2791eb62cbbSBarry Smith   }
2800452661fSBarry Smith   PetscFree(aij->send_waits); PetscFree(aij->svalues);
2811eb62cbbSBarry Smith 
28264119d58SLois Curfman McInnes   aij->insertmode = NOT_SET_VALUES;
28378b31e54SBarry Smith   ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
28478b31e54SBarry Smith   ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);
2851eb62cbbSBarry Smith 
2862493cbb0SBarry Smith   /* determine if any processor has disassembled, if so we must
2872493cbb0SBarry Smith      also disassemble ourselfs, in order that we may reassemble. */
288227d817aSBarry Smith   MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
289227d817aSBarry Smith   if (mat->was_assembled && !other_disassembled) {
2902493cbb0SBarry Smith     ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
2912493cbb0SBarry Smith   }
2922493cbb0SBarry Smith 
293227d817aSBarry Smith   if (!mat->was_assembled && mode == FINAL_ASSEMBLY) {
29478b31e54SBarry Smith     ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
2955e42470aSBarry Smith   }
29678b31e54SBarry Smith   ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
29778b31e54SBarry Smith   ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);
2985e42470aSBarry Smith 
2997a0afa10SBarry Smith   if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;}
3008a729477SBarry Smith   return 0;
3018a729477SBarry Smith }
3028a729477SBarry Smith 
3032ee70a88SLois Curfman McInnes static int MatZeroEntries_MPIAIJ(Mat A)
3041eb62cbbSBarry Smith {
30544a69424SLois Curfman McInnes   Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
306dbd7a890SLois Curfman McInnes   int        ierr;
30778b31e54SBarry Smith   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
30878b31e54SBarry Smith   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
3091eb62cbbSBarry Smith   return 0;
3101eb62cbbSBarry Smith }
3111eb62cbbSBarry Smith 
3121eb62cbbSBarry Smith /* the code does not do the diagonal entries correctly unless the
3131eb62cbbSBarry Smith    matrix is square and the column and row owerships are identical.
3141eb62cbbSBarry Smith    This is a BUG. The only way to fix it seems to be to access
3151eb62cbbSBarry Smith    aij->A and aij->B directly and not through the MatZeroRows()
3161eb62cbbSBarry Smith    routine.
3171eb62cbbSBarry Smith */
31844a69424SLois Curfman McInnes static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
3191eb62cbbSBarry Smith {
32044a69424SLois Curfman McInnes   Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
32117699dbbSLois Curfman McInnes   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
3226abc6512SBarry Smith   int            *procs,*nprocs,j,found,idx,nsends,*work;
32317699dbbSLois Curfman McInnes   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
3245392566eSBarry Smith   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
325d6dfbf8fSBarry Smith   int            *lens,imdex,*lrows,*values;
326d6dfbf8fSBarry Smith   MPI_Comm       comm = A->comm;
3271eb62cbbSBarry Smith   MPI_Request    *send_waits,*recv_waits;
3281eb62cbbSBarry Smith   MPI_Status     recv_status,*send_status;
3291eb62cbbSBarry Smith   IS             istmp;
3301eb62cbbSBarry Smith 
33177c4ece6SBarry Smith   ierr = ISGetSize(is,&N); CHKERRQ(ierr);
33278b31e54SBarry Smith   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
3331eb62cbbSBarry Smith 
3341eb62cbbSBarry Smith   /*  first count number of contributors to each processor */
3350452661fSBarry Smith   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
336cddf8d76SBarry Smith   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
3370452661fSBarry Smith   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
3381eb62cbbSBarry Smith   for ( i=0; i<N; i++ ) {
3391eb62cbbSBarry Smith     idx = rows[i];
3401eb62cbbSBarry Smith     found = 0;
34117699dbbSLois Curfman McInnes     for ( j=0; j<size; j++ ) {
3421eb62cbbSBarry Smith       if (idx >= owners[j] && idx < owners[j+1]) {
3431eb62cbbSBarry Smith         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
3441eb62cbbSBarry Smith       }
3451eb62cbbSBarry Smith     }
346bbb6d6a8SBarry Smith     if (!found) SETERRQ(1,"MatZeroRows_MPIAIJ:Index out of range");
3471eb62cbbSBarry Smith   }
34817699dbbSLois Curfman McInnes   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
3491eb62cbbSBarry Smith 
3501eb62cbbSBarry Smith   /* inform other processors of number of messages and max length*/
3510452661fSBarry Smith   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
35217699dbbSLois Curfman McInnes   MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
35317699dbbSLois Curfman McInnes   nrecvs = work[rank];
35417699dbbSLois Curfman McInnes   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
35517699dbbSLois Curfman McInnes   nmax = work[rank];
3560452661fSBarry Smith   PetscFree(work);
3571eb62cbbSBarry Smith 
3581eb62cbbSBarry Smith   /* post receives:   */
3590452661fSBarry Smith   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
36078b31e54SBarry Smith   CHKPTRQ(rvalues);
3610452661fSBarry Smith   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
36278b31e54SBarry Smith   CHKPTRQ(recv_waits);
3631eb62cbbSBarry Smith   for ( i=0; i<nrecvs; i++ ) {
364416022c9SBarry Smith     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
3651eb62cbbSBarry Smith   }
3661eb62cbbSBarry Smith 
3671eb62cbbSBarry Smith   /* do sends:
3681eb62cbbSBarry Smith       1) starts[i] gives the starting index in svalues for stuff going to
3691eb62cbbSBarry Smith          the ith processor
3701eb62cbbSBarry Smith   */
3710452661fSBarry Smith   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
3720452661fSBarry Smith   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
37378b31e54SBarry Smith   CHKPTRQ(send_waits);
3740452661fSBarry Smith   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
3751eb62cbbSBarry Smith   starts[0] = 0;
37617699dbbSLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
3771eb62cbbSBarry Smith   for ( i=0; i<N; i++ ) {
3781eb62cbbSBarry Smith     svalues[starts[owner[i]]++] = rows[i];
3791eb62cbbSBarry Smith   }
3801eb62cbbSBarry Smith   ISRestoreIndices(is,&rows);
3811eb62cbbSBarry Smith 
3821eb62cbbSBarry Smith   starts[0] = 0;
38317699dbbSLois Curfman McInnes   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
3841eb62cbbSBarry Smith   count = 0;
38517699dbbSLois Curfman McInnes   for ( i=0; i<size; i++ ) {
3861eb62cbbSBarry Smith     if (procs[i]) {
387416022c9SBarry Smith       MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
3881eb62cbbSBarry Smith     }
3891eb62cbbSBarry Smith   }
3900452661fSBarry Smith   PetscFree(starts);
3911eb62cbbSBarry Smith 
39217699dbbSLois Curfman McInnes   base = owners[rank];
3931eb62cbbSBarry Smith 
3941eb62cbbSBarry Smith   /*  wait on receives */
3950452661fSBarry Smith   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
3961eb62cbbSBarry Smith   source = lens + nrecvs;
3971eb62cbbSBarry Smith   count  = nrecvs; slen = 0;
3981eb62cbbSBarry Smith   while (count) {
399d6dfbf8fSBarry Smith     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
4001eb62cbbSBarry Smith     /* unpack receives into our local space */
4011eb62cbbSBarry Smith     MPI_Get_count(&recv_status,MPI_INT,&n);
402d6dfbf8fSBarry Smith     source[imdex]  = recv_status.MPI_SOURCE;
403d6dfbf8fSBarry Smith     lens[imdex]  = n;
4041eb62cbbSBarry Smith     slen += n;
4051eb62cbbSBarry Smith     count--;
4061eb62cbbSBarry Smith   }
4070452661fSBarry Smith   PetscFree(recv_waits);
4081eb62cbbSBarry Smith 
4091eb62cbbSBarry Smith   /* move the data into the send scatter */
4100452661fSBarry Smith   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
4111eb62cbbSBarry Smith   count = 0;
4121eb62cbbSBarry Smith   for ( i=0; i<nrecvs; i++ ) {
4131eb62cbbSBarry Smith     values = rvalues + i*nmax;
4141eb62cbbSBarry Smith     for ( j=0; j<lens[i]; j++ ) {
4151eb62cbbSBarry Smith       lrows[count++] = values[j] - base;
4161eb62cbbSBarry Smith     }
4171eb62cbbSBarry Smith   }
4180452661fSBarry Smith   PetscFree(rvalues); PetscFree(lens);
4190452661fSBarry Smith   PetscFree(owner); PetscFree(nprocs);
4201eb62cbbSBarry Smith 
4211eb62cbbSBarry Smith   /* actually zap the local rows */
422416022c9SBarry Smith   ierr = ISCreateSeq(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
423464493b3SBarry Smith   PLogObjectParent(A,istmp);
4240452661fSBarry Smith   PetscFree(lrows);
42578b31e54SBarry Smith   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
42678b31e54SBarry Smith   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
42778b31e54SBarry Smith   ierr = ISDestroy(istmp); CHKERRQ(ierr);
4281eb62cbbSBarry Smith 
4291eb62cbbSBarry Smith   /* wait on sends */
4301eb62cbbSBarry Smith   if (nsends) {
4310452661fSBarry Smith     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
43278b31e54SBarry Smith     CHKPTRQ(send_status);
4331eb62cbbSBarry Smith     MPI_Waitall(nsends,send_waits,send_status);
4340452661fSBarry Smith     PetscFree(send_status);
4351eb62cbbSBarry Smith   }
4360452661fSBarry Smith   PetscFree(send_waits); PetscFree(svalues);
4371eb62cbbSBarry Smith 
4381eb62cbbSBarry Smith   return 0;
4391eb62cbbSBarry Smith }
4401eb62cbbSBarry Smith 
441416022c9SBarry Smith static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
4421eb62cbbSBarry Smith {
443416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
4441eb62cbbSBarry Smith   int        ierr;
445416022c9SBarry Smith 
44664119d58SLois Curfman McInnes   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx); CHKERRQ(ierr);
44738597bd4SSatish Balay   ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr);
44864119d58SLois Curfman McInnes   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx); CHKERRQ(ierr);
44938597bd4SSatish Balay   ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
4501eb62cbbSBarry Smith   return 0;
4511eb62cbbSBarry Smith }
4521eb62cbbSBarry Smith 
453416022c9SBarry Smith static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
454da3a660dSBarry Smith {
455416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
456da3a660dSBarry Smith   int        ierr;
45764119d58SLois Curfman McInnes   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
45838597bd4SSatish Balay   ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
45964119d58SLois Curfman McInnes   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
46038597bd4SSatish Balay   ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
461da3a660dSBarry Smith   return 0;
462da3a660dSBarry Smith }
463da3a660dSBarry Smith 
464416022c9SBarry Smith static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
465da3a660dSBarry Smith {
466416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
467da3a660dSBarry Smith   int        ierr;
468da3a660dSBarry Smith 
469da3a660dSBarry Smith   /* do nondiagonal part */
47038597bd4SSatish Balay   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
471da3a660dSBarry Smith   /* send it on its way */
472416022c9SBarry Smith   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,
47364119d58SLois Curfman McInnes                 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
474da3a660dSBarry Smith   /* do local part */
47538597bd4SSatish Balay   ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr);
476da3a660dSBarry Smith   /* receive remote parts: note this assumes the values are not actually */
477da3a660dSBarry Smith   /* inserted in yy until the next line, which is true for my implementation*/
478da3a660dSBarry Smith   /* but is not perhaps always true. */
479416022c9SBarry Smith   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,
48064119d58SLois Curfman McInnes                   (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
481da3a660dSBarry Smith   return 0;
482da3a660dSBarry Smith }
483da3a660dSBarry Smith 
484416022c9SBarry Smith static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
485da3a660dSBarry Smith {
486416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
487da3a660dSBarry Smith   int        ierr;
488da3a660dSBarry Smith 
489da3a660dSBarry Smith   /* do nondiagonal part */
49038597bd4SSatish Balay   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
491da3a660dSBarry Smith   /* send it on its way */
492416022c9SBarry Smith   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,
49364119d58SLois Curfman McInnes                  (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
494da3a660dSBarry Smith   /* do local part */
49538597bd4SSatish Balay   ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
496da3a660dSBarry Smith   /* receive remote parts: note this assumes the values are not actually */
497da3a660dSBarry Smith   /* inserted in yy until the next line, which is true for my implementation*/
498da3a660dSBarry Smith   /* but is not perhaps always true. */
499416022c9SBarry Smith   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,
50064119d58SLois Curfman McInnes                   (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
501da3a660dSBarry Smith   return 0;
502da3a660dSBarry Smith }
503da3a660dSBarry Smith 
5041eb62cbbSBarry Smith /*
5051eb62cbbSBarry Smith   This only works correctly for square matrices where the subblock A->A is the
5061eb62cbbSBarry Smith    diagonal block
5071eb62cbbSBarry Smith */
508416022c9SBarry Smith static int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
5091eb62cbbSBarry Smith {
510416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
51144cd7ae7SLois Curfman McInnes   if (a->M != a->N)
51244cd7ae7SLois Curfman McInnes     SETERRQ(1,"MatGetDiagonal_MPIAIJ:Supports only square matrix where A->A is diag block");
513416022c9SBarry Smith   return MatGetDiagonal(a->A,v);
5141eb62cbbSBarry Smith }
5151eb62cbbSBarry Smith 
516052efed2SBarry Smith static int MatScale_MPIAIJ(Scalar *aa,Mat A)
517052efed2SBarry Smith {
518052efed2SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
519052efed2SBarry Smith   int        ierr;
520052efed2SBarry Smith   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
521052efed2SBarry Smith   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
522052efed2SBarry Smith   return 0;
523052efed2SBarry Smith }
524052efed2SBarry Smith 
52544a69424SLois Curfman McInnes static int MatDestroy_MPIAIJ(PetscObject obj)
5261eb62cbbSBarry Smith {
5271eb62cbbSBarry Smith   Mat        mat = (Mat) obj;
52844a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
5291eb62cbbSBarry Smith   int        ierr;
530a5a9c739SBarry Smith #if defined(PETSC_LOG)
5316e35fa57SLois Curfman McInnes   PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N);
532a5a9c739SBarry Smith #endif
5330452661fSBarry Smith   PetscFree(aij->rowners);
53478b31e54SBarry Smith   ierr = MatDestroy(aij->A); CHKERRQ(ierr);
53578b31e54SBarry Smith   ierr = MatDestroy(aij->B); CHKERRQ(ierr);
5360452661fSBarry Smith   if (aij->colmap) PetscFree(aij->colmap);
5370452661fSBarry Smith   if (aij->garray) PetscFree(aij->garray);
5381eb62cbbSBarry Smith   if (aij->lvec)   VecDestroy(aij->lvec);
539a56f8943SBarry Smith   if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
5407a0afa10SBarry Smith   if (aij->rowvalues) PetscFree(aij->rowvalues);
5410452661fSBarry Smith   PetscFree(aij);
542a5a9c739SBarry Smith   PLogObjectDestroy(mat);
5430452661fSBarry Smith   PetscHeaderDestroy(mat);
5441eb62cbbSBarry Smith   return 0;
5451eb62cbbSBarry Smith }
5467857610eSBarry Smith #include "draw.h"
547b16a3bb1SBarry Smith #include "pinclude/pviewer.h"
548ee50ffe9SBarry Smith 
549416022c9SBarry Smith static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
5501eb62cbbSBarry Smith {
551416022c9SBarry Smith   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
552416022c9SBarry Smith   int         ierr;
553416022c9SBarry Smith 
55417699dbbSLois Curfman McInnes   if (aij->size == 1) {
555416022c9SBarry Smith     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
556416022c9SBarry Smith   }
557a523beb4SLois Curfman McInnes   else SETERRQ(1,"MatView_MPIAIJ_Binary:Only uniprocessor output supported");
558416022c9SBarry Smith   return 0;
559416022c9SBarry Smith }
560416022c9SBarry Smith 
561d7e8b826SBarry Smith static int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
562416022c9SBarry Smith {
56344a69424SLois Curfman McInnes   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
564dbb450caSBarry Smith   Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
565a56f8943SBarry Smith   int         ierr, format,shift = C->indexshift,rank;
566d636dbe3SBarry Smith   FILE        *fd;
56719bcc07fSBarry Smith   ViewerType  vtype;
568416022c9SBarry Smith 
56919bcc07fSBarry Smith   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
57019bcc07fSBarry Smith   if (vtype  == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
57190ace30eSBarry Smith     ierr = ViewerGetFormat(viewer,&format);
57290ace30eSBarry Smith     if (format == ASCII_FORMAT_INFO_DETAILED) {
57395e01e2fSLois Curfman McInnes       int nz, nzalloc, mem, flg;
574a56f8943SBarry Smith       MPI_Comm_rank(mat->comm,&rank);
57590ace30eSBarry Smith       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
576a56f8943SBarry Smith       ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem);
57795e01e2fSLois Curfman McInnes       ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr);
57877c4ece6SBarry Smith       PetscSequentialPhaseBegin(mat->comm,1);
57995e01e2fSLois Curfman McInnes       if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
58095e01e2fSLois Curfman McInnes          rank,aij->m,nz,nzalloc,mem);
58195e01e2fSLois Curfman McInnes       else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
58295e01e2fSLois Curfman McInnes          rank,aij->m,nz,nzalloc,mem);
58308480c60SBarry Smith       ierr = MatGetInfo(aij->A,MAT_LOCAL,&nz,&nzalloc,&mem);
58495e01e2fSLois Curfman McInnes       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,nz);
58508480c60SBarry Smith       ierr = MatGetInfo(aij->B,MAT_LOCAL,&nz,&nzalloc,&mem);
58695e01e2fSLois Curfman McInnes       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,nz);
587a56f8943SBarry Smith       fflush(fd);
58877c4ece6SBarry Smith       PetscSequentialPhaseEnd(mat->comm,1);
589a40aa06bSLois Curfman McInnes       ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr);
590416022c9SBarry Smith       return 0;
591416022c9SBarry Smith     }
59290ace30eSBarry Smith     else if (format == ASCII_FORMAT_INFO) {
59308480c60SBarry Smith       return 0;
59408480c60SBarry Smith     }
595416022c9SBarry Smith   }
596416022c9SBarry Smith 
59719bcc07fSBarry Smith   if (vtype == DRAW_VIEWER) {
59819bcc07fSBarry Smith     Draw       draw;
59919bcc07fSBarry Smith     PetscTruth isnull;
60019bcc07fSBarry Smith     ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
60119bcc07fSBarry Smith     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
60219bcc07fSBarry Smith   }
60319bcc07fSBarry Smith 
60419bcc07fSBarry Smith   if (vtype == ASCII_FILE_VIEWER) {
60590ace30eSBarry Smith     ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
60677c4ece6SBarry Smith     PetscSequentialPhaseBegin(mat->comm,1);
607d13ab20cSBarry Smith     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
60817699dbbSLois Curfman McInnes            aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
6091eb62cbbSBarry Smith            aij->cend);
61078b31e54SBarry Smith     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
61178b31e54SBarry Smith     ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
612d13ab20cSBarry Smith     fflush(fd);
61377c4ece6SBarry Smith     PetscSequentialPhaseEnd(mat->comm,1);
614d13ab20cSBarry Smith   }
615416022c9SBarry Smith   else {
616a56f8943SBarry Smith     int size = aij->size;
617a56f8943SBarry Smith     rank = aij->rank;
61817699dbbSLois Curfman McInnes     if (size == 1) {
61978b31e54SBarry Smith       ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
62095373324SBarry Smith     }
62195373324SBarry Smith     else {
62295373324SBarry Smith       /* assemble the entire matrix onto first processor. */
62395373324SBarry Smith       Mat         A;
624ec8511deSBarry Smith       Mat_SeqAIJ *Aloc;
6252eb8c8abSBarry Smith       int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
62695373324SBarry Smith       Scalar      *a;
6272ee70a88SLois Curfman McInnes 
62817699dbbSLois Curfman McInnes       if (!rank) {
629b4fd4287SBarry Smith         ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
630c451ab25SLois Curfman McInnes                CHKERRQ(ierr);
63195373324SBarry Smith       }
63295373324SBarry Smith       else {
633b4fd4287SBarry Smith         ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
634c451ab25SLois Curfman McInnes                CHKERRQ(ierr);
63595373324SBarry Smith       }
636464493b3SBarry Smith       PLogObjectParent(mat,A);
637416022c9SBarry Smith 
63895373324SBarry Smith       /* copy over the A part */
639ec8511deSBarry Smith       Aloc = (Mat_SeqAIJ*) aij->A->data;
6402ee70a88SLois Curfman McInnes       m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
64195373324SBarry Smith       row = aij->rstart;
642dbb450caSBarry Smith       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
64395373324SBarry Smith       for ( i=0; i<m; i++ ) {
644416022c9SBarry Smith         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
64595373324SBarry Smith         row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
64695373324SBarry Smith       }
6472ee70a88SLois Curfman McInnes       aj = Aloc->j;
648dbb450caSBarry Smith       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}
64995373324SBarry Smith 
65095373324SBarry Smith       /* copy over the B part */
651ec8511deSBarry Smith       Aloc = (Mat_SeqAIJ*) aij->B->data;
6522ee70a88SLois Curfman McInnes       m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
65395373324SBarry Smith       row = aij->rstart;
6540452661fSBarry Smith       ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
655dbb450caSBarry Smith       for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
65695373324SBarry Smith       for ( i=0; i<m; i++ ) {
657416022c9SBarry Smith         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
65895373324SBarry Smith         row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
65995373324SBarry Smith       }
6600452661fSBarry Smith       PetscFree(ct);
66178b31e54SBarry Smith       ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
66278b31e54SBarry Smith       ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
66317699dbbSLois Curfman McInnes       if (!rank) {
66478b31e54SBarry Smith         ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
66595373324SBarry Smith       }
66678b31e54SBarry Smith       ierr = MatDestroy(A); CHKERRQ(ierr);
66795373324SBarry Smith     }
66895373324SBarry Smith   }
6691eb62cbbSBarry Smith   return 0;
6701eb62cbbSBarry Smith }
6711eb62cbbSBarry Smith 
672416022c9SBarry Smith static int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
673416022c9SBarry Smith {
674416022c9SBarry Smith   Mat         mat = (Mat) obj;
675416022c9SBarry Smith   int         ierr;
67619bcc07fSBarry Smith   ViewerType  vtype;
677416022c9SBarry Smith 
678416022c9SBarry Smith   if (!viewer) {
67919bcc07fSBarry Smith     viewer = STDOUT_VIEWER_SELF;
680416022c9SBarry Smith   }
68119bcc07fSBarry Smith   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
68219bcc07fSBarry Smith   if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
68319bcc07fSBarry Smith       vtype == DRAW_VIEWER       || vtype == MATLAB_VIEWER) {
684d7e8b826SBarry Smith     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
685416022c9SBarry Smith   }
68619bcc07fSBarry Smith   else if (vtype == BINARY_FILE_VIEWER) {
687416022c9SBarry Smith     return MatView_MPIAIJ_Binary(mat,viewer);
688416022c9SBarry Smith   }
689416022c9SBarry Smith   return 0;
690416022c9SBarry Smith }
691416022c9SBarry Smith 
6921eb62cbbSBarry Smith /*
6931eb62cbbSBarry Smith     This has to provide several versions.
6941eb62cbbSBarry Smith 
6951eb62cbbSBarry Smith      1) per sequential
6961eb62cbbSBarry Smith      2) a) use only local smoothing updating outer values only once.
6971eb62cbbSBarry Smith         b) local smoothing updating outer values each inner iteration
698d6dfbf8fSBarry Smith      3) color updating out values betwen colors.
6991eb62cbbSBarry Smith */
700fc76ce87SLois Curfman McInnes static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
701dbb450caSBarry Smith                            double fshift,int its,Vec xx)
7028a729477SBarry Smith {
70344a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
704d6dfbf8fSBarry Smith   Mat        AA = mat->A, BB = mat->B;
705ec8511deSBarry Smith   Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
7066abc6512SBarry Smith   Scalar     zero = 0.0,*b,*x,*xs,*ls,d,*v,sum,scale,*t,*ts;
7076abc6512SBarry Smith   int        ierr,*idx, *diag;
708416022c9SBarry Smith   int        n = mat->n, m = mat->m, i,shift = A->indexshift;
709da3a660dSBarry Smith   Vec        tt;
7108a729477SBarry Smith 
711d6dfbf8fSBarry Smith   VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
712dbb450caSBarry Smith   xs = x + shift; /* shift by one for index start of 1 */
713dbb450caSBarry Smith   ls = ls + shift;
714ec8511deSBarry Smith   if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;}
715d6dfbf8fSBarry Smith   diag = A->diag;
716acb40c82SBarry Smith   if (flag == SOR_APPLY_UPPER || flag == SOR_APPLY_LOWER) {
71748d91487SBarry Smith     SETERRQ(1,"MatRelax_MPIAIJ:Option not supported");
718acb40c82SBarry Smith   }
719da3a660dSBarry Smith   if (flag & SOR_EISENSTAT) {
720da3a660dSBarry Smith     /* Let  A = L + U + D; where L is lower trianglar,
721da3a660dSBarry Smith     U is upper triangular, E is diagonal; This routine applies
722da3a660dSBarry Smith 
723da3a660dSBarry Smith             (L + E)^{-1} A (U + E)^{-1}
724da3a660dSBarry Smith 
725da3a660dSBarry Smith     to a vector efficiently using Eisenstat's trick. This is for
726da3a660dSBarry Smith     the case of SSOR preconditioner, so E is D/omega where omega
727da3a660dSBarry Smith     is the relaxation factor.
728da3a660dSBarry Smith     */
72978b31e54SBarry Smith     ierr = VecDuplicate(xx,&tt); CHKERRQ(ierr);
730464493b3SBarry Smith     PLogObjectParent(matin,tt);
731da3a660dSBarry Smith     VecGetArray(tt,&t);
732da3a660dSBarry Smith     scale = (2.0/omega) - 1.0;
733da3a660dSBarry Smith     /*  x = (E + U)^{-1} b */
734da3a660dSBarry Smith     VecSet(&zero,mat->lvec);
73564119d58SLois Curfman McInnes     ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
73678b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
737da3a660dSBarry Smith     for ( i=m-1; i>-1; i-- ) {
738da3a660dSBarry Smith       n    = A->i[i+1] - diag[i] - 1;
739dbb450caSBarry Smith       idx  = A->j + diag[i] + !shift;
740dbb450caSBarry Smith       v    = A->a + diag[i] + !shift;
741da3a660dSBarry Smith       sum  = b[i];
742da3a660dSBarry Smith       SPARSEDENSEMDOT(sum,xs,v,idx,n);
743dbb450caSBarry Smith       d    = fshift + A->a[diag[i]+shift];
744da3a660dSBarry Smith       n    = B->i[i+1] - B->i[i];
745dbb450caSBarry Smith       idx  = B->j + B->i[i] + shift;
746dbb450caSBarry Smith       v    = B->a + B->i[i] + shift;
747da3a660dSBarry Smith       SPARSEDENSEMDOT(sum,ls,v,idx,n);
748da3a660dSBarry Smith       x[i] = omega*(sum/d);
749da3a660dSBarry Smith     }
75064119d58SLois Curfman McInnes     ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
75178b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
752da3a660dSBarry Smith 
753da3a660dSBarry Smith     /*  t = b - (2*E - D)x */
754da3a660dSBarry Smith     v = A->a;
755dbb450caSBarry Smith     for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ + shift])*x[i]; }
756da3a660dSBarry Smith 
757da3a660dSBarry Smith     /*  t = (E + L)^{-1}t */
758dbb450caSBarry Smith     ts = t + shift; /* shifted by one for index start of a or mat->j*/
759da3a660dSBarry Smith     diag = A->diag;
760da3a660dSBarry Smith     VecSet(&zero,mat->lvec);
76164119d58SLois Curfman McInnes     ierr = VecPipelineBegin(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
76278b31e54SBarry Smith                                                  mat->Mvctx); CHKERRQ(ierr);
763da3a660dSBarry Smith     for ( i=0; i<m; i++ ) {
764da3a660dSBarry Smith       n    = diag[i] - A->i[i];
765dbb450caSBarry Smith       idx  = A->j + A->i[i] + shift;
766dbb450caSBarry Smith       v    = A->a + A->i[i] + shift;
767da3a660dSBarry Smith       sum  = t[i];
768da3a660dSBarry Smith       SPARSEDENSEMDOT(sum,ts,v,idx,n);
769dbb450caSBarry Smith       d    = fshift + A->a[diag[i]+shift];
770da3a660dSBarry Smith       n    = B->i[i+1] - B->i[i];
771dbb450caSBarry Smith       idx  = B->j + B->i[i] + shift;
772dbb450caSBarry Smith       v    = B->a + B->i[i] + shift;
773da3a660dSBarry Smith       SPARSEDENSEMDOT(sum,ls,v,idx,n);
774da3a660dSBarry Smith       t[i] = omega*(sum/d);
775da3a660dSBarry Smith     }
77664119d58SLois Curfman McInnes     ierr = VecPipelineEnd(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
77778b31e54SBarry Smith                                                     mat->Mvctx); CHKERRQ(ierr);
778da3a660dSBarry Smith     /*  x = x + t */
779da3a660dSBarry Smith     for ( i=0; i<m; i++ ) { x[i] += t[i]; }
780da3a660dSBarry Smith     VecDestroy(tt);
781da3a660dSBarry Smith     return 0;
782da3a660dSBarry Smith   }
783da3a660dSBarry Smith 
7841eb62cbbSBarry Smith 
785d6dfbf8fSBarry Smith   if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){
786da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
787da3a660dSBarry Smith       VecSet(&zero,mat->lvec); VecSet(&zero,xx);
788da3a660dSBarry Smith     }
789da3a660dSBarry Smith     else {
79064119d58SLois Curfman McInnes       ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
79178b31e54SBarry Smith       CHKERRQ(ierr);
79264119d58SLois Curfman McInnes       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
79378b31e54SBarry Smith       CHKERRQ(ierr);
794da3a660dSBarry Smith     }
795d6dfbf8fSBarry Smith     while (its--) {
796d6dfbf8fSBarry Smith       /* go down through the rows */
79764119d58SLois Curfman McInnes       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
79878b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
799d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
800d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
801dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
802dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
803d6dfbf8fSBarry Smith         sum  = b[i];
804d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
805dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
806d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
807dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
808dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
809d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
810d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
811d6dfbf8fSBarry Smith       }
81264119d58SLois Curfman McInnes       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
81378b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
814d6dfbf8fSBarry Smith       /* come up through the rows */
81564119d58SLois Curfman McInnes       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
81678b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
817d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
818d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
819dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
820dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
821d6dfbf8fSBarry Smith         sum  = b[i];
822d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
823dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
824d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
825dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
826dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
827d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
828d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
829d6dfbf8fSBarry Smith       }
83064119d58SLois Curfman McInnes       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
83178b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
832d6dfbf8fSBarry Smith     }
833d6dfbf8fSBarry Smith   }
834d6dfbf8fSBarry Smith   else if (flag & SOR_FORWARD_SWEEP){
835da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
836da3a660dSBarry Smith       VecSet(&zero,mat->lvec);
83764119d58SLois Curfman McInnes       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
83878b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
839da3a660dSBarry Smith       for ( i=0; i<m; i++ ) {
840da3a660dSBarry Smith         n    = diag[i] - A->i[i];
841dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
842dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
843da3a660dSBarry Smith         sum  = b[i];
844da3a660dSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
845dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
846da3a660dSBarry Smith         n    = B->i[i+1] - B->i[i];
847dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
848dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
849da3a660dSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
850da3a660dSBarry Smith         x[i] = omega*(sum/d);
851da3a660dSBarry Smith       }
85264119d58SLois Curfman McInnes       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
85378b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
854da3a660dSBarry Smith       its--;
855da3a660dSBarry Smith     }
856d6dfbf8fSBarry Smith     while (its--) {
85764119d58SLois Curfman McInnes       ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
85878b31e54SBarry Smith       CHKERRQ(ierr);
85964119d58SLois Curfman McInnes       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
86078b31e54SBarry Smith       CHKERRQ(ierr);
86164119d58SLois Curfman McInnes       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
86278b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
863d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
864d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
865dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
866dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
867d6dfbf8fSBarry Smith         sum  = b[i];
868d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
869dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
870d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
871dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
872dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
873d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
874d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
875d6dfbf8fSBarry Smith       }
87664119d58SLois Curfman McInnes       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
87778b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
878d6dfbf8fSBarry Smith     }
879d6dfbf8fSBarry Smith   }
880d6dfbf8fSBarry Smith   else if (flag & SOR_BACKWARD_SWEEP){
881da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
882da3a660dSBarry Smith       VecSet(&zero,mat->lvec);
88364119d58SLois Curfman McInnes       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
88478b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
885da3a660dSBarry Smith       for ( i=m-1; i>-1; i-- ) {
886da3a660dSBarry Smith         n    = A->i[i+1] - diag[i] - 1;
887dbb450caSBarry Smith         idx  = A->j + diag[i] + !shift;
888dbb450caSBarry Smith         v    = A->a + diag[i] + !shift;
889da3a660dSBarry Smith         sum  = b[i];
890da3a660dSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
891dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
892da3a660dSBarry Smith         n    = B->i[i+1] - B->i[i];
893dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
894dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
895da3a660dSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
896da3a660dSBarry Smith         x[i] = omega*(sum/d);
897da3a660dSBarry Smith       }
89864119d58SLois Curfman McInnes       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
89978b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
900da3a660dSBarry Smith       its--;
901da3a660dSBarry Smith     }
902d6dfbf8fSBarry Smith     while (its--) {
90364119d58SLois Curfman McInnes       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN,
90478b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
90564119d58SLois Curfman McInnes       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN,
90678b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
90764119d58SLois Curfman McInnes       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
90878b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
909d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
910d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
911dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
912dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
913d6dfbf8fSBarry Smith         sum  = b[i];
914d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
915dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
916d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
917dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
918dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
919d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
920d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
921d6dfbf8fSBarry Smith       }
92264119d58SLois Curfman McInnes       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
92378b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
924d6dfbf8fSBarry Smith     }
925d6dfbf8fSBarry Smith   }
926d6dfbf8fSBarry Smith   else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
927da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
92838597bd4SSatish Balay       return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
929da3a660dSBarry Smith     }
93064119d58SLois Curfman McInnes     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
93178b31e54SBarry Smith     CHKERRQ(ierr);
93264119d58SLois Curfman McInnes     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
93378b31e54SBarry Smith     CHKERRQ(ierr);
934d6dfbf8fSBarry Smith     while (its--) {
935d6dfbf8fSBarry Smith       /* go down through the rows */
936d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
937d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
938dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
939dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
940d6dfbf8fSBarry Smith         sum  = b[i];
941d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
942dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
943d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
944dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
945dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
946d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
947d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
948d6dfbf8fSBarry Smith       }
949d6dfbf8fSBarry Smith       /* come up through the rows */
950d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
951d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
952dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
953dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
954d6dfbf8fSBarry Smith         sum  = b[i];
955d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
956dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
957d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
958dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
959dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
960d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
961d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
962d6dfbf8fSBarry Smith       }
963d6dfbf8fSBarry Smith     }
964d6dfbf8fSBarry Smith   }
965d6dfbf8fSBarry Smith   else if (flag & SOR_LOCAL_FORWARD_SWEEP){
966da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
96738597bd4SSatish Balay       return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
968da3a660dSBarry Smith     }
96964119d58SLois Curfman McInnes     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
97078b31e54SBarry Smith     CHKERRQ(ierr);
97164119d58SLois Curfman McInnes     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
97278b31e54SBarry Smith     CHKERRQ(ierr);
973d6dfbf8fSBarry Smith     while (its--) {
974d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
975d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
976dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
977dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
978d6dfbf8fSBarry Smith         sum  = b[i];
979d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
980dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
981d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
982dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
983dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
984d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
985d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
986d6dfbf8fSBarry Smith       }
987d6dfbf8fSBarry Smith     }
988d6dfbf8fSBarry Smith   }
989d6dfbf8fSBarry Smith   else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
990da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
99138597bd4SSatish Balay       return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
992da3a660dSBarry Smith     }
99364119d58SLois Curfman McInnes     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,
99478b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
99564119d58SLois Curfman McInnes     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,
99678b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
997d6dfbf8fSBarry Smith     while (its--) {
998d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
999d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
1000dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
1001dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
1002d6dfbf8fSBarry Smith         sum  = b[i];
1003d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
1004dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
1005d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
1006dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
1007dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
1008d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
1009d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
1010d6dfbf8fSBarry Smith       }
1011d6dfbf8fSBarry Smith     }
1012d6dfbf8fSBarry Smith   }
10138a729477SBarry Smith   return 0;
10148a729477SBarry Smith }
1015a66be287SLois Curfman McInnes 
1016fc76ce87SLois Curfman McInnes static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,int *nz,
1017fc76ce87SLois Curfman McInnes                              int *nzalloc,int *mem)
1018a66be287SLois Curfman McInnes {
1019a66be287SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1020a66be287SLois Curfman McInnes   Mat        A = mat->A, B = mat->B;
1021a66be287SLois Curfman McInnes   int        ierr, isend[3], irecv[3], nzA, nzallocA, memA;
1022a66be287SLois Curfman McInnes 
102378b31e54SBarry Smith   ierr = MatGetInfo(A,MAT_LOCAL,&nzA,&nzallocA,&memA); CHKERRQ(ierr);
102478b31e54SBarry Smith   ierr = MatGetInfo(B,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr);
1025a66be287SLois Curfman McInnes   isend[0] += nzA; isend[1] += nzallocA; isend[2] += memA;
1026a66be287SLois Curfman McInnes   if (flag == MAT_LOCAL) {
1027bcd2baecSBarry Smith     if (nz)       *nz      = isend[0];
1028bcd2baecSBarry Smith     if (nzalloc)  *nzalloc = isend[1];
1029bcd2baecSBarry Smith     if (mem)      *mem     = isend[2];
1030a66be287SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_MAX) {
1031d65a2f8fSBarry Smith     MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_MAX,matin->comm);
1032bcd2baecSBarry Smith     if (nz)      *nz      = irecv[0];
1033bcd2baecSBarry Smith     if (nzalloc) *nzalloc = irecv[1];
1034bcd2baecSBarry Smith     if (mem)     *mem     = irecv[2];
1035a66be287SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_SUM) {
1036d65a2f8fSBarry Smith     MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_SUM,matin->comm);
1037bcd2baecSBarry Smith     if (nz)      *nz      = irecv[0];
1038bcd2baecSBarry Smith     if (nzalloc) *nzalloc = irecv[1];
1039bcd2baecSBarry Smith     if (mem)     *mem     = irecv[2];
1040a66be287SLois Curfman McInnes   }
1041a66be287SLois Curfman McInnes   return 0;
1042a66be287SLois Curfman McInnes }
1043a66be287SLois Curfman McInnes 
1044299609e3SLois Curfman McInnes extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*);
1045299609e3SLois Curfman McInnes extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*);
1046299609e3SLois Curfman McInnes extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double);
1047299609e3SLois Curfman McInnes extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *);
1048299609e3SLois Curfman McInnes extern int MatSolve_MPIAIJ(Mat,Vec,Vec);
1049299609e3SLois Curfman McInnes extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec);
1050299609e3SLois Curfman McInnes extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec);
1051299609e3SLois Curfman McInnes extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec);
1052299609e3SLois Curfman McInnes 
1053416022c9SBarry Smith static int MatSetOption_MPIAIJ(Mat A,MatOption op)
1054c74985f6SBarry Smith {
1055c0bbcb79SLois Curfman McInnes   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1056c74985f6SBarry Smith 
1057c0bbcb79SLois Curfman McInnes   if (op == NO_NEW_NONZERO_LOCATIONS ||
1058c0bbcb79SLois Curfman McInnes       op == YES_NEW_NONZERO_LOCATIONS ||
1059c0bbcb79SLois Curfman McInnes       op == COLUMNS_SORTED ||
1060c0bbcb79SLois Curfman McInnes       op == ROW_ORIENTED) {
1061c0bbcb79SLois Curfman McInnes         MatSetOption(a->A,op);
1062c0bbcb79SLois Curfman McInnes         MatSetOption(a->B,op);
1063c74985f6SBarry Smith   }
1064c0bbcb79SLois Curfman McInnes   else if (op == ROWS_SORTED ||
1065c0bbcb79SLois Curfman McInnes            op == SYMMETRIC_MATRIX ||
1066c0bbcb79SLois Curfman McInnes            op == STRUCTURALLY_SYMMETRIC_MATRIX ||
1067c0bbcb79SLois Curfman McInnes            op == YES_NEW_DIAGONALS)
106894a424c1SBarry Smith     PLogInfo(A,"Info:MatSetOption_MPIAIJ:Option ignored\n");
10694b0e389bSBarry Smith   else if (op == COLUMN_ORIENTED) {
10704b0e389bSBarry Smith     a->roworiented = 0;
10714b0e389bSBarry Smith     MatSetOption(a->A,op);
10724b0e389bSBarry Smith     MatSetOption(a->B,op);
10734b0e389bSBarry Smith   }
1074c0bbcb79SLois Curfman McInnes   else if (op == NO_NEW_DIAGONALS)
10754d39ef84SLois Curfman McInnes     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:NO_NEW_DIAGONALS");}
1076c0bbcb79SLois Curfman McInnes   else
10774d39ef84SLois Curfman McInnes     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:unknown option");}
1078c74985f6SBarry Smith   return 0;
1079c74985f6SBarry Smith }
1080c74985f6SBarry Smith 
1081d1710a03SLois Curfman McInnes static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
1082c74985f6SBarry Smith {
108344a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1084c74985f6SBarry Smith   *m = mat->M; *n = mat->N;
1085c74985f6SBarry Smith   return 0;
1086c74985f6SBarry Smith }
1087c74985f6SBarry Smith 
1088d1710a03SLois Curfman McInnes static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
1089c74985f6SBarry Smith {
109044a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1091b7c46309SBarry Smith   *m = mat->m; *n = mat->N;
1092c74985f6SBarry Smith   return 0;
1093c74985f6SBarry Smith }
1094c74985f6SBarry Smith 
1095d1710a03SLois Curfman McInnes static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
1096c74985f6SBarry Smith {
109744a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1098c74985f6SBarry Smith   *m = mat->rstart; *n = mat->rend;
1099c74985f6SBarry Smith   return 0;
1100c74985f6SBarry Smith }
1101c74985f6SBarry Smith 
11026d84be18SBarry Smith extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
11036d84be18SBarry Smith extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
11046d84be18SBarry Smith 
11056d84be18SBarry Smith int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
110639e00950SLois Curfman McInnes {
1107154123eaSLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
110870f0671dSBarry Smith   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
1109154123eaSLois Curfman McInnes   int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
1110154123eaSLois Curfman McInnes   int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
111170f0671dSBarry Smith   int        *cmap, *idx_p;
111239e00950SLois Curfman McInnes 
11137a0afa10SBarry Smith   if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,"MatGetRow_MPIAIJ:Already active");
11147a0afa10SBarry Smith   mat->getrowactive = PETSC_TRUE;
11157a0afa10SBarry Smith 
111670f0671dSBarry Smith   if (!mat->rowvalues && (idx || v)) {
11177a0afa10SBarry Smith     /*
11187a0afa10SBarry Smith         allocate enough space to hold information from the longest row.
11197a0afa10SBarry Smith     */
11207a0afa10SBarry Smith     Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data;
11217a0afa10SBarry Smith     int     max = 1,n = mat->n,tmp;
11227a0afa10SBarry Smith     for ( i=0; i<n; i++ ) {
11237a0afa10SBarry Smith       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
11247a0afa10SBarry Smith       if (max < tmp) { max = tmp; }
11257a0afa10SBarry Smith     }
11267a0afa10SBarry Smith     mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar)));
11277a0afa10SBarry Smith     CHKPTRQ(mat->rowvalues);
11287a0afa10SBarry Smith     mat->rowindices = (int *) (mat->rowvalues + max);
11297a0afa10SBarry Smith   }
11307a0afa10SBarry Smith 
11317a0afa10SBarry Smith 
1132b49de8d1SLois Curfman McInnes   if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIAIJ:Only local rows")
1133abc0e9e4SLois Curfman McInnes   lrow = row - rstart;
113439e00950SLois Curfman McInnes 
1135154123eaSLois Curfman McInnes   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1136154123eaSLois Curfman McInnes   if (!v)   {pvA = 0; pvB = 0;}
1137154123eaSLois Curfman McInnes   if (!idx) {pcA = 0; if (!v) pcB = 0;}
113838597bd4SSatish Balay   ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
113938597bd4SSatish Balay   ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1140154123eaSLois Curfman McInnes   nztot = nzA + nzB;
1141154123eaSLois Curfman McInnes 
114270f0671dSBarry Smith   cmap  = mat->garray;
1143154123eaSLois Curfman McInnes   if (v  || idx) {
1144154123eaSLois Curfman McInnes     if (nztot) {
1145154123eaSLois Curfman McInnes       /* Sort by increasing column numbers, assuming A and B already sorted */
114670f0671dSBarry Smith       int imark = -1;
1147154123eaSLois Curfman McInnes       if (v) {
114870f0671dSBarry Smith         *v = v_p = mat->rowvalues;
114939e00950SLois Curfman McInnes         for ( i=0; i<nzB; i++ ) {
115070f0671dSBarry Smith           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1151154123eaSLois Curfman McInnes           else break;
1152154123eaSLois Curfman McInnes         }
1153154123eaSLois Curfman McInnes         imark = i;
115470f0671dSBarry Smith         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
115570f0671dSBarry Smith         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
1156154123eaSLois Curfman McInnes       }
1157154123eaSLois Curfman McInnes       if (idx) {
115870f0671dSBarry Smith         *idx = idx_p = mat->rowindices;
115970f0671dSBarry Smith         if (imark > -1) {
116070f0671dSBarry Smith           for ( i=0; i<imark; i++ ) {
116170f0671dSBarry Smith             idx_p[i] = cmap[cworkB[i]];
116270f0671dSBarry Smith           }
116370f0671dSBarry Smith         } else {
1164154123eaSLois Curfman McInnes           for ( i=0; i<nzB; i++ ) {
116570f0671dSBarry Smith             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1166154123eaSLois Curfman McInnes             else break;
1167154123eaSLois Curfman McInnes           }
1168154123eaSLois Curfman McInnes           imark = i;
116970f0671dSBarry Smith         }
117070f0671dSBarry Smith         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart + cworkA[i];
117170f0671dSBarry Smith         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]];
117239e00950SLois Curfman McInnes       }
117339e00950SLois Curfman McInnes     }
117439e00950SLois Curfman McInnes     else {*idx = 0; *v=0;}
1175154123eaSLois Curfman McInnes   }
117639e00950SLois Curfman McInnes   *nz = nztot;
117738597bd4SSatish Balay   ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
117838597bd4SSatish Balay   ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
117939e00950SLois Curfman McInnes   return 0;
118039e00950SLois Curfman McInnes }
118139e00950SLois Curfman McInnes 
11826d84be18SBarry Smith int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
118339e00950SLois Curfman McInnes {
11847a0afa10SBarry Smith   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
11857a0afa10SBarry Smith   if (aij->getrowactive == PETSC_FALSE) {
11867a0afa10SBarry Smith     SETERRQ(1,"MatRestoreRow_MPIAIJ:MatGetRow not called");
11877a0afa10SBarry Smith   }
11887a0afa10SBarry Smith   aij->getrowactive = PETSC_FALSE;
118939e00950SLois Curfman McInnes   return 0;
119039e00950SLois Curfman McInnes }
119139e00950SLois Curfman McInnes 
1192cddf8d76SBarry Smith static int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
1193855ac2c5SLois Curfman McInnes {
1194855ac2c5SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1195ec8511deSBarry Smith   Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
1196416022c9SBarry Smith   int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
1197416022c9SBarry Smith   double     sum = 0.0;
119804ca555eSLois Curfman McInnes   Scalar     *v;
119904ca555eSLois Curfman McInnes 
120017699dbbSLois Curfman McInnes   if (aij->size == 1) {
120114183eadSLois Curfman McInnes     ierr =  MatNorm(aij->A,type,norm); CHKERRQ(ierr);
120237fa93a5SLois Curfman McInnes   } else {
120304ca555eSLois Curfman McInnes     if (type == NORM_FROBENIUS) {
120404ca555eSLois Curfman McInnes       v = amat->a;
120504ca555eSLois Curfman McInnes       for (i=0; i<amat->nz; i++ ) {
120604ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX)
120704ca555eSLois Curfman McInnes         sum += real(conj(*v)*(*v)); v++;
120804ca555eSLois Curfman McInnes #else
120904ca555eSLois Curfman McInnes         sum += (*v)*(*v); v++;
121004ca555eSLois Curfman McInnes #endif
121104ca555eSLois Curfman McInnes       }
121204ca555eSLois Curfman McInnes       v = bmat->a;
121304ca555eSLois Curfman McInnes       for (i=0; i<bmat->nz; i++ ) {
121404ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX)
121504ca555eSLois Curfman McInnes         sum += real(conj(*v)*(*v)); v++;
121604ca555eSLois Curfman McInnes #else
121704ca555eSLois Curfman McInnes         sum += (*v)*(*v); v++;
121804ca555eSLois Curfman McInnes #endif
121904ca555eSLois Curfman McInnes       }
12206d84be18SBarry Smith       MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
122104ca555eSLois Curfman McInnes       *norm = sqrt(*norm);
122204ca555eSLois Curfman McInnes     }
122304ca555eSLois Curfman McInnes     else if (type == NORM_1) { /* max column norm */
122404ca555eSLois Curfman McInnes       double *tmp, *tmp2;
122504ca555eSLois Curfman McInnes       int    *jj, *garray = aij->garray;
12260452661fSBarry Smith       tmp  = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp);
12270452661fSBarry Smith       tmp2 = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp2);
1228cddf8d76SBarry Smith       PetscMemzero(tmp,aij->N*sizeof(double));
122904ca555eSLois Curfman McInnes       *norm = 0.0;
123004ca555eSLois Curfman McInnes       v = amat->a; jj = amat->j;
123104ca555eSLois Curfman McInnes       for ( j=0; j<amat->nz; j++ ) {
1232579c6b6fSBarry Smith         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
123304ca555eSLois Curfman McInnes       }
123404ca555eSLois Curfman McInnes       v = bmat->a; jj = bmat->j;
123504ca555eSLois Curfman McInnes       for ( j=0; j<bmat->nz; j++ ) {
1236579c6b6fSBarry Smith         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
123704ca555eSLois Curfman McInnes       }
12386d84be18SBarry Smith       MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);
123904ca555eSLois Curfman McInnes       for ( j=0; j<aij->N; j++ ) {
124004ca555eSLois Curfman McInnes         if (tmp2[j] > *norm) *norm = tmp2[j];
124104ca555eSLois Curfman McInnes       }
12420452661fSBarry Smith       PetscFree(tmp); PetscFree(tmp2);
124304ca555eSLois Curfman McInnes     }
124404ca555eSLois Curfman McInnes     else if (type == NORM_INFINITY) { /* max row norm */
1245515d9167SLois Curfman McInnes       double ntemp = 0.0;
124604ca555eSLois Curfman McInnes       for ( j=0; j<amat->m; j++ ) {
1247dbb450caSBarry Smith         v = amat->a + amat->i[j] + shift;
124804ca555eSLois Curfman McInnes         sum = 0.0;
124904ca555eSLois Curfman McInnes         for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
1250cddf8d76SBarry Smith           sum += PetscAbsScalar(*v); v++;
125104ca555eSLois Curfman McInnes         }
1252dbb450caSBarry Smith         v = bmat->a + bmat->i[j] + shift;
125304ca555eSLois Curfman McInnes         for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
1254cddf8d76SBarry Smith           sum += PetscAbsScalar(*v); v++;
125504ca555eSLois Curfman McInnes         }
1256515d9167SLois Curfman McInnes         if (sum > ntemp) ntemp = sum;
125704ca555eSLois Curfman McInnes       }
12586d84be18SBarry Smith       MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);
125904ca555eSLois Curfman McInnes     }
126004ca555eSLois Curfman McInnes     else {
1261515d9167SLois Curfman McInnes       SETERRQ(1,"MatNorm_MPIAIJ:No support for two norm");
126204ca555eSLois Curfman McInnes     }
126337fa93a5SLois Curfman McInnes   }
1264855ac2c5SLois Curfman McInnes   return 0;
1265855ac2c5SLois Curfman McInnes }
1266855ac2c5SLois Curfman McInnes 
12670de55854SLois Curfman McInnes static int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1268b7c46309SBarry Smith {
1269b7c46309SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1270dbb450caSBarry Smith   Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
1271416022c9SBarry Smith   int        ierr,shift = Aloc->indexshift;
1272416022c9SBarry Smith   Mat        B;
1273b7c46309SBarry Smith   int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
1274b7c46309SBarry Smith   Scalar     *array;
1275b7c46309SBarry Smith 
12763638b69dSLois Curfman McInnes   if (matout == PETSC_NULL && M != N)
12773638b69dSLois Curfman McInnes     SETERRQ(1,"MatTranspose_MPIAIJ:Square matrix only for in-place");
1278b4fd4287SBarry Smith   ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,
1279b4fd4287SBarry Smith          PETSC_NULL,&B); CHKERRQ(ierr);
1280b7c46309SBarry Smith 
1281b7c46309SBarry Smith   /* copy over the A part */
1282ec8511deSBarry Smith   Aloc = (Mat_SeqAIJ*) a->A->data;
1283b7c46309SBarry Smith   m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1284b7c46309SBarry Smith   row = a->rstart;
1285dbb450caSBarry Smith   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
1286b7c46309SBarry Smith   for ( i=0; i<m; i++ ) {
1287416022c9SBarry Smith     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1288b7c46309SBarry Smith     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1289b7c46309SBarry Smith   }
1290b7c46309SBarry Smith   aj = Aloc->j;
12914af08d9eSBarry Smith   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}
1292b7c46309SBarry Smith 
1293b7c46309SBarry Smith   /* copy over the B part */
1294ec8511deSBarry Smith   Aloc = (Mat_SeqAIJ*) a->B->data;
1295b7c46309SBarry Smith   m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1296b7c46309SBarry Smith   row = a->rstart;
12970452661fSBarry Smith   ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
1298dbb450caSBarry Smith   for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
1299b7c46309SBarry Smith   for ( i=0; i<m; i++ ) {
1300416022c9SBarry Smith     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1301b7c46309SBarry Smith     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1302b7c46309SBarry Smith   }
13030452661fSBarry Smith   PetscFree(ct);
1304b7c46309SBarry Smith   ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
1305b7c46309SBarry Smith   ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
13063638b69dSLois Curfman McInnes   if (matout != PETSC_NULL) {
13070de55854SLois Curfman McInnes     *matout = B;
13080de55854SLois Curfman McInnes   } else {
13090de55854SLois Curfman McInnes     /* This isn't really an in-place transpose .... but free data structures from a */
13100452661fSBarry Smith     PetscFree(a->rowners);
13110de55854SLois Curfman McInnes     ierr = MatDestroy(a->A); CHKERRQ(ierr);
13120de55854SLois Curfman McInnes     ierr = MatDestroy(a->B); CHKERRQ(ierr);
13130452661fSBarry Smith     if (a->colmap) PetscFree(a->colmap);
13140452661fSBarry Smith     if (a->garray) PetscFree(a->garray);
13150de55854SLois Curfman McInnes     if (a->lvec) VecDestroy(a->lvec);
1316a56f8943SBarry Smith     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
13170452661fSBarry Smith     PetscFree(a);
1318416022c9SBarry Smith     PetscMemcpy(A,B,sizeof(struct _Mat));
13190452661fSBarry Smith     PetscHeaderDestroy(B);
13200de55854SLois Curfman McInnes   }
1321b7c46309SBarry Smith   return 0;
1322b7c46309SBarry Smith }
1323b7c46309SBarry Smith 
1324682d7d0cSBarry Smith extern int MatPrintHelp_SeqAIJ(Mat);
1325682d7d0cSBarry Smith static int MatPrintHelp_MPIAIJ(Mat A)
1326682d7d0cSBarry Smith {
1327682d7d0cSBarry Smith   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1328682d7d0cSBarry Smith 
1329682d7d0cSBarry Smith   if (!a->rank) return MatPrintHelp_SeqAIJ(a->A);
1330682d7d0cSBarry Smith   else return 0;
1331682d7d0cSBarry Smith }
1332682d7d0cSBarry Smith 
1333fc76ce87SLois Curfman McInnes extern int MatConvert_MPIAIJ(Mat,MatType,Mat *);
13343d1612f7SBarry Smith static int MatConvertSameType_MPIAIJ(Mat,Mat *,int);
13352f86bd48SSatish Balay extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int);
1336598137ffSSatish Balay int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **);
13378a729477SBarry Smith /* -------------------------------------------------------------------*/
13382ee70a88SLois Curfman McInnes static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
133939e00950SLois Curfman McInnes        MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
134044a69424SLois Curfman McInnes        MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
134144a69424SLois Curfman McInnes        MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
1342299609e3SLois Curfman McInnes        MatSolve_MPIAIJ,MatSolveAdd_MPIAIJ,
1343299609e3SLois Curfman McInnes        MatSolveTrans_MPIAIJ,MatSolveTransAdd_MPIAIJ,
1344299609e3SLois Curfman McInnes        MatLUFactor_MPIAIJ,0,
134544a69424SLois Curfman McInnes        MatRelax_MPIAIJ,
1346b7c46309SBarry Smith        MatTranspose_MPIAIJ,
1347a66be287SLois Curfman McInnes        MatGetInfo_MPIAIJ,0,
1348855ac2c5SLois Curfman McInnes        MatGetDiagonal_MPIAIJ,0,MatNorm_MPIAIJ,
1349ee50ffe9SBarry Smith        MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
13501eb62cbbSBarry Smith        0,
1351299609e3SLois Curfman McInnes        MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
1352299609e3SLois Curfman McInnes        MatGetReordering_MPIAIJ,
1353299609e3SLois Curfman McInnes        MatLUFactorSymbolic_MPIAIJ,MatLUFactorNumeric_MPIAIJ,0,0,
1354d1710a03SLois Curfman McInnes        MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
1355299609e3SLois Curfman McInnes        MatILUFactorSymbolic_MPIAIJ,0,
13563d1612f7SBarry Smith        0,0,MatConvert_MPIAIJ,0,0,MatConvertSameType_MPIAIJ,0,0,
1357b49de8d1SLois Curfman McInnes        0,0,0,
1358598137ffSSatish Balay        MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0,
1359052efed2SBarry Smith        MatPrintHelp_MPIAIJ,
1360052efed2SBarry Smith        MatScale_MPIAIJ};
13618a729477SBarry Smith 
13621987afe7SBarry Smith /*@C
1363ff756334SLois Curfman McInnes    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
13643a511b96SLois Curfman McInnes    (the default parallel PETSc format).  For good matrix assembly performance
13653a511b96SLois Curfman McInnes    the user should preallocate the matrix storage by setting the parameters
13663a511b96SLois Curfman McInnes    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
13673a511b96SLois Curfman McInnes    performance can be increased by more than a factor of 50.
13688a729477SBarry Smith 
13698a729477SBarry Smith    Input Parameters:
13701eb62cbbSBarry Smith .  comm - MPI communicator
13717d3e4905SLois Curfman McInnes .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
13727d3e4905SLois Curfman McInnes .  n - number of local columns (or PETSC_DECIDE to have calculated
13737d3e4905SLois Curfman McInnes            if N is given)
13747d3e4905SLois Curfman McInnes .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
13757d3e4905SLois Curfman McInnes .  N - number of global columns (or PETSC_DECIDE to have calculated
13767d3e4905SLois Curfman McInnes            if n is given)
1377ab693e5aSLois Curfman McInnes .  d_nz - number of nonzeros per row in diagonal portion of local submatrix
1378ff756334SLois Curfman McInnes            (same for all local rows)
13792bd5e0b2SLois Curfman McInnes .  d_nzz - array containing the number of nonzeros in the various rows of the
13802bd5e0b2SLois Curfman McInnes            diagonal portion of local submatrix (possibly different for each row)
13812bd5e0b2SLois Curfman McInnes            or PETSC_NULL. You must leave room for the diagonal entry even if
13822bd5e0b2SLois Curfman McInnes            it is zero.
13832bd5e0b2SLois Curfman McInnes .  o_nz - number of nonzeros per row in the off-diagonal portion of local
1384ab693e5aSLois Curfman McInnes            submatrix (same for all local rows).
13852bd5e0b2SLois Curfman McInnes .  o_nzz - array containing the number of nonzeros in the various rows of the
13862bd5e0b2SLois Curfman McInnes            off-diagonal portion of the local submatrix (possibly different for
13872bd5e0b2SLois Curfman McInnes            each row) or PETSC_NULL.
13888a729477SBarry Smith 
1389ff756334SLois Curfman McInnes    Output Parameter:
139044cd7ae7SLois Curfman McInnes .  A - the matrix
13918a729477SBarry Smith 
1392ff756334SLois Curfman McInnes    Notes:
1393ff756334SLois Curfman McInnes    The AIJ format (also called the Yale sparse matrix format or
1394ff756334SLois Curfman McInnes    compressed row storage), is fully compatible with standard Fortran 77
13950002213bSLois Curfman McInnes    storage.  That is, the stored row and column indices can begin at
13960002213bSLois Curfman McInnes    either one (as in Fortran) or zero.  See the users manual for details.
1397ff756334SLois Curfman McInnes 
1398ff756334SLois Curfman McInnes    The user MUST specify either the local or global matrix dimensions
1399ff756334SLois Curfman McInnes    (possibly both).
1400ff756334SLois Curfman McInnes 
14015511cfe3SLois Curfman McInnes    By default, this format uses inodes (identical nodes) when possible.
14025511cfe3SLois Curfman McInnes    We search for consecutive rows with the same nonzero structure, thereby
14035511cfe3SLois Curfman McInnes    reusing matrix information to achieve increased efficiency.
14045511cfe3SLois Curfman McInnes 
14055511cfe3SLois Curfman McInnes    Options Database Keys:
14065511cfe3SLois Curfman McInnes $    -mat_aij_no_inode  - Do not use inodes
14076e62573dSLois Curfman McInnes $    -mat_aij_inode_limit <limit> - Set inode limit.
14086e62573dSLois Curfman McInnes $        (max limit=5)
14096e62573dSLois Curfman McInnes $    -mat_aij_oneindex - Internally use indexing starting at 1
14106e62573dSLois Curfman McInnes $        rather than 0.  Note: When calling MatSetValues(),
14116e62573dSLois Curfman McInnes $        the user still MUST index entries starting at 0!
14125511cfe3SLois Curfman McInnes 
1413e0245417SLois Curfman McInnes    Storage Information:
1414e0245417SLois Curfman McInnes    For a square global matrix we define each processor's diagonal portion
1415e0245417SLois Curfman McInnes    to be its local rows and the corresponding columns (a square submatrix);
1416e0245417SLois Curfman McInnes    each processor's off-diagonal portion encompasses the remainder of the
1417e0245417SLois Curfman McInnes    local matrix (a rectangular submatrix).
1418e0245417SLois Curfman McInnes 
1419e0245417SLois Curfman McInnes    The user can specify preallocated storage for the diagonal part of
14205ace5be8SLois Curfman McInnes    the local submatrix with either d_nz or d_nnz (not both).  Set
14215ace5be8SLois Curfman McInnes    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
14225ace5be8SLois Curfman McInnes    memory allocation.  Likewise, specify preallocated storage for the
14235ace5be8SLois Curfman McInnes    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1424ff756334SLois Curfman McInnes 
14255511cfe3SLois Curfman McInnes    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
14265511cfe3SLois Curfman McInnes    the figure below we depict these three local rows and all columns (0-11).
14272191d07cSBarry Smith 
1428b810aeb4SBarry Smith $          0 1 2 3 4 5 6 7 8 9 10 11
1429b810aeb4SBarry Smith $         -------------------
14305511cfe3SLois Curfman McInnes $  row 3  |  o o o d d d o o o o o o
14315511cfe3SLois Curfman McInnes $  row 4  |  o o o d d d o o o o o o
14325511cfe3SLois Curfman McInnes $  row 5  |  o o o d d d o o o o o o
14335511cfe3SLois Curfman McInnes $         -------------------
1434b810aeb4SBarry Smith $
1435b810aeb4SBarry Smith 
14365511cfe3SLois Curfman McInnes    Thus, any entries in the d locations are stored in the d (diagonal)
14375511cfe3SLois Curfman McInnes    submatrix, and any entries in the o locations are stored in the
14385511cfe3SLois Curfman McInnes    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
14395511cfe3SLois Curfman McInnes    stored simply in the MATSEQAIJ format for compressed row storage.
14405511cfe3SLois Curfman McInnes 
14415511cfe3SLois Curfman McInnes    Now d_nz should indicate the number of nonzeros per row in the d matrix,
14425511cfe3SLois Curfman McInnes    and o_nz should indicate the number of nonzeros per row in the o matrix.
14435511cfe3SLois Curfman McInnes    In general, for PDE problems in which most nonzeros are near the diagonal,
14443d323bbdSBarry Smith    one expects d_nz >> o_nz. For large problems you MUST preallocate memory
14453d323bbdSBarry Smith    or you will get TERRIBLE performance, see the users' manual chapter on
14463d323bbdSBarry Smith    matrices and the file $(PETSC_DIR)/Performance.
14473a511b96SLois Curfman McInnes 
1448dbd7a890SLois Curfman McInnes .keywords: matrix, aij, compressed row, sparse, parallel
1449ff756334SLois Curfman McInnes 
1450fafbff53SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
14518a729477SBarry Smith @*/
14521eb62cbbSBarry Smith int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
145344cd7ae7SLois Curfman McInnes                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
14548a729477SBarry Smith {
145544cd7ae7SLois Curfman McInnes   Mat          B;
145644cd7ae7SLois Curfman McInnes   Mat_MPIAIJ   *b;
14576abc6512SBarry Smith   int          ierr, i,sum[2],work[2];
1458416022c9SBarry Smith 
145944cd7ae7SLois Curfman McInnes   *A = 0;
146044cd7ae7SLois Curfman McInnes   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIAIJ,comm);
146144cd7ae7SLois Curfman McInnes   PLogObjectCreate(B);
146244cd7ae7SLois Curfman McInnes   B->data       = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b);
146344cd7ae7SLois Curfman McInnes   PetscMemzero(b,sizeof(Mat_MPIAIJ));
146444cd7ae7SLois Curfman McInnes   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
146544cd7ae7SLois Curfman McInnes   B->destroy    = MatDestroy_MPIAIJ;
146644cd7ae7SLois Curfman McInnes   B->view       = MatView_MPIAIJ;
146744cd7ae7SLois Curfman McInnes   B->factor     = 0;
146844cd7ae7SLois Curfman McInnes   B->assembled  = PETSC_FALSE;
1469d6dfbf8fSBarry Smith 
147044cd7ae7SLois Curfman McInnes   b->insertmode = NOT_SET_VALUES;
147144cd7ae7SLois Curfman McInnes   MPI_Comm_rank(comm,&b->rank);
147244cd7ae7SLois Curfman McInnes   MPI_Comm_size(comm,&b->size);
14731eb62cbbSBarry Smith 
1474b4fd4287SBarry Smith   if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
14752e0155e0SLois Curfman McInnes     SETERRQ(1,"MatCreateMPIAIJ:Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
14761987afe7SBarry Smith 
1477dbd7a890SLois Curfman McInnes   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
14781eb62cbbSBarry Smith     work[0] = m; work[1] = n;
1479d65a2f8fSBarry Smith     MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
1480dbd7a890SLois Curfman McInnes     if (M == PETSC_DECIDE) M = sum[0];
1481dbd7a890SLois Curfman McInnes     if (N == PETSC_DECIDE) N = sum[1];
14821eb62cbbSBarry Smith   }
148344cd7ae7SLois Curfman McInnes   if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);}
148444cd7ae7SLois Curfman McInnes   if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);}
148544cd7ae7SLois Curfman McInnes   b->m = m; B->m = m;
148644cd7ae7SLois Curfman McInnes   b->n = n; B->n = n;
148744cd7ae7SLois Curfman McInnes   b->N = N; B->N = N;
148844cd7ae7SLois Curfman McInnes   b->M = M; B->M = M;
14891eb62cbbSBarry Smith 
14901eb62cbbSBarry Smith   /* build local table of row and column ownerships */
149144cd7ae7SLois Curfman McInnes   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
149244cd7ae7SLois Curfman McInnes   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
149344cd7ae7SLois Curfman McInnes   b->cowners = b->rowners + b->size + 1;
149444cd7ae7SLois Curfman McInnes   MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);
149544cd7ae7SLois Curfman McInnes   b->rowners[0] = 0;
149644cd7ae7SLois Curfman McInnes   for ( i=2; i<=b->size; i++ ) {
149744cd7ae7SLois Curfman McInnes     b->rowners[i] += b->rowners[i-1];
14988a729477SBarry Smith   }
149944cd7ae7SLois Curfman McInnes   b->rstart = b->rowners[b->rank];
150044cd7ae7SLois Curfman McInnes   b->rend   = b->rowners[b->rank+1];
150144cd7ae7SLois Curfman McInnes   MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);
150244cd7ae7SLois Curfman McInnes   b->cowners[0] = 0;
150344cd7ae7SLois Curfman McInnes   for ( i=2; i<=b->size; i++ ) {
150444cd7ae7SLois Curfman McInnes     b->cowners[i] += b->cowners[i-1];
15058a729477SBarry Smith   }
150644cd7ae7SLois Curfman McInnes   b->cstart = b->cowners[b->rank];
150744cd7ae7SLois Curfman McInnes   b->cend   = b->cowners[b->rank+1];
15088a729477SBarry Smith 
15095ace5be8SLois Curfman McInnes   if (d_nz == PETSC_DEFAULT) d_nz = 5;
151044cd7ae7SLois Curfman McInnes   ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
151144cd7ae7SLois Curfman McInnes   PLogObjectParent(B,b->A);
15127b8455f0SLois Curfman McInnes   if (o_nz == PETSC_DEFAULT) o_nz = 0;
151344cd7ae7SLois Curfman McInnes   ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
151444cd7ae7SLois Curfman McInnes   PLogObjectParent(B,b->B);
15158a729477SBarry Smith 
15161eb62cbbSBarry Smith   /* build cache for off array entries formed */
151744cd7ae7SLois Curfman McInnes   ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
151844cd7ae7SLois Curfman McInnes   b->colmap      = 0;
151944cd7ae7SLois Curfman McInnes   b->garray      = 0;
152044cd7ae7SLois Curfman McInnes   b->roworiented = 1;
15218a729477SBarry Smith 
15221eb62cbbSBarry Smith   /* stuff used for matrix vector multiply */
152344cd7ae7SLois Curfman McInnes   b->lvec      = 0;
152444cd7ae7SLois Curfman McInnes   b->Mvctx     = 0;
15258a729477SBarry Smith 
15267a0afa10SBarry Smith   /* stuff for MatGetRow() */
152744cd7ae7SLois Curfman McInnes   b->rowindices   = 0;
152844cd7ae7SLois Curfman McInnes   b->rowvalues    = 0;
152944cd7ae7SLois Curfman McInnes   b->getrowactive = PETSC_FALSE;
15307a0afa10SBarry Smith 
153144cd7ae7SLois Curfman McInnes   *A = B;
1532d6dfbf8fSBarry Smith   return 0;
1533d6dfbf8fSBarry Smith }
1534c74985f6SBarry Smith 
15353d1612f7SBarry Smith static int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues)
1536d6dfbf8fSBarry Smith {
1537d6dfbf8fSBarry Smith   Mat        mat;
1538416022c9SBarry Smith   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
1539a1b97e82SLois Curfman McInnes   int        ierr, len=0, flg;
1540d6dfbf8fSBarry Smith 
1541416022c9SBarry Smith   *newmat       = 0;
15420452661fSBarry Smith   PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm);
1543a5a9c739SBarry Smith   PLogObjectCreate(mat);
15440452661fSBarry Smith   mat->data       = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
1545416022c9SBarry Smith   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
154644a69424SLois Curfman McInnes   mat->destroy    = MatDestroy_MPIAIJ;
154744a69424SLois Curfman McInnes   mat->view       = MatView_MPIAIJ;
1548d6dfbf8fSBarry Smith   mat->factor     = matin->factor;
1549c456f294SBarry Smith   mat->assembled  = PETSC_TRUE;
1550d6dfbf8fSBarry Smith 
155144cd7ae7SLois Curfman McInnes   a->m = mat->m   = oldmat->m;
155244cd7ae7SLois Curfman McInnes   a->n = mat->n   = oldmat->n;
155344cd7ae7SLois Curfman McInnes   a->M = mat->M   = oldmat->M;
155444cd7ae7SLois Curfman McInnes   a->N = mat->N   = oldmat->N;
1555d6dfbf8fSBarry Smith 
1556416022c9SBarry Smith   a->rstart       = oldmat->rstart;
1557416022c9SBarry Smith   a->rend         = oldmat->rend;
1558416022c9SBarry Smith   a->cstart       = oldmat->cstart;
1559416022c9SBarry Smith   a->cend         = oldmat->cend;
156017699dbbSLois Curfman McInnes   a->size         = oldmat->size;
156117699dbbSLois Curfman McInnes   a->rank         = oldmat->rank;
156264119d58SLois Curfman McInnes   a->insertmode   = NOT_SET_VALUES;
1563bcd2baecSBarry Smith   a->rowvalues    = 0;
1564bcd2baecSBarry Smith   a->getrowactive = PETSC_FALSE;
1565d6dfbf8fSBarry Smith 
15660452661fSBarry Smith   a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners);
156717699dbbSLois Curfman McInnes   PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
156817699dbbSLois Curfman McInnes   PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));
1569416022c9SBarry Smith   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
15702ee70a88SLois Curfman McInnes   if (oldmat->colmap) {
15710452661fSBarry Smith     a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
1572416022c9SBarry Smith     PLogObjectMemory(mat,(a->N)*sizeof(int));
1573416022c9SBarry Smith     PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
1574416022c9SBarry Smith   } else a->colmap = 0;
1575ec8511deSBarry Smith   if (oldmat->garray && (len = ((Mat_SeqAIJ *) (oldmat->B->data))->n)) {
15760452661fSBarry Smith     a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray);
1577464493b3SBarry Smith     PLogObjectMemory(mat,len*sizeof(int));
1578416022c9SBarry Smith     PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
1579416022c9SBarry Smith   } else a->garray = 0;
1580d6dfbf8fSBarry Smith 
1581416022c9SBarry Smith   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
1582416022c9SBarry Smith   PLogObjectParent(mat,a->lvec);
1583a56f8943SBarry Smith   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
1584416022c9SBarry Smith   PLogObjectParent(mat,a->Mvctx);
1585416022c9SBarry Smith   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
1586416022c9SBarry Smith   PLogObjectParent(mat,a->A);
1587416022c9SBarry Smith   ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
1588416022c9SBarry Smith   PLogObjectParent(mat,a->B);
15895dd7a6c7SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
159025cdf11fSBarry Smith   if (flg) {
1591682d7d0cSBarry Smith     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
1592682d7d0cSBarry Smith   }
15938a729477SBarry Smith   *newmat = mat;
15948a729477SBarry Smith   return 0;
15958a729477SBarry Smith }
1596416022c9SBarry Smith 
159777c4ece6SBarry Smith #include "sys.h"
1598416022c9SBarry Smith 
159919bcc07fSBarry Smith int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
1600416022c9SBarry Smith {
1601d65a2f8fSBarry Smith   Mat          A;
1602416022c9SBarry Smith   int          i, nz, ierr, j,rstart, rend, fd;
1603d65a2f8fSBarry Smith   Scalar       *vals,*svals;
160419bcc07fSBarry Smith   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1605416022c9SBarry Smith   MPI_Status   status;
160617699dbbSLois Curfman McInnes   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1607d65a2f8fSBarry Smith   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
160819bcc07fSBarry Smith   int          tag = ((PetscObject)viewer)->tag;
1609416022c9SBarry Smith 
161017699dbbSLois Curfman McInnes   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
161117699dbbSLois Curfman McInnes   if (!rank) {
161290ace30eSBarry Smith     ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
161377c4ece6SBarry Smith     ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr);
1614c456f294SBarry Smith     if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIAIJ:not matrix object");
1615416022c9SBarry Smith   }
1616416022c9SBarry Smith 
1617416022c9SBarry Smith   MPI_Bcast(header+1,3,MPI_INT,0,comm);
1618416022c9SBarry Smith   M = header[1]; N = header[2];
1619416022c9SBarry Smith   /* determine ownership of all rows */
162017699dbbSLois Curfman McInnes   m = M/size + ((M % size) > rank);
16210452661fSBarry Smith   rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
1622416022c9SBarry Smith   MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1623416022c9SBarry Smith   rowners[0] = 0;
162417699dbbSLois Curfman McInnes   for ( i=2; i<=size; i++ ) {
1625416022c9SBarry Smith     rowners[i] += rowners[i-1];
1626416022c9SBarry Smith   }
162717699dbbSLois Curfman McInnes   rstart = rowners[rank];
162817699dbbSLois Curfman McInnes   rend   = rowners[rank+1];
1629416022c9SBarry Smith 
1630416022c9SBarry Smith   /* distribute row lengths to all processors */
16310452661fSBarry Smith   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
1632416022c9SBarry Smith   offlens = ourlens + (rend-rstart);
163317699dbbSLois Curfman McInnes   if (!rank) {
16340452661fSBarry Smith     rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
163577c4ece6SBarry Smith     ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
16360452661fSBarry Smith     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
163717699dbbSLois Curfman McInnes     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
1638d65a2f8fSBarry Smith     MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
16390452661fSBarry Smith     PetscFree(sndcounts);
1640416022c9SBarry Smith   }
1641416022c9SBarry Smith   else {
1642416022c9SBarry Smith     MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
1643416022c9SBarry Smith   }
1644416022c9SBarry Smith 
164517699dbbSLois Curfman McInnes   if (!rank) {
1646416022c9SBarry Smith     /* calculate the number of nonzeros on each processor */
16470452661fSBarry Smith     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
1648cddf8d76SBarry Smith     PetscMemzero(procsnz,size*sizeof(int));
164917699dbbSLois Curfman McInnes     for ( i=0; i<size; i++ ) {
1650416022c9SBarry Smith       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
1651416022c9SBarry Smith         procsnz[i] += rowlengths[j];
1652416022c9SBarry Smith       }
1653416022c9SBarry Smith     }
16540452661fSBarry Smith     PetscFree(rowlengths);
1655416022c9SBarry Smith 
1656416022c9SBarry Smith     /* determine max buffer needed and allocate it */
1657416022c9SBarry Smith     maxnz = 0;
165817699dbbSLois Curfman McInnes     for ( i=0; i<size; i++ ) {
16590452661fSBarry Smith       maxnz = PetscMax(maxnz,procsnz[i]);
1660416022c9SBarry Smith     }
16610452661fSBarry Smith     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
1662416022c9SBarry Smith 
1663416022c9SBarry Smith     /* read in my part of the matrix column indices  */
1664416022c9SBarry Smith     nz = procsnz[0];
16650452661fSBarry Smith     mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
166677c4ece6SBarry Smith     ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr);
1667d65a2f8fSBarry Smith 
1668d65a2f8fSBarry Smith     /* read in every one elses and ship off */
166917699dbbSLois Curfman McInnes     for ( i=1; i<size; i++ ) {
1670d65a2f8fSBarry Smith       nz = procsnz[i];
167177c4ece6SBarry Smith       ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
1672d65a2f8fSBarry Smith       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
1673d65a2f8fSBarry Smith     }
16740452661fSBarry Smith     PetscFree(cols);
1675416022c9SBarry Smith   }
1676416022c9SBarry Smith   else {
1677416022c9SBarry Smith     /* determine buffer space needed for message */
1678416022c9SBarry Smith     nz = 0;
1679416022c9SBarry Smith     for ( i=0; i<m; i++ ) {
1680416022c9SBarry Smith       nz += ourlens[i];
1681416022c9SBarry Smith     }
16820452661fSBarry Smith     mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1683416022c9SBarry Smith 
1684416022c9SBarry Smith     /* receive message of column indices*/
1685d65a2f8fSBarry Smith     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
1686416022c9SBarry Smith     MPI_Get_count(&status,MPI_INT,&maxnz);
1687c456f294SBarry Smith     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file");
1688416022c9SBarry Smith   }
1689416022c9SBarry Smith 
1690416022c9SBarry Smith   /* loop over local rows, determining number of off diagonal entries */
1691cddf8d76SBarry Smith   PetscMemzero(offlens,m*sizeof(int));
1692416022c9SBarry Smith   jj = 0;
1693416022c9SBarry Smith   for ( i=0; i<m; i++ ) {
1694416022c9SBarry Smith     for ( j=0; j<ourlens[i]; j++ ) {
1695d65a2f8fSBarry Smith       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1696416022c9SBarry Smith       jj++;
1697416022c9SBarry Smith     }
1698416022c9SBarry Smith   }
1699d65a2f8fSBarry Smith 
1700d65a2f8fSBarry Smith   /* create our matrix */
1701416022c9SBarry Smith   for ( i=0; i<m; i++ ) {
1702416022c9SBarry Smith     ourlens[i] -= offlens[i];
1703416022c9SBarry Smith   }
1704d65a2f8fSBarry Smith   ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1705d65a2f8fSBarry Smith   A = *newmat;
1706d65a2f8fSBarry Smith   MatSetOption(A,COLUMNS_SORTED);
1707d65a2f8fSBarry Smith   for ( i=0; i<m; i++ ) {
1708d65a2f8fSBarry Smith     ourlens[i] += offlens[i];
1709d65a2f8fSBarry Smith   }
1710416022c9SBarry Smith 
171117699dbbSLois Curfman McInnes   if (!rank) {
17120452661fSBarry Smith     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);
1713416022c9SBarry Smith 
1714416022c9SBarry Smith     /* read in my part of the matrix numerical values  */
1715416022c9SBarry Smith     nz = procsnz[0];
171677c4ece6SBarry Smith     ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
1717d65a2f8fSBarry Smith 
1718d65a2f8fSBarry Smith     /* insert into matrix */
1719d65a2f8fSBarry Smith     jj      = rstart;
1720d65a2f8fSBarry Smith     smycols = mycols;
1721d65a2f8fSBarry Smith     svals   = vals;
1722d65a2f8fSBarry Smith     for ( i=0; i<m; i++ ) {
1723d65a2f8fSBarry Smith       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1724d65a2f8fSBarry Smith       smycols += ourlens[i];
1725d65a2f8fSBarry Smith       svals   += ourlens[i];
1726d65a2f8fSBarry Smith       jj++;
1727416022c9SBarry Smith     }
1728416022c9SBarry Smith 
1729d65a2f8fSBarry Smith     /* read in other processors and ship out */
173017699dbbSLois Curfman McInnes     for ( i=1; i<size; i++ ) {
1731416022c9SBarry Smith       nz = procsnz[i];
173277c4ece6SBarry Smith       ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
1733d65a2f8fSBarry Smith       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
1734416022c9SBarry Smith     }
17350452661fSBarry Smith     PetscFree(procsnz);
1736416022c9SBarry Smith   }
1737d65a2f8fSBarry Smith   else {
1738d65a2f8fSBarry Smith     /* receive numeric values */
17390452661fSBarry Smith     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);
1740416022c9SBarry Smith 
1741d65a2f8fSBarry Smith     /* receive message of values*/
1742d65a2f8fSBarry Smith     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
1743d65a2f8fSBarry Smith     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1744c456f294SBarry Smith     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file");
1745d65a2f8fSBarry Smith 
1746d65a2f8fSBarry Smith     /* insert into matrix */
1747d65a2f8fSBarry Smith     jj      = rstart;
1748d65a2f8fSBarry Smith     smycols = mycols;
1749d65a2f8fSBarry Smith     svals   = vals;
1750d65a2f8fSBarry Smith     for ( i=0; i<m; i++ ) {
1751d65a2f8fSBarry Smith       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1752d65a2f8fSBarry Smith       smycols += ourlens[i];
1753d65a2f8fSBarry Smith       svals   += ourlens[i];
1754d65a2f8fSBarry Smith       jj++;
1755d65a2f8fSBarry Smith     }
1756d65a2f8fSBarry Smith   }
17570452661fSBarry Smith   PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);
1758d65a2f8fSBarry Smith 
1759d65a2f8fSBarry Smith   ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
1760d65a2f8fSBarry Smith   ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
1761416022c9SBarry Smith   return 0;
1762416022c9SBarry Smith }
1763