xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 70423df42639036d7617fd1625ff853510ae094b)
1cb512458SBarry Smith #ifndef lint
2*70423df4SBarry Smith static char vcid[] = "$Id: mpiaij.c,v 1.145 1996/06/27 02:49:15 bsmith Exp bsmith $";
3cb512458SBarry Smith #endif
48a729477SBarry Smith 
51eb62cbbSBarry Smith #include "mpiaij.h"
6f5eb4b81SSatish Balay #include "src/vec/vecimpl.h"
7d9942c19SSatish Balay #include "src/inline/spops.h"
88a729477SBarry Smith 
99e25ed09SBarry Smith /* local utility routine that creates a mapping from the global column
109e25ed09SBarry Smith number to the local number in the off-diagonal part of the local
119e25ed09SBarry Smith storage of the matrix.  This is done in a non scable way since the
129e25ed09SBarry Smith length of colmap equals the global matrix length.
139e25ed09SBarry Smith */
14b7c46309SBarry Smith static int CreateColmap_Private(Mat mat)
159e25ed09SBarry Smith {
1644a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
17ec8511deSBarry Smith   Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
18416022c9SBarry Smith   int        n = B->n,i,shift = B->indexshift;
19dbb450caSBarry Smith 
200452661fSBarry Smith   aij->colmap = (int *) PetscMalloc(aij->N*sizeof(int));CHKPTRQ(aij->colmap);
21464493b3SBarry Smith   PLogObjectMemory(mat,aij->N*sizeof(int));
22cddf8d76SBarry Smith   PetscMemzero(aij->colmap,aij->N*sizeof(int));
23dbb450caSBarry Smith   for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i-shift;
249e25ed09SBarry Smith   return 0;
259e25ed09SBarry Smith }
269e25ed09SBarry Smith 
272493cbb0SBarry Smith extern int DisAssemble_MPIAIJ(Mat);
282493cbb0SBarry Smith 
29*70423df4SBarry Smith static int MatGetReordering_MPIAIJ(Mat mat,MatReordering type,IS *rperm,IS *cperm)
30299609e3SLois Curfman McInnes {
31299609e3SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
32299609e3SLois Curfman McInnes   int        ierr;
3317699dbbSLois Curfman McInnes   if (aij->size == 1) {
3451c98ccdSLois Curfman McInnes     ierr = MatGetReordering(aij->A,type,rperm,cperm); CHKERRQ(ierr);
3548d91487SBarry Smith   } else SETERRQ(1,"MatGetReordering_MPIAIJ:not supported in parallel");
36299609e3SLois Curfman McInnes   return 0;
37299609e3SLois Curfman McInnes }
38299609e3SLois Curfman McInnes 
394b0e389bSBarry Smith static int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
408a729477SBarry Smith {
4144a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
42dbb450caSBarry Smith   Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data;
434b0e389bSBarry Smith   Scalar     value;
441eb62cbbSBarry Smith   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
451eb62cbbSBarry Smith   int        cstart = aij->cstart, cend = aij->cend,row,col;
464b0e389bSBarry Smith   int        shift = C->indexshift,roworiented = aij->roworiented;
478a729477SBarry Smith 
4864119d58SLois Curfman McInnes   if (aij->insertmode != NOT_SET_VALUES && aij->insertmode != addv) {
4948d91487SBarry Smith     SETERRQ(1,"MatSetValues_MPIAIJ:Cannot mix inserts and adds");
508a729477SBarry Smith   }
511eb62cbbSBarry Smith   aij->insertmode = addv;
528a729477SBarry Smith   for ( i=0; i<m; i++ ) {
534b0e389bSBarry Smith     if (im[i] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative row");
544b0e389bSBarry Smith     if (im[i] >= aij->M) SETERRQ(1,"MatSetValues_MPIAIJ:Row too large");
554b0e389bSBarry Smith     if (im[i] >= rstart && im[i] < rend) {
564b0e389bSBarry Smith       row = im[i] - rstart;
571eb62cbbSBarry Smith       for ( j=0; j<n; j++ ) {
584b0e389bSBarry Smith         if (in[j] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative column");
594b0e389bSBarry Smith         if (in[j] >= aij->N) SETERRQ(1,"MatSetValues_MPIAIJ:Col too large");
604b0e389bSBarry Smith         if (in[j] >= cstart && in[j] < cend){
614b0e389bSBarry Smith           col = in[j] - cstart;
624b0e389bSBarry Smith           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
634b0e389bSBarry Smith           ierr = MatSetValues(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
641eb62cbbSBarry Smith         }
651eb62cbbSBarry Smith         else {
66227d817aSBarry Smith           if (mat->was_assembled) {
67b7c46309SBarry Smith             if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
684b0e389bSBarry Smith             col = aij->colmap[in[j]] + shift;
69ec8511deSBarry Smith             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
702493cbb0SBarry Smith               ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
714b0e389bSBarry Smith               col =  in[j];
72d6dfbf8fSBarry Smith             }
739e25ed09SBarry Smith           }
744b0e389bSBarry Smith           else col = in[j];
754b0e389bSBarry Smith           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
764b0e389bSBarry Smith           ierr = MatSetValues(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
771eb62cbbSBarry Smith         }
781eb62cbbSBarry Smith       }
791eb62cbbSBarry Smith     }
801eb62cbbSBarry Smith     else {
814b0e389bSBarry Smith       if (roworiented) {
824b0e389bSBarry Smith         ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
834b0e389bSBarry Smith       }
844b0e389bSBarry Smith       else {
854b0e389bSBarry Smith         row = im[i];
864b0e389bSBarry Smith         for ( j=0; j<n; j++ ) {
874b0e389bSBarry Smith           ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
884b0e389bSBarry Smith         }
894b0e389bSBarry Smith       }
901eb62cbbSBarry Smith     }
918a729477SBarry Smith   }
928a729477SBarry Smith   return 0;
938a729477SBarry Smith }
948a729477SBarry Smith 
95b49de8d1SLois Curfman McInnes static int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
96b49de8d1SLois Curfman McInnes {
97b49de8d1SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
98b49de8d1SLois Curfman McInnes   Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data;
99b49de8d1SLois Curfman McInnes   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
100b49de8d1SLois Curfman McInnes   int        cstart = aij->cstart, cend = aij->cend,row,col;
101b49de8d1SLois Curfman McInnes   int        shift = C->indexshift;
102b49de8d1SLois Curfman McInnes 
103b49de8d1SLois Curfman McInnes   for ( i=0; i<m; i++ ) {
104b49de8d1SLois Curfman McInnes     if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative row");
105b49de8d1SLois Curfman McInnes     if (idxm[i] >= aij->M) SETERRQ(1,"MatGetValues_MPIAIJ:Row too large");
106b49de8d1SLois Curfman McInnes     if (idxm[i] >= rstart && idxm[i] < rend) {
107b49de8d1SLois Curfman McInnes       row = idxm[i] - rstart;
108b49de8d1SLois Curfman McInnes       for ( j=0; j<n; j++ ) {
109b49de8d1SLois Curfman McInnes         if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative column");
110b49de8d1SLois Curfman McInnes         if (idxn[j] >= aij->N) SETERRQ(1,"MatGetValues_MPIAIJ:Col too large");
111b49de8d1SLois Curfman McInnes         if (idxn[j] >= cstart && idxn[j] < cend){
112b49de8d1SLois Curfman McInnes           col = idxn[j] - cstart;
113b49de8d1SLois Curfman McInnes           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
114b49de8d1SLois Curfman McInnes         }
115b49de8d1SLois Curfman McInnes         else {
116*70423df4SBarry Smith           if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
117b49de8d1SLois Curfman McInnes           col = aij->colmap[idxn[j]] + shift;
118b49de8d1SLois Curfman McInnes           ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
119b49de8d1SLois Curfman McInnes         }
120b49de8d1SLois Curfman McInnes       }
121b49de8d1SLois Curfman McInnes     }
122b49de8d1SLois Curfman McInnes     else {
123b49de8d1SLois Curfman McInnes       SETERRQ(1,"MatGetValues_MPIAIJ:Only local values currently supported");
124b49de8d1SLois Curfman McInnes     }
125b49de8d1SLois Curfman McInnes   }
126b49de8d1SLois Curfman McInnes   return 0;
127b49de8d1SLois Curfman McInnes }
128b49de8d1SLois Curfman McInnes 
129fc76ce87SLois Curfman McInnes static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
1308a729477SBarry Smith {
13144a69424SLois Curfman McInnes   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
132d6dfbf8fSBarry Smith   MPI_Comm    comm = mat->comm;
13317699dbbSLois Curfman McInnes   int         size = aij->size, *owners = aij->rowners;
13417699dbbSLois Curfman McInnes   int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
1351eb62cbbSBarry Smith   MPI_Request *send_waits,*recv_waits;
1366abc6512SBarry Smith   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
1371eb62cbbSBarry Smith   InsertMode  addv;
1381eb62cbbSBarry Smith   Scalar      *rvalues,*svalues;
1391eb62cbbSBarry Smith 
1401eb62cbbSBarry Smith   /* make sure all processors are either in INSERTMODE or ADDMODE */
141d65a2f8fSBarry Smith   MPI_Allreduce(&aij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
142dbb450caSBarry Smith   if (addv == (ADD_VALUES|INSERT_VALUES)) {
143bbb6d6a8SBarry Smith     SETERRQ(1,"MatAssemblyBegin_MPIAIJ:Some processors inserted others added");
1441eb62cbbSBarry Smith   }
1451eb62cbbSBarry Smith   aij->insertmode = addv; /* in case this processor had no cache */
1461eb62cbbSBarry Smith 
1471eb62cbbSBarry Smith   /*  first count number of contributors to each processor */
1480452661fSBarry Smith   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
149cddf8d76SBarry Smith   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
1500452661fSBarry Smith   owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
1511eb62cbbSBarry Smith   for ( i=0; i<aij->stash.n; i++ ) {
1521eb62cbbSBarry Smith     idx = aij->stash.idx[i];
15317699dbbSLois Curfman McInnes     for ( j=0; j<size; j++ ) {
1541eb62cbbSBarry Smith       if (idx >= owners[j] && idx < owners[j+1]) {
1551eb62cbbSBarry Smith         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
1568a729477SBarry Smith       }
1578a729477SBarry Smith     }
1588a729477SBarry Smith   }
15917699dbbSLois Curfman McInnes   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
1601eb62cbbSBarry Smith 
1611eb62cbbSBarry Smith   /* inform other processors of number of messages and max length*/
1620452661fSBarry Smith   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
16317699dbbSLois Curfman McInnes   MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);
16417699dbbSLois Curfman McInnes   nreceives = work[rank];
16517699dbbSLois Curfman McInnes   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
16617699dbbSLois Curfman McInnes   nmax = work[rank];
1670452661fSBarry Smith   PetscFree(work);
1681eb62cbbSBarry Smith 
1691eb62cbbSBarry Smith   /* post receives:
1701eb62cbbSBarry Smith        1) each message will consist of ordered pairs
1711eb62cbbSBarry Smith      (global index,value) we store the global index as a double
172d6dfbf8fSBarry Smith      to simplify the message passing.
1731eb62cbbSBarry Smith        2) since we don't know how long each individual message is we
1741eb62cbbSBarry Smith      allocate the largest needed buffer for each receive. Potentially
1751eb62cbbSBarry Smith      this is a lot of wasted space.
1761eb62cbbSBarry Smith 
1771eb62cbbSBarry Smith 
1781eb62cbbSBarry Smith        This could be done better.
1791eb62cbbSBarry Smith   */
1800452661fSBarry Smith   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
18178b31e54SBarry Smith   CHKPTRQ(rvalues);
1820452661fSBarry Smith   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
18378b31e54SBarry Smith   CHKPTRQ(recv_waits);
1841eb62cbbSBarry Smith   for ( i=0; i<nreceives; i++ ) {
185416022c9SBarry Smith     MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
1861eb62cbbSBarry Smith               comm,recv_waits+i);
1871eb62cbbSBarry Smith   }
1881eb62cbbSBarry Smith 
1891eb62cbbSBarry Smith   /* do sends:
1901eb62cbbSBarry Smith       1) starts[i] gives the starting index in svalues for stuff going to
1911eb62cbbSBarry Smith          the ith processor
1921eb62cbbSBarry Smith   */
1930452661fSBarry Smith   svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
1940452661fSBarry Smith   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
19578b31e54SBarry Smith   CHKPTRQ(send_waits);
1960452661fSBarry Smith   starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
1971eb62cbbSBarry Smith   starts[0] = 0;
19817699dbbSLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
1991eb62cbbSBarry Smith   for ( i=0; i<aij->stash.n; i++ ) {
2001eb62cbbSBarry Smith     svalues[3*starts[owner[i]]]       = (Scalar)  aij->stash.idx[i];
2011eb62cbbSBarry Smith     svalues[3*starts[owner[i]]+1]     = (Scalar)  aij->stash.idy[i];
2021eb62cbbSBarry Smith     svalues[3*(starts[owner[i]]++)+2] =  aij->stash.array[i];
2031eb62cbbSBarry Smith   }
2040452661fSBarry Smith   PetscFree(owner);
2051eb62cbbSBarry Smith   starts[0] = 0;
20617699dbbSLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
2071eb62cbbSBarry Smith   count = 0;
20817699dbbSLois Curfman McInnes   for ( i=0; i<size; i++ ) {
2091eb62cbbSBarry Smith     if (procs[i]) {
210416022c9SBarry Smith       MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
2111eb62cbbSBarry Smith                 comm,send_waits+count++);
2121eb62cbbSBarry Smith     }
2131eb62cbbSBarry Smith   }
2140452661fSBarry Smith   PetscFree(starts); PetscFree(nprocs);
2151eb62cbbSBarry Smith 
2161eb62cbbSBarry Smith   /* Free cache space */
2172191d07cSBarry Smith   PLogInfo(0,"[%d]MatAssemblyBegin_MPIAIJ:Number of off processor values %d\n",rank,aij->stash.n);
21878b31e54SBarry Smith   ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
2191eb62cbbSBarry Smith 
2201eb62cbbSBarry Smith   aij->svalues    = svalues;    aij->rvalues    = rvalues;
2211eb62cbbSBarry Smith   aij->nsends     = nsends;     aij->nrecvs     = nreceives;
2221eb62cbbSBarry Smith   aij->send_waits = send_waits; aij->recv_waits = recv_waits;
2231eb62cbbSBarry Smith   aij->rmax       = nmax;
2241eb62cbbSBarry Smith 
2251eb62cbbSBarry Smith   return 0;
2261eb62cbbSBarry Smith }
22744a69424SLois Curfman McInnes extern int MatSetUpMultiply_MPIAIJ(Mat);
2281eb62cbbSBarry Smith 
229fc76ce87SLois Curfman McInnes static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
2301eb62cbbSBarry Smith {
23144a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
232dbb450caSBarry Smith   Mat_SeqAIJ *C = (Mat_SeqAIJ *) aij->A->data;
2331eb62cbbSBarry Smith   MPI_Status  *send_status,recv_status;
234416022c9SBarry Smith   int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
235416022c9SBarry Smith   int         row,col,other_disassembled,shift = C->indexshift;
2361eb62cbbSBarry Smith   Scalar      *values,val;
2371eb62cbbSBarry Smith   InsertMode  addv = aij->insertmode;
2381eb62cbbSBarry Smith 
2391eb62cbbSBarry Smith   /*  wait on receives */
2401eb62cbbSBarry Smith   while (count) {
241d6dfbf8fSBarry Smith     MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
2421eb62cbbSBarry Smith     /* unpack receives into our local space */
243d6dfbf8fSBarry Smith     values = aij->rvalues + 3*imdex*aij->rmax;
244c60448a5SBarry Smith     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
2451eb62cbbSBarry Smith     n = n/3;
2461eb62cbbSBarry Smith     for ( i=0; i<n; i++ ) {
247227d817aSBarry Smith       row = (int) PetscReal(values[3*i]) - aij->rstart;
248227d817aSBarry Smith       col = (int) PetscReal(values[3*i+1]);
2491eb62cbbSBarry Smith       val = values[3*i+2];
2501eb62cbbSBarry Smith       if (col >= aij->cstart && col < aij->cend) {
2511eb62cbbSBarry Smith         col -= aij->cstart;
2521eb62cbbSBarry Smith         MatSetValues(aij->A,1,&row,1,&col,&val,addv);
2531eb62cbbSBarry Smith       }
2541eb62cbbSBarry Smith       else {
255227d817aSBarry Smith         if (mat->was_assembled) {
256b7c46309SBarry Smith           if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
257dbb450caSBarry Smith           col = aij->colmap[col] + shift;
258ec8511deSBarry Smith           if (col < 0  && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
2592493cbb0SBarry Smith             ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
260227d817aSBarry Smith             col = (int) PetscReal(values[3*i+1]);
261d6dfbf8fSBarry Smith           }
2629e25ed09SBarry Smith         }
2631eb62cbbSBarry Smith         MatSetValues(aij->B,1,&row,1,&col,&val,addv);
2641eb62cbbSBarry Smith       }
2651eb62cbbSBarry Smith     }
2661eb62cbbSBarry Smith     count--;
2671eb62cbbSBarry Smith   }
2680452661fSBarry Smith   PetscFree(aij->recv_waits); PetscFree(aij->rvalues);
2691eb62cbbSBarry Smith 
2701eb62cbbSBarry Smith   /* wait on sends */
2711eb62cbbSBarry Smith   if (aij->nsends) {
2720452661fSBarry Smith     send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));
27378b31e54SBarry Smith     CHKPTRQ(send_status);
2741eb62cbbSBarry Smith     MPI_Waitall(aij->nsends,aij->send_waits,send_status);
2750452661fSBarry Smith     PetscFree(send_status);
2761eb62cbbSBarry Smith   }
2770452661fSBarry Smith   PetscFree(aij->send_waits); PetscFree(aij->svalues);
2781eb62cbbSBarry Smith 
27964119d58SLois Curfman McInnes   aij->insertmode = NOT_SET_VALUES;
28078b31e54SBarry Smith   ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
28178b31e54SBarry Smith   ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);
2821eb62cbbSBarry Smith 
2832493cbb0SBarry Smith   /* determine if any processor has disassembled, if so we must
2842493cbb0SBarry Smith      also disassemble ourselfs, in order that we may reassemble. */
285227d817aSBarry Smith   MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
286227d817aSBarry Smith   if (mat->was_assembled && !other_disassembled) {
2872493cbb0SBarry Smith     ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
2882493cbb0SBarry Smith   }
2892493cbb0SBarry Smith 
290227d817aSBarry Smith   if (!mat->was_assembled && mode == FINAL_ASSEMBLY) {
29178b31e54SBarry Smith     ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
2925e42470aSBarry Smith   }
29378b31e54SBarry Smith   ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
29478b31e54SBarry Smith   ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);
2955e42470aSBarry Smith 
2967a0afa10SBarry Smith   if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;}
2978a729477SBarry Smith   return 0;
2988a729477SBarry Smith }
2998a729477SBarry Smith 
3002ee70a88SLois Curfman McInnes static int MatZeroEntries_MPIAIJ(Mat A)
3011eb62cbbSBarry Smith {
30244a69424SLois Curfman McInnes   Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
303dbd7a890SLois Curfman McInnes   int        ierr;
30478b31e54SBarry Smith   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
30578b31e54SBarry Smith   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
3061eb62cbbSBarry Smith   return 0;
3071eb62cbbSBarry Smith }
3081eb62cbbSBarry Smith 
3091eb62cbbSBarry Smith /* the code does not do the diagonal entries correctly unless the
3101eb62cbbSBarry Smith    matrix is square and the column and row owerships are identical.
3111eb62cbbSBarry Smith    This is a BUG. The only way to fix it seems to be to access
3121eb62cbbSBarry Smith    aij->A and aij->B directly and not through the MatZeroRows()
3131eb62cbbSBarry Smith    routine.
3141eb62cbbSBarry Smith */
31544a69424SLois Curfman McInnes static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
3161eb62cbbSBarry Smith {
31744a69424SLois Curfman McInnes   Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
31817699dbbSLois Curfman McInnes   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
3196abc6512SBarry Smith   int            *procs,*nprocs,j,found,idx,nsends,*work;
32017699dbbSLois Curfman McInnes   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
3215392566eSBarry Smith   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
322d6dfbf8fSBarry Smith   int            *lens,imdex,*lrows,*values;
323d6dfbf8fSBarry Smith   MPI_Comm       comm = A->comm;
3241eb62cbbSBarry Smith   MPI_Request    *send_waits,*recv_waits;
3251eb62cbbSBarry Smith   MPI_Status     recv_status,*send_status;
3261eb62cbbSBarry Smith   IS             istmp;
3271eb62cbbSBarry Smith 
32877c4ece6SBarry Smith   ierr = ISGetSize(is,&N); CHKERRQ(ierr);
32978b31e54SBarry Smith   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
3301eb62cbbSBarry Smith 
3311eb62cbbSBarry Smith   /*  first count number of contributors to each processor */
3320452661fSBarry Smith   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
333cddf8d76SBarry Smith   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
3340452661fSBarry Smith   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
3351eb62cbbSBarry Smith   for ( i=0; i<N; i++ ) {
3361eb62cbbSBarry Smith     idx = rows[i];
3371eb62cbbSBarry Smith     found = 0;
33817699dbbSLois Curfman McInnes     for ( j=0; j<size; j++ ) {
3391eb62cbbSBarry Smith       if (idx >= owners[j] && idx < owners[j+1]) {
3401eb62cbbSBarry Smith         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
3411eb62cbbSBarry Smith       }
3421eb62cbbSBarry Smith     }
343bbb6d6a8SBarry Smith     if (!found) SETERRQ(1,"MatZeroRows_MPIAIJ:Index out of range");
3441eb62cbbSBarry Smith   }
34517699dbbSLois Curfman McInnes   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
3461eb62cbbSBarry Smith 
3471eb62cbbSBarry Smith   /* inform other processors of number of messages and max length*/
3480452661fSBarry Smith   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
34917699dbbSLois Curfman McInnes   MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
35017699dbbSLois Curfman McInnes   nrecvs = work[rank];
35117699dbbSLois Curfman McInnes   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
35217699dbbSLois Curfman McInnes   nmax = work[rank];
3530452661fSBarry Smith   PetscFree(work);
3541eb62cbbSBarry Smith 
3551eb62cbbSBarry Smith   /* post receives:   */
3560452661fSBarry Smith   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
35778b31e54SBarry Smith   CHKPTRQ(rvalues);
3580452661fSBarry Smith   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
35978b31e54SBarry Smith   CHKPTRQ(recv_waits);
3601eb62cbbSBarry Smith   for ( i=0; i<nrecvs; i++ ) {
361416022c9SBarry Smith     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
3621eb62cbbSBarry Smith   }
3631eb62cbbSBarry Smith 
3641eb62cbbSBarry Smith   /* do sends:
3651eb62cbbSBarry Smith       1) starts[i] gives the starting index in svalues for stuff going to
3661eb62cbbSBarry Smith          the ith processor
3671eb62cbbSBarry Smith   */
3680452661fSBarry Smith   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
3690452661fSBarry Smith   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
37078b31e54SBarry Smith   CHKPTRQ(send_waits);
3710452661fSBarry Smith   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
3721eb62cbbSBarry Smith   starts[0] = 0;
37317699dbbSLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
3741eb62cbbSBarry Smith   for ( i=0; i<N; i++ ) {
3751eb62cbbSBarry Smith     svalues[starts[owner[i]]++] = rows[i];
3761eb62cbbSBarry Smith   }
3771eb62cbbSBarry Smith   ISRestoreIndices(is,&rows);
3781eb62cbbSBarry Smith 
3791eb62cbbSBarry Smith   starts[0] = 0;
38017699dbbSLois Curfman McInnes   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
3811eb62cbbSBarry Smith   count = 0;
38217699dbbSLois Curfman McInnes   for ( i=0; i<size; i++ ) {
3831eb62cbbSBarry Smith     if (procs[i]) {
384416022c9SBarry Smith       MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
3851eb62cbbSBarry Smith     }
3861eb62cbbSBarry Smith   }
3870452661fSBarry Smith   PetscFree(starts);
3881eb62cbbSBarry Smith 
38917699dbbSLois Curfman McInnes   base = owners[rank];
3901eb62cbbSBarry Smith 
3911eb62cbbSBarry Smith   /*  wait on receives */
3920452661fSBarry Smith   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
3931eb62cbbSBarry Smith   source = lens + nrecvs;
3941eb62cbbSBarry Smith   count  = nrecvs; slen = 0;
3951eb62cbbSBarry Smith   while (count) {
396d6dfbf8fSBarry Smith     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
3971eb62cbbSBarry Smith     /* unpack receives into our local space */
3981eb62cbbSBarry Smith     MPI_Get_count(&recv_status,MPI_INT,&n);
399d6dfbf8fSBarry Smith     source[imdex]  = recv_status.MPI_SOURCE;
400d6dfbf8fSBarry Smith     lens[imdex]  = n;
4011eb62cbbSBarry Smith     slen += n;
4021eb62cbbSBarry Smith     count--;
4031eb62cbbSBarry Smith   }
4040452661fSBarry Smith   PetscFree(recv_waits);
4051eb62cbbSBarry Smith 
4061eb62cbbSBarry Smith   /* move the data into the send scatter */
4070452661fSBarry Smith   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
4081eb62cbbSBarry Smith   count = 0;
4091eb62cbbSBarry Smith   for ( i=0; i<nrecvs; i++ ) {
4101eb62cbbSBarry Smith     values = rvalues + i*nmax;
4111eb62cbbSBarry Smith     for ( j=0; j<lens[i]; j++ ) {
4121eb62cbbSBarry Smith       lrows[count++] = values[j] - base;
4131eb62cbbSBarry Smith     }
4141eb62cbbSBarry Smith   }
4150452661fSBarry Smith   PetscFree(rvalues); PetscFree(lens);
4160452661fSBarry Smith   PetscFree(owner); PetscFree(nprocs);
4171eb62cbbSBarry Smith 
4181eb62cbbSBarry Smith   /* actually zap the local rows */
419416022c9SBarry Smith   ierr = ISCreateSeq(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
420464493b3SBarry Smith   PLogObjectParent(A,istmp);
4210452661fSBarry Smith   PetscFree(lrows);
42278b31e54SBarry Smith   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
42378b31e54SBarry Smith   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
42478b31e54SBarry Smith   ierr = ISDestroy(istmp); CHKERRQ(ierr);
4251eb62cbbSBarry Smith 
4261eb62cbbSBarry Smith   /* wait on sends */
4271eb62cbbSBarry Smith   if (nsends) {
4280452661fSBarry Smith     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
42978b31e54SBarry Smith     CHKPTRQ(send_status);
4301eb62cbbSBarry Smith     MPI_Waitall(nsends,send_waits,send_status);
4310452661fSBarry Smith     PetscFree(send_status);
4321eb62cbbSBarry Smith   }
4330452661fSBarry Smith   PetscFree(send_waits); PetscFree(svalues);
4341eb62cbbSBarry Smith 
4351eb62cbbSBarry Smith   return 0;
4361eb62cbbSBarry Smith }
4371eb62cbbSBarry Smith 
438416022c9SBarry Smith static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
4391eb62cbbSBarry Smith {
440416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
4411eb62cbbSBarry Smith   int        ierr;
442416022c9SBarry Smith 
44364119d58SLois Curfman McInnes   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx); CHKERRQ(ierr);
44438597bd4SSatish Balay   ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr);
44564119d58SLois Curfman McInnes   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx); CHKERRQ(ierr);
44638597bd4SSatish Balay   ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
4471eb62cbbSBarry Smith   return 0;
4481eb62cbbSBarry Smith }
4491eb62cbbSBarry Smith 
450416022c9SBarry Smith static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
451da3a660dSBarry Smith {
452416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
453da3a660dSBarry Smith   int        ierr;
45464119d58SLois Curfman McInnes   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
45538597bd4SSatish Balay   ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
45664119d58SLois Curfman McInnes   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
45738597bd4SSatish Balay   ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
458da3a660dSBarry Smith   return 0;
459da3a660dSBarry Smith }
460da3a660dSBarry Smith 
461416022c9SBarry Smith static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
462da3a660dSBarry Smith {
463416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
464da3a660dSBarry Smith   int        ierr;
465da3a660dSBarry Smith 
466da3a660dSBarry Smith   /* do nondiagonal part */
46738597bd4SSatish Balay   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
468da3a660dSBarry Smith   /* send it on its way */
469416022c9SBarry Smith   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,
47064119d58SLois Curfman McInnes                 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
471da3a660dSBarry Smith   /* do local part */
47238597bd4SSatish Balay   ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr);
473da3a660dSBarry Smith   /* receive remote parts: note this assumes the values are not actually */
474da3a660dSBarry Smith   /* inserted in yy until the next line, which is true for my implementation*/
475da3a660dSBarry Smith   /* but is not perhaps always true. */
476416022c9SBarry Smith   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,
47764119d58SLois Curfman McInnes                   (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
478da3a660dSBarry Smith   return 0;
479da3a660dSBarry Smith }
480da3a660dSBarry Smith 
481416022c9SBarry Smith static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
482da3a660dSBarry Smith {
483416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
484da3a660dSBarry Smith   int        ierr;
485da3a660dSBarry Smith 
486da3a660dSBarry Smith   /* do nondiagonal part */
48738597bd4SSatish Balay   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
488da3a660dSBarry Smith   /* send it on its way */
489416022c9SBarry Smith   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,
49064119d58SLois Curfman McInnes                  (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
491da3a660dSBarry Smith   /* do local part */
49238597bd4SSatish Balay   ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
493da3a660dSBarry Smith   /* receive remote parts: note this assumes the values are not actually */
494da3a660dSBarry Smith   /* inserted in yy until the next line, which is true for my implementation*/
495da3a660dSBarry Smith   /* but is not perhaps always true. */
496416022c9SBarry Smith   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,
49764119d58SLois Curfman McInnes                   (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
498da3a660dSBarry Smith   return 0;
499da3a660dSBarry Smith }
500da3a660dSBarry Smith 
5011eb62cbbSBarry Smith /*
5021eb62cbbSBarry Smith   This only works correctly for square matrices where the subblock A->A is the
5031eb62cbbSBarry Smith    diagonal block
5041eb62cbbSBarry Smith */
505416022c9SBarry Smith static int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
5061eb62cbbSBarry Smith {
507416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
50844cd7ae7SLois Curfman McInnes   if (a->M != a->N)
50944cd7ae7SLois Curfman McInnes     SETERRQ(1,"MatGetDiagonal_MPIAIJ:Supports only square matrix where A->A is diag block");
510416022c9SBarry Smith   return MatGetDiagonal(a->A,v);
5111eb62cbbSBarry Smith }
5121eb62cbbSBarry Smith 
513052efed2SBarry Smith static int MatScale_MPIAIJ(Scalar *aa,Mat A)
514052efed2SBarry Smith {
515052efed2SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
516052efed2SBarry Smith   int        ierr;
517052efed2SBarry Smith   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
518052efed2SBarry Smith   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
519052efed2SBarry Smith   return 0;
520052efed2SBarry Smith }
521052efed2SBarry Smith 
52244a69424SLois Curfman McInnes static int MatDestroy_MPIAIJ(PetscObject obj)
5231eb62cbbSBarry Smith {
5241eb62cbbSBarry Smith   Mat        mat = (Mat) obj;
52544a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
5261eb62cbbSBarry Smith   int        ierr;
527a5a9c739SBarry Smith #if defined(PETSC_LOG)
5286e35fa57SLois Curfman McInnes   PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N);
529a5a9c739SBarry Smith #endif
5300452661fSBarry Smith   PetscFree(aij->rowners);
53178b31e54SBarry Smith   ierr = MatDestroy(aij->A); CHKERRQ(ierr);
53278b31e54SBarry Smith   ierr = MatDestroy(aij->B); CHKERRQ(ierr);
5330452661fSBarry Smith   if (aij->colmap) PetscFree(aij->colmap);
5340452661fSBarry Smith   if (aij->garray) PetscFree(aij->garray);
5351eb62cbbSBarry Smith   if (aij->lvec)   VecDestroy(aij->lvec);
536a56f8943SBarry Smith   if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
5377a0afa10SBarry Smith   if (aij->rowvalues) PetscFree(aij->rowvalues);
5380452661fSBarry Smith   PetscFree(aij);
539a5a9c739SBarry Smith   PLogObjectDestroy(mat);
5400452661fSBarry Smith   PetscHeaderDestroy(mat);
5411eb62cbbSBarry Smith   return 0;
5421eb62cbbSBarry Smith }
5437857610eSBarry Smith #include "draw.h"
544b16a3bb1SBarry Smith #include "pinclude/pviewer.h"
545ee50ffe9SBarry Smith 
546416022c9SBarry Smith static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
5471eb62cbbSBarry Smith {
548416022c9SBarry Smith   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
549416022c9SBarry Smith   int         ierr;
550416022c9SBarry Smith 
55117699dbbSLois Curfman McInnes   if (aij->size == 1) {
552416022c9SBarry Smith     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
553416022c9SBarry Smith   }
554a523beb4SLois Curfman McInnes   else SETERRQ(1,"MatView_MPIAIJ_Binary:Only uniprocessor output supported");
555416022c9SBarry Smith   return 0;
556416022c9SBarry Smith }
557416022c9SBarry Smith 
558d7e8b826SBarry Smith static int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
559416022c9SBarry Smith {
56044a69424SLois Curfman McInnes   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
561dbb450caSBarry Smith   Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
562a56f8943SBarry Smith   int         ierr, format,shift = C->indexshift,rank;
563d636dbe3SBarry Smith   FILE        *fd;
56419bcc07fSBarry Smith   ViewerType  vtype;
565416022c9SBarry Smith 
56619bcc07fSBarry Smith   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
56719bcc07fSBarry Smith   if (vtype  == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
56890ace30eSBarry Smith     ierr = ViewerGetFormat(viewer,&format);
56990ace30eSBarry Smith     if (format == ASCII_FORMAT_INFO_DETAILED) {
57095e01e2fSLois Curfman McInnes       int nz, nzalloc, mem, flg;
571a56f8943SBarry Smith       MPI_Comm_rank(mat->comm,&rank);
57290ace30eSBarry Smith       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
573a56f8943SBarry Smith       ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem);
57495e01e2fSLois Curfman McInnes       ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr);
57577c4ece6SBarry Smith       PetscSequentialPhaseBegin(mat->comm,1);
57695e01e2fSLois Curfman McInnes       if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
57795e01e2fSLois Curfman McInnes          rank,aij->m,nz,nzalloc,mem);
57895e01e2fSLois Curfman McInnes       else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
57995e01e2fSLois Curfman McInnes          rank,aij->m,nz,nzalloc,mem);
58008480c60SBarry Smith       ierr = MatGetInfo(aij->A,MAT_LOCAL,&nz,&nzalloc,&mem);
58195e01e2fSLois Curfman McInnes       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,nz);
58208480c60SBarry Smith       ierr = MatGetInfo(aij->B,MAT_LOCAL,&nz,&nzalloc,&mem);
58395e01e2fSLois Curfman McInnes       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,nz);
584a56f8943SBarry Smith       fflush(fd);
58577c4ece6SBarry Smith       PetscSequentialPhaseEnd(mat->comm,1);
586a40aa06bSLois Curfman McInnes       ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr);
587416022c9SBarry Smith       return 0;
588416022c9SBarry Smith     }
58990ace30eSBarry Smith     else if (format == ASCII_FORMAT_INFO) {
59008480c60SBarry Smith       return 0;
59108480c60SBarry Smith     }
592416022c9SBarry Smith   }
593416022c9SBarry Smith 
59419bcc07fSBarry Smith   if (vtype == DRAW_VIEWER) {
59519bcc07fSBarry Smith     Draw       draw;
59619bcc07fSBarry Smith     PetscTruth isnull;
59719bcc07fSBarry Smith     ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
59819bcc07fSBarry Smith     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
59919bcc07fSBarry Smith   }
60019bcc07fSBarry Smith 
60119bcc07fSBarry Smith   if (vtype == ASCII_FILE_VIEWER) {
60290ace30eSBarry Smith     ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
60377c4ece6SBarry Smith     PetscSequentialPhaseBegin(mat->comm,1);
604d13ab20cSBarry Smith     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
60517699dbbSLois Curfman McInnes            aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
6061eb62cbbSBarry Smith            aij->cend);
60778b31e54SBarry Smith     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
60878b31e54SBarry Smith     ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
609d13ab20cSBarry Smith     fflush(fd);
61077c4ece6SBarry Smith     PetscSequentialPhaseEnd(mat->comm,1);
611d13ab20cSBarry Smith   }
612416022c9SBarry Smith   else {
613a56f8943SBarry Smith     int size = aij->size;
614a56f8943SBarry Smith     rank = aij->rank;
61517699dbbSLois Curfman McInnes     if (size == 1) {
61678b31e54SBarry Smith       ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
61795373324SBarry Smith     }
61895373324SBarry Smith     else {
61995373324SBarry Smith       /* assemble the entire matrix onto first processor. */
62095373324SBarry Smith       Mat         A;
621ec8511deSBarry Smith       Mat_SeqAIJ *Aloc;
6222eb8c8abSBarry Smith       int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
62395373324SBarry Smith       Scalar      *a;
6242ee70a88SLois Curfman McInnes 
62517699dbbSLois Curfman McInnes       if (!rank) {
626b4fd4287SBarry Smith         ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
627c451ab25SLois Curfman McInnes                CHKERRQ(ierr);
62895373324SBarry Smith       }
62995373324SBarry Smith       else {
630b4fd4287SBarry Smith         ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
631c451ab25SLois Curfman McInnes                CHKERRQ(ierr);
63295373324SBarry Smith       }
633464493b3SBarry Smith       PLogObjectParent(mat,A);
634416022c9SBarry Smith 
63595373324SBarry Smith       /* copy over the A part */
636ec8511deSBarry Smith       Aloc = (Mat_SeqAIJ*) aij->A->data;
6372ee70a88SLois Curfman McInnes       m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
63895373324SBarry Smith       row = aij->rstart;
639dbb450caSBarry Smith       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
64095373324SBarry Smith       for ( i=0; i<m; i++ ) {
641416022c9SBarry Smith         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
64295373324SBarry Smith         row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
64395373324SBarry Smith       }
6442ee70a88SLois Curfman McInnes       aj = Aloc->j;
645dbb450caSBarry Smith       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}
64695373324SBarry Smith 
64795373324SBarry Smith       /* copy over the B part */
648ec8511deSBarry Smith       Aloc = (Mat_SeqAIJ*) aij->B->data;
6492ee70a88SLois Curfman McInnes       m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
65095373324SBarry Smith       row = aij->rstart;
6510452661fSBarry Smith       ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
652dbb450caSBarry Smith       for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
65395373324SBarry Smith       for ( i=0; i<m; i++ ) {
654416022c9SBarry Smith         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
65595373324SBarry Smith         row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
65695373324SBarry Smith       }
6570452661fSBarry Smith       PetscFree(ct);
65878b31e54SBarry Smith       ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
65978b31e54SBarry Smith       ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
66017699dbbSLois Curfman McInnes       if (!rank) {
66178b31e54SBarry Smith         ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
66295373324SBarry Smith       }
66378b31e54SBarry Smith       ierr = MatDestroy(A); CHKERRQ(ierr);
66495373324SBarry Smith     }
66595373324SBarry Smith   }
6661eb62cbbSBarry Smith   return 0;
6671eb62cbbSBarry Smith }
6681eb62cbbSBarry Smith 
669416022c9SBarry Smith static int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
670416022c9SBarry Smith {
671416022c9SBarry Smith   Mat         mat = (Mat) obj;
672416022c9SBarry Smith   int         ierr;
67319bcc07fSBarry Smith   ViewerType  vtype;
674416022c9SBarry Smith 
675416022c9SBarry Smith   if (!viewer) {
67619bcc07fSBarry Smith     viewer = STDOUT_VIEWER_SELF;
677416022c9SBarry Smith   }
67819bcc07fSBarry Smith   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
67919bcc07fSBarry Smith   if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
68019bcc07fSBarry Smith       vtype == DRAW_VIEWER       || vtype == MATLAB_VIEWER) {
681d7e8b826SBarry Smith     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
682416022c9SBarry Smith   }
68319bcc07fSBarry Smith   else if (vtype == BINARY_FILE_VIEWER) {
684416022c9SBarry Smith     return MatView_MPIAIJ_Binary(mat,viewer);
685416022c9SBarry Smith   }
686416022c9SBarry Smith   return 0;
687416022c9SBarry Smith }
688416022c9SBarry Smith 
6891eb62cbbSBarry Smith /*
6901eb62cbbSBarry Smith     This has to provide several versions.
6911eb62cbbSBarry Smith 
6921eb62cbbSBarry Smith      1) per sequential
6931eb62cbbSBarry Smith      2) a) use only local smoothing updating outer values only once.
6941eb62cbbSBarry Smith         b) local smoothing updating outer values each inner iteration
695d6dfbf8fSBarry Smith      3) color updating out values betwen colors.
6961eb62cbbSBarry Smith */
697fc76ce87SLois Curfman McInnes static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
698dbb450caSBarry Smith                            double fshift,int its,Vec xx)
6998a729477SBarry Smith {
70044a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
701d6dfbf8fSBarry Smith   Mat        AA = mat->A, BB = mat->B;
702ec8511deSBarry Smith   Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
7036abc6512SBarry Smith   Scalar     zero = 0.0,*b,*x,*xs,*ls,d,*v,sum,scale,*t,*ts;
7046abc6512SBarry Smith   int        ierr,*idx, *diag;
705416022c9SBarry Smith   int        n = mat->n, m = mat->m, i,shift = A->indexshift;
706da3a660dSBarry Smith   Vec        tt;
7078a729477SBarry Smith 
708d6dfbf8fSBarry Smith   VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
709dbb450caSBarry Smith   xs = x + shift; /* shift by one for index start of 1 */
710dbb450caSBarry Smith   ls = ls + shift;
711ec8511deSBarry Smith   if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;}
712d6dfbf8fSBarry Smith   diag = A->diag;
713acb40c82SBarry Smith   if (flag == SOR_APPLY_UPPER || flag == SOR_APPLY_LOWER) {
71448d91487SBarry Smith     SETERRQ(1,"MatRelax_MPIAIJ:Option not supported");
715acb40c82SBarry Smith   }
716da3a660dSBarry Smith   if (flag & SOR_EISENSTAT) {
717da3a660dSBarry Smith     /* Let  A = L + U + D; where L is lower trianglar,
718da3a660dSBarry Smith     U is upper triangular, E is diagonal; This routine applies
719da3a660dSBarry Smith 
720da3a660dSBarry Smith             (L + E)^{-1} A (U + E)^{-1}
721da3a660dSBarry Smith 
722da3a660dSBarry Smith     to a vector efficiently using Eisenstat's trick. This is for
723da3a660dSBarry Smith     the case of SSOR preconditioner, so E is D/omega where omega
724da3a660dSBarry Smith     is the relaxation factor.
725da3a660dSBarry Smith     */
72678b31e54SBarry Smith     ierr = VecDuplicate(xx,&tt); CHKERRQ(ierr);
727464493b3SBarry Smith     PLogObjectParent(matin,tt);
728da3a660dSBarry Smith     VecGetArray(tt,&t);
729da3a660dSBarry Smith     scale = (2.0/omega) - 1.0;
730da3a660dSBarry Smith     /*  x = (E + U)^{-1} b */
731da3a660dSBarry Smith     VecSet(&zero,mat->lvec);
73264119d58SLois Curfman McInnes     ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
73378b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
734da3a660dSBarry Smith     for ( i=m-1; i>-1; i-- ) {
735da3a660dSBarry Smith       n    = A->i[i+1] - diag[i] - 1;
736dbb450caSBarry Smith       idx  = A->j + diag[i] + !shift;
737dbb450caSBarry Smith       v    = A->a + diag[i] + !shift;
738da3a660dSBarry Smith       sum  = b[i];
739da3a660dSBarry Smith       SPARSEDENSEMDOT(sum,xs,v,idx,n);
740dbb450caSBarry Smith       d    = fshift + A->a[diag[i]+shift];
741da3a660dSBarry Smith       n    = B->i[i+1] - B->i[i];
742dbb450caSBarry Smith       idx  = B->j + B->i[i] + shift;
743dbb450caSBarry Smith       v    = B->a + B->i[i] + shift;
744da3a660dSBarry Smith       SPARSEDENSEMDOT(sum,ls,v,idx,n);
745da3a660dSBarry Smith       x[i] = omega*(sum/d);
746da3a660dSBarry Smith     }
74764119d58SLois Curfman McInnes     ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
74878b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
749da3a660dSBarry Smith 
750da3a660dSBarry Smith     /*  t = b - (2*E - D)x */
751da3a660dSBarry Smith     v = A->a;
752dbb450caSBarry Smith     for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ + shift])*x[i]; }
753da3a660dSBarry Smith 
754da3a660dSBarry Smith     /*  t = (E + L)^{-1}t */
755dbb450caSBarry Smith     ts = t + shift; /* shifted by one for index start of a or mat->j*/
756da3a660dSBarry Smith     diag = A->diag;
757da3a660dSBarry Smith     VecSet(&zero,mat->lvec);
75864119d58SLois Curfman McInnes     ierr = VecPipelineBegin(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
75978b31e54SBarry Smith                                                  mat->Mvctx); CHKERRQ(ierr);
760da3a660dSBarry Smith     for ( i=0; i<m; i++ ) {
761da3a660dSBarry Smith       n    = diag[i] - A->i[i];
762dbb450caSBarry Smith       idx  = A->j + A->i[i] + shift;
763dbb450caSBarry Smith       v    = A->a + A->i[i] + shift;
764da3a660dSBarry Smith       sum  = t[i];
765da3a660dSBarry Smith       SPARSEDENSEMDOT(sum,ts,v,idx,n);
766dbb450caSBarry Smith       d    = fshift + A->a[diag[i]+shift];
767da3a660dSBarry Smith       n    = B->i[i+1] - B->i[i];
768dbb450caSBarry Smith       idx  = B->j + B->i[i] + shift;
769dbb450caSBarry Smith       v    = B->a + B->i[i] + shift;
770da3a660dSBarry Smith       SPARSEDENSEMDOT(sum,ls,v,idx,n);
771da3a660dSBarry Smith       t[i] = omega*(sum/d);
772da3a660dSBarry Smith     }
77364119d58SLois Curfman McInnes     ierr = VecPipelineEnd(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
77478b31e54SBarry Smith                                                     mat->Mvctx); CHKERRQ(ierr);
775da3a660dSBarry Smith     /*  x = x + t */
776da3a660dSBarry Smith     for ( i=0; i<m; i++ ) { x[i] += t[i]; }
777da3a660dSBarry Smith     VecDestroy(tt);
778da3a660dSBarry Smith     return 0;
779da3a660dSBarry Smith   }
780da3a660dSBarry Smith 
7811eb62cbbSBarry Smith 
782d6dfbf8fSBarry Smith   if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){
783da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
784da3a660dSBarry Smith       VecSet(&zero,mat->lvec); VecSet(&zero,xx);
785da3a660dSBarry Smith     }
786da3a660dSBarry Smith     else {
78764119d58SLois Curfman McInnes       ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
78878b31e54SBarry Smith       CHKERRQ(ierr);
78964119d58SLois Curfman McInnes       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
79078b31e54SBarry Smith       CHKERRQ(ierr);
791da3a660dSBarry Smith     }
792d6dfbf8fSBarry Smith     while (its--) {
793d6dfbf8fSBarry Smith       /* go down through the rows */
79464119d58SLois Curfman McInnes       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
79578b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
796d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
797d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
798dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
799dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
800d6dfbf8fSBarry Smith         sum  = b[i];
801d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
802dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
803d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
804dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
805dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
806d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
807d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
808d6dfbf8fSBarry Smith       }
80964119d58SLois Curfman McInnes       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
81078b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
811d6dfbf8fSBarry Smith       /* come up through the rows */
81264119d58SLois Curfman McInnes       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
81378b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
814d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
815d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
816dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
817dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
818d6dfbf8fSBarry Smith         sum  = b[i];
819d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
820dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
821d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
822dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
823dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
824d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
825d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
826d6dfbf8fSBarry Smith       }
82764119d58SLois Curfman McInnes       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
82878b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
829d6dfbf8fSBarry Smith     }
830d6dfbf8fSBarry Smith   }
831d6dfbf8fSBarry Smith   else if (flag & SOR_FORWARD_SWEEP){
832da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
833da3a660dSBarry Smith       VecSet(&zero,mat->lvec);
83464119d58SLois Curfman McInnes       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
83578b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
836da3a660dSBarry Smith       for ( i=0; i<m; i++ ) {
837da3a660dSBarry Smith         n    = diag[i] - A->i[i];
838dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
839dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
840da3a660dSBarry Smith         sum  = b[i];
841da3a660dSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
842dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
843da3a660dSBarry Smith         n    = B->i[i+1] - B->i[i];
844dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
845dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
846da3a660dSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
847da3a660dSBarry Smith         x[i] = omega*(sum/d);
848da3a660dSBarry Smith       }
84964119d58SLois Curfman McInnes       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
85078b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
851da3a660dSBarry Smith       its--;
852da3a660dSBarry Smith     }
853d6dfbf8fSBarry Smith     while (its--) {
85464119d58SLois Curfman McInnes       ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
85578b31e54SBarry Smith       CHKERRQ(ierr);
85664119d58SLois Curfman McInnes       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
85778b31e54SBarry Smith       CHKERRQ(ierr);
85864119d58SLois Curfman McInnes       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
85978b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
860d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
861d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
862dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
863dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
864d6dfbf8fSBarry Smith         sum  = b[i];
865d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
866dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
867d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
868dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
869dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
870d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
871d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
872d6dfbf8fSBarry Smith       }
87364119d58SLois Curfman McInnes       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
87478b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
875d6dfbf8fSBarry Smith     }
876d6dfbf8fSBarry Smith   }
877d6dfbf8fSBarry Smith   else if (flag & SOR_BACKWARD_SWEEP){
878da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
879da3a660dSBarry Smith       VecSet(&zero,mat->lvec);
88064119d58SLois Curfman McInnes       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
88178b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
882da3a660dSBarry Smith       for ( i=m-1; i>-1; i-- ) {
883da3a660dSBarry Smith         n    = A->i[i+1] - diag[i] - 1;
884dbb450caSBarry Smith         idx  = A->j + diag[i] + !shift;
885dbb450caSBarry Smith         v    = A->a + diag[i] + !shift;
886da3a660dSBarry Smith         sum  = b[i];
887da3a660dSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
888dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
889da3a660dSBarry Smith         n    = B->i[i+1] - B->i[i];
890dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
891dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
892da3a660dSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
893da3a660dSBarry Smith         x[i] = omega*(sum/d);
894da3a660dSBarry Smith       }
89564119d58SLois Curfman McInnes       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
89678b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
897da3a660dSBarry Smith       its--;
898da3a660dSBarry Smith     }
899d6dfbf8fSBarry Smith     while (its--) {
90064119d58SLois Curfman McInnes       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN,
90178b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
90264119d58SLois Curfman McInnes       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN,
90378b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
90464119d58SLois Curfman McInnes       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
90578b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
906d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
907d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
908dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
909dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
910d6dfbf8fSBarry Smith         sum  = b[i];
911d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
912dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
913d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
914dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
915dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
916d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
917d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
918d6dfbf8fSBarry Smith       }
91964119d58SLois Curfman McInnes       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
92078b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
921d6dfbf8fSBarry Smith     }
922d6dfbf8fSBarry Smith   }
923d6dfbf8fSBarry Smith   else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
924da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
92538597bd4SSatish Balay       return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
926da3a660dSBarry Smith     }
92764119d58SLois Curfman McInnes     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
92878b31e54SBarry Smith     CHKERRQ(ierr);
92964119d58SLois Curfman McInnes     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
93078b31e54SBarry Smith     CHKERRQ(ierr);
931d6dfbf8fSBarry Smith     while (its--) {
932d6dfbf8fSBarry Smith       /* go down through the rows */
933d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
934d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
935dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
936dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
937d6dfbf8fSBarry Smith         sum  = b[i];
938d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
939dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
940d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
941dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
942dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
943d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
944d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
945d6dfbf8fSBarry Smith       }
946d6dfbf8fSBarry Smith       /* come up through the rows */
947d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
948d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
949dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
950dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
951d6dfbf8fSBarry Smith         sum  = b[i];
952d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
953dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
954d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
955dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
956dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
957d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
958d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
959d6dfbf8fSBarry Smith       }
960d6dfbf8fSBarry Smith     }
961d6dfbf8fSBarry Smith   }
962d6dfbf8fSBarry Smith   else if (flag & SOR_LOCAL_FORWARD_SWEEP){
963da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
96438597bd4SSatish Balay       return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
965da3a660dSBarry Smith     }
96664119d58SLois Curfman McInnes     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
96778b31e54SBarry Smith     CHKERRQ(ierr);
96864119d58SLois Curfman McInnes     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
96978b31e54SBarry Smith     CHKERRQ(ierr);
970d6dfbf8fSBarry Smith     while (its--) {
971d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
972d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
973dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
974dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
975d6dfbf8fSBarry Smith         sum  = b[i];
976d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
977dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
978d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
979dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
980dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
981d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
982d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
983d6dfbf8fSBarry Smith       }
984d6dfbf8fSBarry Smith     }
985d6dfbf8fSBarry Smith   }
986d6dfbf8fSBarry Smith   else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
987da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
98838597bd4SSatish Balay       return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
989da3a660dSBarry Smith     }
99064119d58SLois Curfman McInnes     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,
99178b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
99264119d58SLois Curfman McInnes     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,
99378b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
994d6dfbf8fSBarry Smith     while (its--) {
995d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
996d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
997dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
998dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
999d6dfbf8fSBarry Smith         sum  = b[i];
1000d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
1001dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
1002d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
1003dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
1004dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
1005d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
1006d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
1007d6dfbf8fSBarry Smith       }
1008d6dfbf8fSBarry Smith     }
1009d6dfbf8fSBarry Smith   }
10108a729477SBarry Smith   return 0;
10118a729477SBarry Smith }
1012a66be287SLois Curfman McInnes 
1013fc76ce87SLois Curfman McInnes static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,int *nz,
1014fc76ce87SLois Curfman McInnes                              int *nzalloc,int *mem)
1015a66be287SLois Curfman McInnes {
1016a66be287SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1017a66be287SLois Curfman McInnes   Mat        A = mat->A, B = mat->B;
1018a66be287SLois Curfman McInnes   int        ierr, isend[3], irecv[3], nzA, nzallocA, memA;
1019a66be287SLois Curfman McInnes 
102078b31e54SBarry Smith   ierr = MatGetInfo(A,MAT_LOCAL,&nzA,&nzallocA,&memA); CHKERRQ(ierr);
102178b31e54SBarry Smith   ierr = MatGetInfo(B,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr);
1022a66be287SLois Curfman McInnes   isend[0] += nzA; isend[1] += nzallocA; isend[2] += memA;
1023a66be287SLois Curfman McInnes   if (flag == MAT_LOCAL) {
1024bcd2baecSBarry Smith     if (nz)       *nz      = isend[0];
1025bcd2baecSBarry Smith     if (nzalloc)  *nzalloc = isend[1];
1026bcd2baecSBarry Smith     if (mem)      *mem     = isend[2];
1027a66be287SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_MAX) {
1028d65a2f8fSBarry Smith     MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_MAX,matin->comm);
1029bcd2baecSBarry Smith     if (nz)      *nz      = irecv[0];
1030bcd2baecSBarry Smith     if (nzalloc) *nzalloc = irecv[1];
1031bcd2baecSBarry Smith     if (mem)     *mem     = irecv[2];
1032a66be287SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_SUM) {
1033d65a2f8fSBarry Smith     MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_SUM,matin->comm);
1034bcd2baecSBarry Smith     if (nz)      *nz      = irecv[0];
1035bcd2baecSBarry Smith     if (nzalloc) *nzalloc = irecv[1];
1036bcd2baecSBarry Smith     if (mem)     *mem     = irecv[2];
1037a66be287SLois Curfman McInnes   }
1038a66be287SLois Curfman McInnes   return 0;
1039a66be287SLois Curfman McInnes }
1040a66be287SLois Curfman McInnes 
1041299609e3SLois Curfman McInnes extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*);
1042299609e3SLois Curfman McInnes extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*);
1043299609e3SLois Curfman McInnes extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double);
1044299609e3SLois Curfman McInnes extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *);
1045299609e3SLois Curfman McInnes extern int MatSolve_MPIAIJ(Mat,Vec,Vec);
1046299609e3SLois Curfman McInnes extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec);
1047299609e3SLois Curfman McInnes extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec);
1048299609e3SLois Curfman McInnes extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec);
1049299609e3SLois Curfman McInnes 
1050416022c9SBarry Smith static int MatSetOption_MPIAIJ(Mat A,MatOption op)
1051c74985f6SBarry Smith {
1052c0bbcb79SLois Curfman McInnes   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1053c74985f6SBarry Smith 
1054c0bbcb79SLois Curfman McInnes   if (op == NO_NEW_NONZERO_LOCATIONS ||
1055c0bbcb79SLois Curfman McInnes       op == YES_NEW_NONZERO_LOCATIONS ||
1056c0bbcb79SLois Curfman McInnes       op == COLUMNS_SORTED ||
1057c0bbcb79SLois Curfman McInnes       op == ROW_ORIENTED) {
1058c0bbcb79SLois Curfman McInnes         MatSetOption(a->A,op);
1059c0bbcb79SLois Curfman McInnes         MatSetOption(a->B,op);
1060c74985f6SBarry Smith   }
1061c0bbcb79SLois Curfman McInnes   else if (op == ROWS_SORTED ||
1062c0bbcb79SLois Curfman McInnes            op == SYMMETRIC_MATRIX ||
1063c0bbcb79SLois Curfman McInnes            op == STRUCTURALLY_SYMMETRIC_MATRIX ||
1064c0bbcb79SLois Curfman McInnes            op == YES_NEW_DIAGONALS)
106594a424c1SBarry Smith     PLogInfo(A,"Info:MatSetOption_MPIAIJ:Option ignored\n");
10664b0e389bSBarry Smith   else if (op == COLUMN_ORIENTED) {
10674b0e389bSBarry Smith     a->roworiented = 0;
10684b0e389bSBarry Smith     MatSetOption(a->A,op);
10694b0e389bSBarry Smith     MatSetOption(a->B,op);
10704b0e389bSBarry Smith   }
1071c0bbcb79SLois Curfman McInnes   else if (op == NO_NEW_DIAGONALS)
10724d39ef84SLois Curfman McInnes     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:NO_NEW_DIAGONALS");}
1073c0bbcb79SLois Curfman McInnes   else
10744d39ef84SLois Curfman McInnes     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:unknown option");}
1075c74985f6SBarry Smith   return 0;
1076c74985f6SBarry Smith }
1077c74985f6SBarry Smith 
1078d1710a03SLois Curfman McInnes static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
1079c74985f6SBarry Smith {
108044a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1081c74985f6SBarry Smith   *m = mat->M; *n = mat->N;
1082c74985f6SBarry Smith   return 0;
1083c74985f6SBarry Smith }
1084c74985f6SBarry Smith 
1085d1710a03SLois Curfman McInnes static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
1086c74985f6SBarry Smith {
108744a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1088b7c46309SBarry Smith   *m = mat->m; *n = mat->N;
1089c74985f6SBarry Smith   return 0;
1090c74985f6SBarry Smith }
1091c74985f6SBarry Smith 
1092d1710a03SLois Curfman McInnes static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
1093c74985f6SBarry Smith {
109444a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1095c74985f6SBarry Smith   *m = mat->rstart; *n = mat->rend;
1096c74985f6SBarry Smith   return 0;
1097c74985f6SBarry Smith }
1098c74985f6SBarry Smith 
10996d84be18SBarry Smith extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
11006d84be18SBarry Smith extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
11016d84be18SBarry Smith 
11026d84be18SBarry Smith int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
110339e00950SLois Curfman McInnes {
1104154123eaSLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
110570f0671dSBarry Smith   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
1106154123eaSLois Curfman McInnes   int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
1107154123eaSLois Curfman McInnes   int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
110870f0671dSBarry Smith   int        *cmap, *idx_p;
110939e00950SLois Curfman McInnes 
11107a0afa10SBarry Smith   if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,"MatGetRow_MPIAIJ:Already active");
11117a0afa10SBarry Smith   mat->getrowactive = PETSC_TRUE;
11127a0afa10SBarry Smith 
111370f0671dSBarry Smith   if (!mat->rowvalues && (idx || v)) {
11147a0afa10SBarry Smith     /*
11157a0afa10SBarry Smith         allocate enough space to hold information from the longest row.
11167a0afa10SBarry Smith     */
11177a0afa10SBarry Smith     Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data;
11187a0afa10SBarry Smith     int     max = 1,n = mat->n,tmp;
11197a0afa10SBarry Smith     for ( i=0; i<n; i++ ) {
11207a0afa10SBarry Smith       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
11217a0afa10SBarry Smith       if (max < tmp) { max = tmp; }
11227a0afa10SBarry Smith     }
11237a0afa10SBarry Smith     mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar)));
11247a0afa10SBarry Smith     CHKPTRQ(mat->rowvalues);
11257a0afa10SBarry Smith     mat->rowindices = (int *) (mat->rowvalues + max);
11267a0afa10SBarry Smith   }
11277a0afa10SBarry Smith 
11287a0afa10SBarry Smith 
1129b49de8d1SLois Curfman McInnes   if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIAIJ:Only local rows")
1130abc0e9e4SLois Curfman McInnes   lrow = row - rstart;
113139e00950SLois Curfman McInnes 
1132154123eaSLois Curfman McInnes   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1133154123eaSLois Curfman McInnes   if (!v)   {pvA = 0; pvB = 0;}
1134154123eaSLois Curfman McInnes   if (!idx) {pcA = 0; if (!v) pcB = 0;}
113538597bd4SSatish Balay   ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
113638597bd4SSatish Balay   ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1137154123eaSLois Curfman McInnes   nztot = nzA + nzB;
1138154123eaSLois Curfman McInnes 
113970f0671dSBarry Smith   cmap  = mat->garray;
1140154123eaSLois Curfman McInnes   if (v  || idx) {
1141154123eaSLois Curfman McInnes     if (nztot) {
1142154123eaSLois Curfman McInnes       /* Sort by increasing column numbers, assuming A and B already sorted */
114370f0671dSBarry Smith       int imark = -1;
1144154123eaSLois Curfman McInnes       if (v) {
114570f0671dSBarry Smith         *v = v_p = mat->rowvalues;
114639e00950SLois Curfman McInnes         for ( i=0; i<nzB; i++ ) {
114770f0671dSBarry Smith           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1148154123eaSLois Curfman McInnes           else break;
1149154123eaSLois Curfman McInnes         }
1150154123eaSLois Curfman McInnes         imark = i;
115170f0671dSBarry Smith         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
115270f0671dSBarry Smith         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
1153154123eaSLois Curfman McInnes       }
1154154123eaSLois Curfman McInnes       if (idx) {
115570f0671dSBarry Smith         *idx = idx_p = mat->rowindices;
115670f0671dSBarry Smith         if (imark > -1) {
115770f0671dSBarry Smith           for ( i=0; i<imark; i++ ) {
115870f0671dSBarry Smith             idx_p[i] = cmap[cworkB[i]];
115970f0671dSBarry Smith           }
116070f0671dSBarry Smith         } else {
1161154123eaSLois Curfman McInnes           for ( i=0; i<nzB; i++ ) {
116270f0671dSBarry Smith             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1163154123eaSLois Curfman McInnes             else break;
1164154123eaSLois Curfman McInnes           }
1165154123eaSLois Curfman McInnes           imark = i;
116670f0671dSBarry Smith         }
116770f0671dSBarry Smith         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart + cworkA[i];
116870f0671dSBarry Smith         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]];
116939e00950SLois Curfman McInnes       }
117039e00950SLois Curfman McInnes     }
117139e00950SLois Curfman McInnes     else {*idx = 0; *v=0;}
1172154123eaSLois Curfman McInnes   }
117339e00950SLois Curfman McInnes   *nz = nztot;
117438597bd4SSatish Balay   ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
117538597bd4SSatish Balay   ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
117639e00950SLois Curfman McInnes   return 0;
117739e00950SLois Curfman McInnes }
117839e00950SLois Curfman McInnes 
11796d84be18SBarry Smith int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
118039e00950SLois Curfman McInnes {
11817a0afa10SBarry Smith   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
11827a0afa10SBarry Smith   if (aij->getrowactive == PETSC_FALSE) {
11837a0afa10SBarry Smith     SETERRQ(1,"MatRestoreRow_MPIAIJ:MatGetRow not called");
11847a0afa10SBarry Smith   }
11857a0afa10SBarry Smith   aij->getrowactive = PETSC_FALSE;
118639e00950SLois Curfman McInnes   return 0;
118739e00950SLois Curfman McInnes }
118839e00950SLois Curfman McInnes 
1189cddf8d76SBarry Smith static int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
1190855ac2c5SLois Curfman McInnes {
1191855ac2c5SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1192ec8511deSBarry Smith   Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
1193416022c9SBarry Smith   int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
1194416022c9SBarry Smith   double     sum = 0.0;
119504ca555eSLois Curfman McInnes   Scalar     *v;
119604ca555eSLois Curfman McInnes 
119717699dbbSLois Curfman McInnes   if (aij->size == 1) {
119814183eadSLois Curfman McInnes     ierr =  MatNorm(aij->A,type,norm); CHKERRQ(ierr);
119937fa93a5SLois Curfman McInnes   } else {
120004ca555eSLois Curfman McInnes     if (type == NORM_FROBENIUS) {
120104ca555eSLois Curfman McInnes       v = amat->a;
120204ca555eSLois Curfman McInnes       for (i=0; i<amat->nz; i++ ) {
120304ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX)
120404ca555eSLois Curfman McInnes         sum += real(conj(*v)*(*v)); v++;
120504ca555eSLois Curfman McInnes #else
120604ca555eSLois Curfman McInnes         sum += (*v)*(*v); v++;
120704ca555eSLois Curfman McInnes #endif
120804ca555eSLois Curfman McInnes       }
120904ca555eSLois Curfman McInnes       v = bmat->a;
121004ca555eSLois Curfman McInnes       for (i=0; i<bmat->nz; i++ ) {
121104ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX)
121204ca555eSLois Curfman McInnes         sum += real(conj(*v)*(*v)); v++;
121304ca555eSLois Curfman McInnes #else
121404ca555eSLois Curfman McInnes         sum += (*v)*(*v); v++;
121504ca555eSLois Curfman McInnes #endif
121604ca555eSLois Curfman McInnes       }
12176d84be18SBarry Smith       MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
121804ca555eSLois Curfman McInnes       *norm = sqrt(*norm);
121904ca555eSLois Curfman McInnes     }
122004ca555eSLois Curfman McInnes     else if (type == NORM_1) { /* max column norm */
122104ca555eSLois Curfman McInnes       double *tmp, *tmp2;
122204ca555eSLois Curfman McInnes       int    *jj, *garray = aij->garray;
12230452661fSBarry Smith       tmp  = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp);
12240452661fSBarry Smith       tmp2 = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp2);
1225cddf8d76SBarry Smith       PetscMemzero(tmp,aij->N*sizeof(double));
122604ca555eSLois Curfman McInnes       *norm = 0.0;
122704ca555eSLois Curfman McInnes       v = amat->a; jj = amat->j;
122804ca555eSLois Curfman McInnes       for ( j=0; j<amat->nz; j++ ) {
1229579c6b6fSBarry Smith         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
123004ca555eSLois Curfman McInnes       }
123104ca555eSLois Curfman McInnes       v = bmat->a; jj = bmat->j;
123204ca555eSLois Curfman McInnes       for ( j=0; j<bmat->nz; j++ ) {
1233579c6b6fSBarry Smith         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
123404ca555eSLois Curfman McInnes       }
12356d84be18SBarry Smith       MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);
123604ca555eSLois Curfman McInnes       for ( j=0; j<aij->N; j++ ) {
123704ca555eSLois Curfman McInnes         if (tmp2[j] > *norm) *norm = tmp2[j];
123804ca555eSLois Curfman McInnes       }
12390452661fSBarry Smith       PetscFree(tmp); PetscFree(tmp2);
124004ca555eSLois Curfman McInnes     }
124104ca555eSLois Curfman McInnes     else if (type == NORM_INFINITY) { /* max row norm */
1242515d9167SLois Curfman McInnes       double ntemp = 0.0;
124304ca555eSLois Curfman McInnes       for ( j=0; j<amat->m; j++ ) {
1244dbb450caSBarry Smith         v = amat->a + amat->i[j] + shift;
124504ca555eSLois Curfman McInnes         sum = 0.0;
124604ca555eSLois Curfman McInnes         for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
1247cddf8d76SBarry Smith           sum += PetscAbsScalar(*v); v++;
124804ca555eSLois Curfman McInnes         }
1249dbb450caSBarry Smith         v = bmat->a + bmat->i[j] + shift;
125004ca555eSLois Curfman McInnes         for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
1251cddf8d76SBarry Smith           sum += PetscAbsScalar(*v); v++;
125204ca555eSLois Curfman McInnes         }
1253515d9167SLois Curfman McInnes         if (sum > ntemp) ntemp = sum;
125404ca555eSLois Curfman McInnes       }
12556d84be18SBarry Smith       MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);
125604ca555eSLois Curfman McInnes     }
125704ca555eSLois Curfman McInnes     else {
1258515d9167SLois Curfman McInnes       SETERRQ(1,"MatNorm_MPIAIJ:No support for two norm");
125904ca555eSLois Curfman McInnes     }
126037fa93a5SLois Curfman McInnes   }
1261855ac2c5SLois Curfman McInnes   return 0;
1262855ac2c5SLois Curfman McInnes }
1263855ac2c5SLois Curfman McInnes 
12640de55854SLois Curfman McInnes static int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1265b7c46309SBarry Smith {
1266b7c46309SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1267dbb450caSBarry Smith   Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
1268416022c9SBarry Smith   int        ierr,shift = Aloc->indexshift;
1269416022c9SBarry Smith   Mat        B;
1270b7c46309SBarry Smith   int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
1271b7c46309SBarry Smith   Scalar     *array;
1272b7c46309SBarry Smith 
12733638b69dSLois Curfman McInnes   if (matout == PETSC_NULL && M != N)
12743638b69dSLois Curfman McInnes     SETERRQ(1,"MatTranspose_MPIAIJ:Square matrix only for in-place");
1275b4fd4287SBarry Smith   ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,
1276b4fd4287SBarry Smith          PETSC_NULL,&B); CHKERRQ(ierr);
1277b7c46309SBarry Smith 
1278b7c46309SBarry Smith   /* copy over the A part */
1279ec8511deSBarry Smith   Aloc = (Mat_SeqAIJ*) a->A->data;
1280b7c46309SBarry Smith   m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1281b7c46309SBarry Smith   row = a->rstart;
1282dbb450caSBarry Smith   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
1283b7c46309SBarry Smith   for ( i=0; i<m; i++ ) {
1284416022c9SBarry Smith     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1285b7c46309SBarry Smith     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1286b7c46309SBarry Smith   }
1287b7c46309SBarry Smith   aj = Aloc->j;
12884af08d9eSBarry Smith   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}
1289b7c46309SBarry Smith 
1290b7c46309SBarry Smith   /* copy over the B part */
1291ec8511deSBarry Smith   Aloc = (Mat_SeqAIJ*) a->B->data;
1292b7c46309SBarry Smith   m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1293b7c46309SBarry Smith   row = a->rstart;
12940452661fSBarry Smith   ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
1295dbb450caSBarry Smith   for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
1296b7c46309SBarry Smith   for ( i=0; i<m; i++ ) {
1297416022c9SBarry Smith     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1298b7c46309SBarry Smith     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1299b7c46309SBarry Smith   }
13000452661fSBarry Smith   PetscFree(ct);
1301b7c46309SBarry Smith   ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
1302b7c46309SBarry Smith   ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
13033638b69dSLois Curfman McInnes   if (matout != PETSC_NULL) {
13040de55854SLois Curfman McInnes     *matout = B;
13050de55854SLois Curfman McInnes   } else {
13060de55854SLois Curfman McInnes     /* This isn't really an in-place transpose .... but free data structures from a */
13070452661fSBarry Smith     PetscFree(a->rowners);
13080de55854SLois Curfman McInnes     ierr = MatDestroy(a->A); CHKERRQ(ierr);
13090de55854SLois Curfman McInnes     ierr = MatDestroy(a->B); CHKERRQ(ierr);
13100452661fSBarry Smith     if (a->colmap) PetscFree(a->colmap);
13110452661fSBarry Smith     if (a->garray) PetscFree(a->garray);
13120de55854SLois Curfman McInnes     if (a->lvec) VecDestroy(a->lvec);
1313a56f8943SBarry Smith     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
13140452661fSBarry Smith     PetscFree(a);
1315416022c9SBarry Smith     PetscMemcpy(A,B,sizeof(struct _Mat));
13160452661fSBarry Smith     PetscHeaderDestroy(B);
13170de55854SLois Curfman McInnes   }
1318b7c46309SBarry Smith   return 0;
1319b7c46309SBarry Smith }
1320b7c46309SBarry Smith 
1321682d7d0cSBarry Smith extern int MatPrintHelp_SeqAIJ(Mat);
1322682d7d0cSBarry Smith static int MatPrintHelp_MPIAIJ(Mat A)
1323682d7d0cSBarry Smith {
1324682d7d0cSBarry Smith   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1325682d7d0cSBarry Smith 
1326682d7d0cSBarry Smith   if (!a->rank) return MatPrintHelp_SeqAIJ(a->A);
1327682d7d0cSBarry Smith   else return 0;
1328682d7d0cSBarry Smith }
1329682d7d0cSBarry Smith 
1330fc76ce87SLois Curfman McInnes extern int MatConvert_MPIAIJ(Mat,MatType,Mat *);
13313d1612f7SBarry Smith static int MatConvertSameType_MPIAIJ(Mat,Mat *,int);
13322f86bd48SSatish Balay extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int);
1333598137ffSSatish Balay int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **);
13348a729477SBarry Smith /* -------------------------------------------------------------------*/
13352ee70a88SLois Curfman McInnes static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
133639e00950SLois Curfman McInnes        MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
133744a69424SLois Curfman McInnes        MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
133844a69424SLois Curfman McInnes        MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
1339299609e3SLois Curfman McInnes        MatSolve_MPIAIJ,MatSolveAdd_MPIAIJ,
1340299609e3SLois Curfman McInnes        MatSolveTrans_MPIAIJ,MatSolveTransAdd_MPIAIJ,
1341299609e3SLois Curfman McInnes        MatLUFactor_MPIAIJ,0,
134244a69424SLois Curfman McInnes        MatRelax_MPIAIJ,
1343b7c46309SBarry Smith        MatTranspose_MPIAIJ,
1344a66be287SLois Curfman McInnes        MatGetInfo_MPIAIJ,0,
1345855ac2c5SLois Curfman McInnes        MatGetDiagonal_MPIAIJ,0,MatNorm_MPIAIJ,
1346ee50ffe9SBarry Smith        MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
13471eb62cbbSBarry Smith        0,
1348299609e3SLois Curfman McInnes        MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
1349299609e3SLois Curfman McInnes        MatGetReordering_MPIAIJ,
1350299609e3SLois Curfman McInnes        MatLUFactorSymbolic_MPIAIJ,MatLUFactorNumeric_MPIAIJ,0,0,
1351d1710a03SLois Curfman McInnes        MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
1352299609e3SLois Curfman McInnes        MatILUFactorSymbolic_MPIAIJ,0,
13533d1612f7SBarry Smith        0,0,MatConvert_MPIAIJ,0,0,MatConvertSameType_MPIAIJ,0,0,
1354b49de8d1SLois Curfman McInnes        0,0,0,
1355598137ffSSatish Balay        MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0,
1356052efed2SBarry Smith        MatPrintHelp_MPIAIJ,
1357052efed2SBarry Smith        MatScale_MPIAIJ};
13588a729477SBarry Smith 
13591987afe7SBarry Smith /*@C
1360ff756334SLois Curfman McInnes    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
13613a511b96SLois Curfman McInnes    (the default parallel PETSc format).  For good matrix assembly performance
13623a511b96SLois Curfman McInnes    the user should preallocate the matrix storage by setting the parameters
13633a511b96SLois Curfman McInnes    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
13643a511b96SLois Curfman McInnes    performance can be increased by more than a factor of 50.
13658a729477SBarry Smith 
13668a729477SBarry Smith    Input Parameters:
13671eb62cbbSBarry Smith .  comm - MPI communicator
13687d3e4905SLois Curfman McInnes .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
13697d3e4905SLois Curfman McInnes .  n - number of local columns (or PETSC_DECIDE to have calculated
13707d3e4905SLois Curfman McInnes            if N is given)
13717d3e4905SLois Curfman McInnes .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
13727d3e4905SLois Curfman McInnes .  N - number of global columns (or PETSC_DECIDE to have calculated
13737d3e4905SLois Curfman McInnes            if n is given)
1374ab693e5aSLois Curfman McInnes .  d_nz - number of nonzeros per row in diagonal portion of local submatrix
1375ff756334SLois Curfman McInnes            (same for all local rows)
13762bd5e0b2SLois Curfman McInnes .  d_nzz - array containing the number of nonzeros in the various rows of the
13772bd5e0b2SLois Curfman McInnes            diagonal portion of local submatrix (possibly different for each row)
13782bd5e0b2SLois Curfman McInnes            or PETSC_NULL. You must leave room for the diagonal entry even if
13792bd5e0b2SLois Curfman McInnes            it is zero.
13802bd5e0b2SLois Curfman McInnes .  o_nz - number of nonzeros per row in the off-diagonal portion of local
1381ab693e5aSLois Curfman McInnes            submatrix (same for all local rows).
13822bd5e0b2SLois Curfman McInnes .  o_nzz - array containing the number of nonzeros in the various rows of the
13832bd5e0b2SLois Curfman McInnes            off-diagonal portion of the local submatrix (possibly different for
13842bd5e0b2SLois Curfman McInnes            each row) or PETSC_NULL.
13858a729477SBarry Smith 
1386ff756334SLois Curfman McInnes    Output Parameter:
138744cd7ae7SLois Curfman McInnes .  A - the matrix
13888a729477SBarry Smith 
1389ff756334SLois Curfman McInnes    Notes:
1390ff756334SLois Curfman McInnes    The AIJ format (also called the Yale sparse matrix format or
1391ff756334SLois Curfman McInnes    compressed row storage), is fully compatible with standard Fortran 77
13920002213bSLois Curfman McInnes    storage.  That is, the stored row and column indices can begin at
13930002213bSLois Curfman McInnes    either one (as in Fortran) or zero.  See the users manual for details.
1394ff756334SLois Curfman McInnes 
1395ff756334SLois Curfman McInnes    The user MUST specify either the local or global matrix dimensions
1396ff756334SLois Curfman McInnes    (possibly both).
1397ff756334SLois Curfman McInnes 
13985511cfe3SLois Curfman McInnes    By default, this format uses inodes (identical nodes) when possible.
13995511cfe3SLois Curfman McInnes    We search for consecutive rows with the same nonzero structure, thereby
14005511cfe3SLois Curfman McInnes    reusing matrix information to achieve increased efficiency.
14015511cfe3SLois Curfman McInnes 
14025511cfe3SLois Curfman McInnes    Options Database Keys:
14035511cfe3SLois Curfman McInnes $    -mat_aij_no_inode  - Do not use inodes
14046e62573dSLois Curfman McInnes $    -mat_aij_inode_limit <limit> - Set inode limit.
14056e62573dSLois Curfman McInnes $        (max limit=5)
14066e62573dSLois Curfman McInnes $    -mat_aij_oneindex - Internally use indexing starting at 1
14076e62573dSLois Curfman McInnes $        rather than 0.  Note: When calling MatSetValues(),
14086e62573dSLois Curfman McInnes $        the user still MUST index entries starting at 0!
14095511cfe3SLois Curfman McInnes 
1410e0245417SLois Curfman McInnes    Storage Information:
1411e0245417SLois Curfman McInnes    For a square global matrix we define each processor's diagonal portion
1412e0245417SLois Curfman McInnes    to be its local rows and the corresponding columns (a square submatrix);
1413e0245417SLois Curfman McInnes    each processor's off-diagonal portion encompasses the remainder of the
1414e0245417SLois Curfman McInnes    local matrix (a rectangular submatrix).
1415e0245417SLois Curfman McInnes 
1416e0245417SLois Curfman McInnes    The user can specify preallocated storage for the diagonal part of
14175ace5be8SLois Curfman McInnes    the local submatrix with either d_nz or d_nnz (not both).  Set
14185ace5be8SLois Curfman McInnes    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
14195ace5be8SLois Curfman McInnes    memory allocation.  Likewise, specify preallocated storage for the
14205ace5be8SLois Curfman McInnes    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1421ff756334SLois Curfman McInnes 
14225511cfe3SLois Curfman McInnes    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
14235511cfe3SLois Curfman McInnes    the figure below we depict these three local rows and all columns (0-11).
14242191d07cSBarry Smith 
1425b810aeb4SBarry Smith $          0 1 2 3 4 5 6 7 8 9 10 11
1426b810aeb4SBarry Smith $         -------------------
14275511cfe3SLois Curfman McInnes $  row 3  |  o o o d d d o o o o o o
14285511cfe3SLois Curfman McInnes $  row 4  |  o o o d d d o o o o o o
14295511cfe3SLois Curfman McInnes $  row 5  |  o o o d d d o o o o o o
14305511cfe3SLois Curfman McInnes $         -------------------
1431b810aeb4SBarry Smith $
1432b810aeb4SBarry Smith 
14335511cfe3SLois Curfman McInnes    Thus, any entries in the d locations are stored in the d (diagonal)
14345511cfe3SLois Curfman McInnes    submatrix, and any entries in the o locations are stored in the
14355511cfe3SLois Curfman McInnes    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
14365511cfe3SLois Curfman McInnes    stored simply in the MATSEQAIJ format for compressed row storage.
14375511cfe3SLois Curfman McInnes 
14385511cfe3SLois Curfman McInnes    Now d_nz should indicate the number of nonzeros per row in the d matrix,
14395511cfe3SLois Curfman McInnes    and o_nz should indicate the number of nonzeros per row in the o matrix.
14405511cfe3SLois Curfman McInnes    In general, for PDE problems in which most nonzeros are near the diagonal,
14413d323bbdSBarry Smith    one expects d_nz >> o_nz. For large problems you MUST preallocate memory
14423d323bbdSBarry Smith    or you will get TERRIBLE performance, see the users' manual chapter on
14433d323bbdSBarry Smith    matrices and the file $(PETSC_DIR)/Performance.
14443a511b96SLois Curfman McInnes 
1445dbd7a890SLois Curfman McInnes .keywords: matrix, aij, compressed row, sparse, parallel
1446ff756334SLois Curfman McInnes 
1447fafbff53SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
14488a729477SBarry Smith @*/
14491eb62cbbSBarry Smith int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
145044cd7ae7SLois Curfman McInnes                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
14518a729477SBarry Smith {
145244cd7ae7SLois Curfman McInnes   Mat          B;
145344cd7ae7SLois Curfman McInnes   Mat_MPIAIJ   *b;
14546abc6512SBarry Smith   int          ierr, i,sum[2],work[2];
1455416022c9SBarry Smith 
145644cd7ae7SLois Curfman McInnes   *A = 0;
145744cd7ae7SLois Curfman McInnes   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIAIJ,comm);
145844cd7ae7SLois Curfman McInnes   PLogObjectCreate(B);
145944cd7ae7SLois Curfman McInnes   B->data       = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b);
146044cd7ae7SLois Curfman McInnes   PetscMemzero(b,sizeof(Mat_MPIAIJ));
146144cd7ae7SLois Curfman McInnes   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
146244cd7ae7SLois Curfman McInnes   B->destroy    = MatDestroy_MPIAIJ;
146344cd7ae7SLois Curfman McInnes   B->view       = MatView_MPIAIJ;
146444cd7ae7SLois Curfman McInnes   B->factor     = 0;
146544cd7ae7SLois Curfman McInnes   B->assembled  = PETSC_FALSE;
1466d6dfbf8fSBarry Smith 
146744cd7ae7SLois Curfman McInnes   b->insertmode = NOT_SET_VALUES;
146844cd7ae7SLois Curfman McInnes   MPI_Comm_rank(comm,&b->rank);
146944cd7ae7SLois Curfman McInnes   MPI_Comm_size(comm,&b->size);
14701eb62cbbSBarry Smith 
1471b4fd4287SBarry Smith   if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
14722e0155e0SLois Curfman McInnes     SETERRQ(1,"MatCreateMPIAIJ:Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
14731987afe7SBarry Smith 
1474dbd7a890SLois Curfman McInnes   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
14751eb62cbbSBarry Smith     work[0] = m; work[1] = n;
1476d65a2f8fSBarry Smith     MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
1477dbd7a890SLois Curfman McInnes     if (M == PETSC_DECIDE) M = sum[0];
1478dbd7a890SLois Curfman McInnes     if (N == PETSC_DECIDE) N = sum[1];
14791eb62cbbSBarry Smith   }
148044cd7ae7SLois Curfman McInnes   if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);}
148144cd7ae7SLois Curfman McInnes   if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);}
148244cd7ae7SLois Curfman McInnes   b->m = m; B->m = m;
148344cd7ae7SLois Curfman McInnes   b->n = n; B->n = n;
148444cd7ae7SLois Curfman McInnes   b->N = N; B->N = N;
148544cd7ae7SLois Curfman McInnes   b->M = M; B->M = M;
14861eb62cbbSBarry Smith 
14871eb62cbbSBarry Smith   /* build local table of row and column ownerships */
148844cd7ae7SLois Curfman McInnes   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
148944cd7ae7SLois Curfman McInnes   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
149044cd7ae7SLois Curfman McInnes   b->cowners = b->rowners + b->size + 1;
149144cd7ae7SLois Curfman McInnes   MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);
149244cd7ae7SLois Curfman McInnes   b->rowners[0] = 0;
149344cd7ae7SLois Curfman McInnes   for ( i=2; i<=b->size; i++ ) {
149444cd7ae7SLois Curfman McInnes     b->rowners[i] += b->rowners[i-1];
14958a729477SBarry Smith   }
149644cd7ae7SLois Curfman McInnes   b->rstart = b->rowners[b->rank];
149744cd7ae7SLois Curfman McInnes   b->rend   = b->rowners[b->rank+1];
149844cd7ae7SLois Curfman McInnes   MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);
149944cd7ae7SLois Curfman McInnes   b->cowners[0] = 0;
150044cd7ae7SLois Curfman McInnes   for ( i=2; i<=b->size; i++ ) {
150144cd7ae7SLois Curfman McInnes     b->cowners[i] += b->cowners[i-1];
15028a729477SBarry Smith   }
150344cd7ae7SLois Curfman McInnes   b->cstart = b->cowners[b->rank];
150444cd7ae7SLois Curfman McInnes   b->cend   = b->cowners[b->rank+1];
15058a729477SBarry Smith 
15065ace5be8SLois Curfman McInnes   if (d_nz == PETSC_DEFAULT) d_nz = 5;
150744cd7ae7SLois Curfman McInnes   ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
150844cd7ae7SLois Curfman McInnes   PLogObjectParent(B,b->A);
15097b8455f0SLois Curfman McInnes   if (o_nz == PETSC_DEFAULT) o_nz = 0;
151044cd7ae7SLois Curfman McInnes   ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
151144cd7ae7SLois Curfman McInnes   PLogObjectParent(B,b->B);
15128a729477SBarry Smith 
15131eb62cbbSBarry Smith   /* build cache for off array entries formed */
151444cd7ae7SLois Curfman McInnes   ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
151544cd7ae7SLois Curfman McInnes   b->colmap      = 0;
151644cd7ae7SLois Curfman McInnes   b->garray      = 0;
151744cd7ae7SLois Curfman McInnes   b->roworiented = 1;
15188a729477SBarry Smith 
15191eb62cbbSBarry Smith   /* stuff used for matrix vector multiply */
152044cd7ae7SLois Curfman McInnes   b->lvec      = 0;
152144cd7ae7SLois Curfman McInnes   b->Mvctx     = 0;
15228a729477SBarry Smith 
15237a0afa10SBarry Smith   /* stuff for MatGetRow() */
152444cd7ae7SLois Curfman McInnes   b->rowindices   = 0;
152544cd7ae7SLois Curfman McInnes   b->rowvalues    = 0;
152644cd7ae7SLois Curfman McInnes   b->getrowactive = PETSC_FALSE;
15277a0afa10SBarry Smith 
152844cd7ae7SLois Curfman McInnes   *A = B;
1529d6dfbf8fSBarry Smith   return 0;
1530d6dfbf8fSBarry Smith }
1531c74985f6SBarry Smith 
15323d1612f7SBarry Smith static int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues)
1533d6dfbf8fSBarry Smith {
1534d6dfbf8fSBarry Smith   Mat        mat;
1535416022c9SBarry Smith   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
1536a1b97e82SLois Curfman McInnes   int        ierr, len=0, flg;
1537d6dfbf8fSBarry Smith 
1538416022c9SBarry Smith   *newmat       = 0;
15390452661fSBarry Smith   PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm);
1540a5a9c739SBarry Smith   PLogObjectCreate(mat);
15410452661fSBarry Smith   mat->data       = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
1542416022c9SBarry Smith   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
154344a69424SLois Curfman McInnes   mat->destroy    = MatDestroy_MPIAIJ;
154444a69424SLois Curfman McInnes   mat->view       = MatView_MPIAIJ;
1545d6dfbf8fSBarry Smith   mat->factor     = matin->factor;
1546c456f294SBarry Smith   mat->assembled  = PETSC_TRUE;
1547d6dfbf8fSBarry Smith 
154844cd7ae7SLois Curfman McInnes   a->m = mat->m   = oldmat->m;
154944cd7ae7SLois Curfman McInnes   a->n = mat->n   = oldmat->n;
155044cd7ae7SLois Curfman McInnes   a->M = mat->M   = oldmat->M;
155144cd7ae7SLois Curfman McInnes   a->N = mat->N   = oldmat->N;
1552d6dfbf8fSBarry Smith 
1553416022c9SBarry Smith   a->rstart       = oldmat->rstart;
1554416022c9SBarry Smith   a->rend         = oldmat->rend;
1555416022c9SBarry Smith   a->cstart       = oldmat->cstart;
1556416022c9SBarry Smith   a->cend         = oldmat->cend;
155717699dbbSLois Curfman McInnes   a->size         = oldmat->size;
155817699dbbSLois Curfman McInnes   a->rank         = oldmat->rank;
155964119d58SLois Curfman McInnes   a->insertmode   = NOT_SET_VALUES;
1560bcd2baecSBarry Smith   a->rowvalues    = 0;
1561bcd2baecSBarry Smith   a->getrowactive = PETSC_FALSE;
1562d6dfbf8fSBarry Smith 
15630452661fSBarry Smith   a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners);
156417699dbbSLois Curfman McInnes   PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
156517699dbbSLois Curfman McInnes   PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));
1566416022c9SBarry Smith   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
15672ee70a88SLois Curfman McInnes   if (oldmat->colmap) {
15680452661fSBarry Smith     a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
1569416022c9SBarry Smith     PLogObjectMemory(mat,(a->N)*sizeof(int));
1570416022c9SBarry Smith     PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
1571416022c9SBarry Smith   } else a->colmap = 0;
1572ec8511deSBarry Smith   if (oldmat->garray && (len = ((Mat_SeqAIJ *) (oldmat->B->data))->n)) {
15730452661fSBarry Smith     a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray);
1574464493b3SBarry Smith     PLogObjectMemory(mat,len*sizeof(int));
1575416022c9SBarry Smith     PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
1576416022c9SBarry Smith   } else a->garray = 0;
1577d6dfbf8fSBarry Smith 
1578416022c9SBarry Smith   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
1579416022c9SBarry Smith   PLogObjectParent(mat,a->lvec);
1580a56f8943SBarry Smith   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
1581416022c9SBarry Smith   PLogObjectParent(mat,a->Mvctx);
1582416022c9SBarry Smith   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
1583416022c9SBarry Smith   PLogObjectParent(mat,a->A);
1584416022c9SBarry Smith   ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
1585416022c9SBarry Smith   PLogObjectParent(mat,a->B);
15865dd7a6c7SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
158725cdf11fSBarry Smith   if (flg) {
1588682d7d0cSBarry Smith     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
1589682d7d0cSBarry Smith   }
15908a729477SBarry Smith   *newmat = mat;
15918a729477SBarry Smith   return 0;
15928a729477SBarry Smith }
1593416022c9SBarry Smith 
159477c4ece6SBarry Smith #include "sys.h"
1595416022c9SBarry Smith 
159619bcc07fSBarry Smith int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
1597416022c9SBarry Smith {
1598d65a2f8fSBarry Smith   Mat          A;
1599416022c9SBarry Smith   int          i, nz, ierr, j,rstart, rend, fd;
1600d65a2f8fSBarry Smith   Scalar       *vals,*svals;
160119bcc07fSBarry Smith   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1602416022c9SBarry Smith   MPI_Status   status;
160317699dbbSLois Curfman McInnes   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1604d65a2f8fSBarry Smith   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
160519bcc07fSBarry Smith   int          tag = ((PetscObject)viewer)->tag;
1606416022c9SBarry Smith 
160717699dbbSLois Curfman McInnes   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
160817699dbbSLois Curfman McInnes   if (!rank) {
160990ace30eSBarry Smith     ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
161077c4ece6SBarry Smith     ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr);
1611c456f294SBarry Smith     if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIAIJ:not matrix object");
1612416022c9SBarry Smith   }
1613416022c9SBarry Smith 
1614416022c9SBarry Smith   MPI_Bcast(header+1,3,MPI_INT,0,comm);
1615416022c9SBarry Smith   M = header[1]; N = header[2];
1616416022c9SBarry Smith   /* determine ownership of all rows */
161717699dbbSLois Curfman McInnes   m = M/size + ((M % size) > rank);
16180452661fSBarry Smith   rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
1619416022c9SBarry Smith   MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1620416022c9SBarry Smith   rowners[0] = 0;
162117699dbbSLois Curfman McInnes   for ( i=2; i<=size; i++ ) {
1622416022c9SBarry Smith     rowners[i] += rowners[i-1];
1623416022c9SBarry Smith   }
162417699dbbSLois Curfman McInnes   rstart = rowners[rank];
162517699dbbSLois Curfman McInnes   rend   = rowners[rank+1];
1626416022c9SBarry Smith 
1627416022c9SBarry Smith   /* distribute row lengths to all processors */
16280452661fSBarry Smith   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
1629416022c9SBarry Smith   offlens = ourlens + (rend-rstart);
163017699dbbSLois Curfman McInnes   if (!rank) {
16310452661fSBarry Smith     rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
163277c4ece6SBarry Smith     ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
16330452661fSBarry Smith     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
163417699dbbSLois Curfman McInnes     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
1635d65a2f8fSBarry Smith     MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
16360452661fSBarry Smith     PetscFree(sndcounts);
1637416022c9SBarry Smith   }
1638416022c9SBarry Smith   else {
1639416022c9SBarry Smith     MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
1640416022c9SBarry Smith   }
1641416022c9SBarry Smith 
164217699dbbSLois Curfman McInnes   if (!rank) {
1643416022c9SBarry Smith     /* calculate the number of nonzeros on each processor */
16440452661fSBarry Smith     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
1645cddf8d76SBarry Smith     PetscMemzero(procsnz,size*sizeof(int));
164617699dbbSLois Curfman McInnes     for ( i=0; i<size; i++ ) {
1647416022c9SBarry Smith       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
1648416022c9SBarry Smith         procsnz[i] += rowlengths[j];
1649416022c9SBarry Smith       }
1650416022c9SBarry Smith     }
16510452661fSBarry Smith     PetscFree(rowlengths);
1652416022c9SBarry Smith 
1653416022c9SBarry Smith     /* determine max buffer needed and allocate it */
1654416022c9SBarry Smith     maxnz = 0;
165517699dbbSLois Curfman McInnes     for ( i=0; i<size; i++ ) {
16560452661fSBarry Smith       maxnz = PetscMax(maxnz,procsnz[i]);
1657416022c9SBarry Smith     }
16580452661fSBarry Smith     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
1659416022c9SBarry Smith 
1660416022c9SBarry Smith     /* read in my part of the matrix column indices  */
1661416022c9SBarry Smith     nz = procsnz[0];
16620452661fSBarry Smith     mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
166377c4ece6SBarry Smith     ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr);
1664d65a2f8fSBarry Smith 
1665d65a2f8fSBarry Smith     /* read in every one elses and ship off */
166617699dbbSLois Curfman McInnes     for ( i=1; i<size; i++ ) {
1667d65a2f8fSBarry Smith       nz = procsnz[i];
166877c4ece6SBarry Smith       ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
1669d65a2f8fSBarry Smith       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
1670d65a2f8fSBarry Smith     }
16710452661fSBarry Smith     PetscFree(cols);
1672416022c9SBarry Smith   }
1673416022c9SBarry Smith   else {
1674416022c9SBarry Smith     /* determine buffer space needed for message */
1675416022c9SBarry Smith     nz = 0;
1676416022c9SBarry Smith     for ( i=0; i<m; i++ ) {
1677416022c9SBarry Smith       nz += ourlens[i];
1678416022c9SBarry Smith     }
16790452661fSBarry Smith     mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1680416022c9SBarry Smith 
1681416022c9SBarry Smith     /* receive message of column indices*/
1682d65a2f8fSBarry Smith     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
1683416022c9SBarry Smith     MPI_Get_count(&status,MPI_INT,&maxnz);
1684c456f294SBarry Smith     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file");
1685416022c9SBarry Smith   }
1686416022c9SBarry Smith 
1687416022c9SBarry Smith   /* loop over local rows, determining number of off diagonal entries */
1688cddf8d76SBarry Smith   PetscMemzero(offlens,m*sizeof(int));
1689416022c9SBarry Smith   jj = 0;
1690416022c9SBarry Smith   for ( i=0; i<m; i++ ) {
1691416022c9SBarry Smith     for ( j=0; j<ourlens[i]; j++ ) {
1692d65a2f8fSBarry Smith       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1693416022c9SBarry Smith       jj++;
1694416022c9SBarry Smith     }
1695416022c9SBarry Smith   }
1696d65a2f8fSBarry Smith 
1697d65a2f8fSBarry Smith   /* create our matrix */
1698416022c9SBarry Smith   for ( i=0; i<m; i++ ) {
1699416022c9SBarry Smith     ourlens[i] -= offlens[i];
1700416022c9SBarry Smith   }
1701d65a2f8fSBarry Smith   ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1702d65a2f8fSBarry Smith   A = *newmat;
1703d65a2f8fSBarry Smith   MatSetOption(A,COLUMNS_SORTED);
1704d65a2f8fSBarry Smith   for ( i=0; i<m; i++ ) {
1705d65a2f8fSBarry Smith     ourlens[i] += offlens[i];
1706d65a2f8fSBarry Smith   }
1707416022c9SBarry Smith 
170817699dbbSLois Curfman McInnes   if (!rank) {
17090452661fSBarry Smith     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);
1710416022c9SBarry Smith 
1711416022c9SBarry Smith     /* read in my part of the matrix numerical values  */
1712416022c9SBarry Smith     nz = procsnz[0];
171377c4ece6SBarry Smith     ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
1714d65a2f8fSBarry Smith 
1715d65a2f8fSBarry Smith     /* insert into matrix */
1716d65a2f8fSBarry Smith     jj      = rstart;
1717d65a2f8fSBarry Smith     smycols = mycols;
1718d65a2f8fSBarry Smith     svals   = vals;
1719d65a2f8fSBarry Smith     for ( i=0; i<m; i++ ) {
1720d65a2f8fSBarry Smith       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1721d65a2f8fSBarry Smith       smycols += ourlens[i];
1722d65a2f8fSBarry Smith       svals   += ourlens[i];
1723d65a2f8fSBarry Smith       jj++;
1724416022c9SBarry Smith     }
1725416022c9SBarry Smith 
1726d65a2f8fSBarry Smith     /* read in other processors and ship out */
172717699dbbSLois Curfman McInnes     for ( i=1; i<size; i++ ) {
1728416022c9SBarry Smith       nz = procsnz[i];
172977c4ece6SBarry Smith       ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
1730d65a2f8fSBarry Smith       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
1731416022c9SBarry Smith     }
17320452661fSBarry Smith     PetscFree(procsnz);
1733416022c9SBarry Smith   }
1734d65a2f8fSBarry Smith   else {
1735d65a2f8fSBarry Smith     /* receive numeric values */
17360452661fSBarry Smith     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);
1737416022c9SBarry Smith 
1738d65a2f8fSBarry Smith     /* receive message of values*/
1739d65a2f8fSBarry Smith     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
1740d65a2f8fSBarry Smith     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1741c456f294SBarry Smith     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file");
1742d65a2f8fSBarry Smith 
1743d65a2f8fSBarry Smith     /* insert into matrix */
1744d65a2f8fSBarry Smith     jj      = rstart;
1745d65a2f8fSBarry Smith     smycols = mycols;
1746d65a2f8fSBarry Smith     svals   = vals;
1747d65a2f8fSBarry Smith     for ( i=0; i<m; i++ ) {
1748d65a2f8fSBarry Smith       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1749d65a2f8fSBarry Smith       smycols += ourlens[i];
1750d65a2f8fSBarry Smith       svals   += ourlens[i];
1751d65a2f8fSBarry Smith       jj++;
1752d65a2f8fSBarry Smith     }
1753d65a2f8fSBarry Smith   }
17540452661fSBarry Smith   PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);
1755d65a2f8fSBarry Smith 
1756d65a2f8fSBarry Smith   ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
1757d65a2f8fSBarry Smith   ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
1758416022c9SBarry Smith   return 0;
1759416022c9SBarry Smith }
1760