xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision c16cb8f2de640b3af1673495cb6d811eff24116a)
1cb512458SBarry Smith #ifndef lint
2*c16cb8f2SBarry Smith static char vcid[] = "$Id: mpiaij.c,v 1.155 1996/07/29 22:51:58 balay Exp bsmith $";
3cb512458SBarry Smith #endif
48a729477SBarry Smith 
51eb62cbbSBarry Smith #include "mpiaij.h"
6f5eb4b81SSatish Balay #include "src/vec/vecimpl.h"
7d9942c19SSatish Balay #include "src/inline/spops.h"
88a729477SBarry Smith 
99e25ed09SBarry Smith /* local utility routine that creates a mapping from the global column
109e25ed09SBarry Smith number to the local number in the off-diagonal part of the local
119e25ed09SBarry Smith storage of the matrix.  This is done in a non scable way since the
129e25ed09SBarry Smith length of colmap equals the global matrix length.
139e25ed09SBarry Smith */
14b7c46309SBarry Smith static int CreateColmap_Private(Mat mat)
159e25ed09SBarry Smith {
1644a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
17ec8511deSBarry Smith   Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
18416022c9SBarry Smith   int        n = B->n,i,shift = B->indexshift;
19dbb450caSBarry Smith 
200452661fSBarry Smith   aij->colmap = (int *) PetscMalloc(aij->N*sizeof(int));CHKPTRQ(aij->colmap);
21464493b3SBarry Smith   PLogObjectMemory(mat,aij->N*sizeof(int));
22cddf8d76SBarry Smith   PetscMemzero(aij->colmap,aij->N*sizeof(int));
23dbb450caSBarry Smith   for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i-shift;
249e25ed09SBarry Smith   return 0;
259e25ed09SBarry Smith }
269e25ed09SBarry Smith 
272493cbb0SBarry Smith extern int DisAssemble_MPIAIJ(Mat);
282493cbb0SBarry Smith 
2970423df4SBarry Smith static int MatGetReordering_MPIAIJ(Mat mat,MatReordering type,IS *rperm,IS *cperm)
30299609e3SLois Curfman McInnes {
31299609e3SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
32299609e3SLois Curfman McInnes   int        ierr;
3317699dbbSLois Curfman McInnes   if (aij->size == 1) {
3451c98ccdSLois Curfman McInnes     ierr = MatGetReordering(aij->A,type,rperm,cperm); CHKERRQ(ierr);
3548d91487SBarry Smith   } else SETERRQ(1,"MatGetReordering_MPIAIJ:not supported in parallel");
36299609e3SLois Curfman McInnes   return 0;
37299609e3SLois Curfman McInnes }
38299609e3SLois Curfman McInnes 
394b0e389bSBarry Smith static int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
408a729477SBarry Smith {
4144a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
42dbb450caSBarry Smith   Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data;
434b0e389bSBarry Smith   Scalar     value;
441eb62cbbSBarry Smith   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
451eb62cbbSBarry Smith   int        cstart = aij->cstart, cend = aij->cend,row,col;
464b0e389bSBarry Smith   int        shift = C->indexshift,roworiented = aij->roworiented;
478a729477SBarry Smith 
4864119d58SLois Curfman McInnes   if (aij->insertmode != NOT_SET_VALUES && aij->insertmode != addv) {
4948d91487SBarry Smith     SETERRQ(1,"MatSetValues_MPIAIJ:Cannot mix inserts and adds");
508a729477SBarry Smith   }
511eb62cbbSBarry Smith   aij->insertmode = addv;
528a729477SBarry Smith   for ( i=0; i<m; i++ ) {
534b0e389bSBarry Smith     if (im[i] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative row");
544b0e389bSBarry Smith     if (im[i] >= aij->M) SETERRQ(1,"MatSetValues_MPIAIJ:Row too large");
554b0e389bSBarry Smith     if (im[i] >= rstart && im[i] < rend) {
564b0e389bSBarry Smith       row = im[i] - rstart;
571eb62cbbSBarry Smith       for ( j=0; j<n; j++ ) {
584b0e389bSBarry Smith         if (in[j] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative column");
594b0e389bSBarry Smith         if (in[j] >= aij->N) SETERRQ(1,"MatSetValues_MPIAIJ:Col too large");
604b0e389bSBarry Smith         if (in[j] >= cstart && in[j] < cend){
614b0e389bSBarry Smith           col = in[j] - cstart;
624b0e389bSBarry Smith           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
634b0e389bSBarry Smith           ierr = MatSetValues(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
641eb62cbbSBarry Smith         }
651eb62cbbSBarry Smith         else {
66227d817aSBarry Smith           if (mat->was_assembled) {
67b7c46309SBarry Smith             if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
684b0e389bSBarry Smith             col = aij->colmap[in[j]] + shift;
69ec8511deSBarry Smith             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
702493cbb0SBarry Smith               ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
714b0e389bSBarry Smith               col =  in[j];
72d6dfbf8fSBarry Smith             }
739e25ed09SBarry Smith           }
744b0e389bSBarry Smith           else col = in[j];
754b0e389bSBarry Smith           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
764b0e389bSBarry Smith           ierr = MatSetValues(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
771eb62cbbSBarry Smith         }
781eb62cbbSBarry Smith       }
791eb62cbbSBarry Smith     }
801eb62cbbSBarry Smith     else {
814b0e389bSBarry Smith       if (roworiented) {
824b0e389bSBarry Smith         ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
834b0e389bSBarry Smith       }
844b0e389bSBarry Smith       else {
854b0e389bSBarry Smith         row = im[i];
864b0e389bSBarry Smith         for ( j=0; j<n; j++ ) {
874b0e389bSBarry Smith           ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
884b0e389bSBarry Smith         }
894b0e389bSBarry Smith       }
901eb62cbbSBarry Smith     }
918a729477SBarry Smith   }
928a729477SBarry Smith   return 0;
938a729477SBarry Smith }
948a729477SBarry Smith 
95b49de8d1SLois Curfman McInnes static int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
96b49de8d1SLois Curfman McInnes {
97b49de8d1SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
98b49de8d1SLois Curfman McInnes   Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data;
99b49de8d1SLois Curfman McInnes   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
100b49de8d1SLois Curfman McInnes   int        cstart = aij->cstart, cend = aij->cend,row,col;
101b49de8d1SLois Curfman McInnes   int        shift = C->indexshift;
102b49de8d1SLois Curfman McInnes 
103b49de8d1SLois Curfman McInnes   for ( i=0; i<m; i++ ) {
104b49de8d1SLois Curfman McInnes     if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative row");
105b49de8d1SLois Curfman McInnes     if (idxm[i] >= aij->M) SETERRQ(1,"MatGetValues_MPIAIJ:Row too large");
106b49de8d1SLois Curfman McInnes     if (idxm[i] >= rstart && idxm[i] < rend) {
107b49de8d1SLois Curfman McInnes       row = idxm[i] - rstart;
108b49de8d1SLois Curfman McInnes       for ( j=0; j<n; j++ ) {
109b49de8d1SLois Curfman McInnes         if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative column");
110b49de8d1SLois Curfman McInnes         if (idxn[j] >= aij->N) SETERRQ(1,"MatGetValues_MPIAIJ:Col too large");
111b49de8d1SLois Curfman McInnes         if (idxn[j] >= cstart && idxn[j] < cend){
112b49de8d1SLois Curfman McInnes           col = idxn[j] - cstart;
113b49de8d1SLois Curfman McInnes           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
114b49de8d1SLois Curfman McInnes         }
115b49de8d1SLois Curfman McInnes         else {
11670423df4SBarry Smith           if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
117b49de8d1SLois Curfman McInnes           col = aij->colmap[idxn[j]] + shift;
118d9d09a02SSatish Balay           if ( aij->garray[col] != idxn[j] ) *(v+i*n+j) = 0.0;
119d9d09a02SSatish Balay           else {
120b49de8d1SLois Curfman McInnes             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
121b49de8d1SLois Curfman McInnes           }
122b49de8d1SLois Curfman McInnes         }
123b49de8d1SLois Curfman McInnes       }
124d9d09a02SSatish Balay     }
125b49de8d1SLois Curfman McInnes     else {
126b49de8d1SLois Curfman McInnes       SETERRQ(1,"MatGetValues_MPIAIJ:Only local values currently supported");
127b49de8d1SLois Curfman McInnes     }
128b49de8d1SLois Curfman McInnes   }
129b49de8d1SLois Curfman McInnes   return 0;
130b49de8d1SLois Curfman McInnes }
131b49de8d1SLois Curfman McInnes 
132fc76ce87SLois Curfman McInnes static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
1338a729477SBarry Smith {
13444a69424SLois Curfman McInnes   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
135d6dfbf8fSBarry Smith   MPI_Comm    comm = mat->comm;
13617699dbbSLois Curfman McInnes   int         size = aij->size, *owners = aij->rowners;
13717699dbbSLois Curfman McInnes   int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
1381eb62cbbSBarry Smith   MPI_Request *send_waits,*recv_waits;
1396abc6512SBarry Smith   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
1401eb62cbbSBarry Smith   InsertMode  addv;
1411eb62cbbSBarry Smith   Scalar      *rvalues,*svalues;
1421eb62cbbSBarry Smith 
1431eb62cbbSBarry Smith   /* make sure all processors are either in INSERTMODE or ADDMODE */
144d65a2f8fSBarry Smith   MPI_Allreduce(&aij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
145dbb450caSBarry Smith   if (addv == (ADD_VALUES|INSERT_VALUES)) {
146bbb6d6a8SBarry Smith     SETERRQ(1,"MatAssemblyBegin_MPIAIJ:Some processors inserted others added");
1471eb62cbbSBarry Smith   }
1481eb62cbbSBarry Smith   aij->insertmode = addv; /* in case this processor had no cache */
1491eb62cbbSBarry Smith 
1501eb62cbbSBarry Smith   /*  first count number of contributors to each processor */
1510452661fSBarry Smith   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
152cddf8d76SBarry Smith   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
1530452661fSBarry Smith   owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
1541eb62cbbSBarry Smith   for ( i=0; i<aij->stash.n; i++ ) {
1551eb62cbbSBarry Smith     idx = aij->stash.idx[i];
15617699dbbSLois Curfman McInnes     for ( j=0; j<size; j++ ) {
1571eb62cbbSBarry Smith       if (idx >= owners[j] && idx < owners[j+1]) {
1581eb62cbbSBarry Smith         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
1598a729477SBarry Smith       }
1608a729477SBarry Smith     }
1618a729477SBarry Smith   }
16217699dbbSLois Curfman McInnes   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
1631eb62cbbSBarry Smith 
1641eb62cbbSBarry Smith   /* inform other processors of number of messages and max length*/
1650452661fSBarry Smith   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
16617699dbbSLois Curfman McInnes   MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);
16717699dbbSLois Curfman McInnes   nreceives = work[rank];
16817699dbbSLois Curfman McInnes   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
16917699dbbSLois Curfman McInnes   nmax = work[rank];
1700452661fSBarry Smith   PetscFree(work);
1711eb62cbbSBarry Smith 
1721eb62cbbSBarry Smith   /* post receives:
1731eb62cbbSBarry Smith        1) each message will consist of ordered pairs
1741eb62cbbSBarry Smith      (global index,value) we store the global index as a double
175d6dfbf8fSBarry Smith      to simplify the message passing.
1761eb62cbbSBarry Smith        2) since we don't know how long each individual message is we
1771eb62cbbSBarry Smith      allocate the largest needed buffer for each receive. Potentially
1781eb62cbbSBarry Smith      this is a lot of wasted space.
1791eb62cbbSBarry Smith 
1801eb62cbbSBarry Smith 
1811eb62cbbSBarry Smith        This could be done better.
1821eb62cbbSBarry Smith   */
1830452661fSBarry Smith   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
18478b31e54SBarry Smith   CHKPTRQ(rvalues);
1850452661fSBarry Smith   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
18678b31e54SBarry Smith   CHKPTRQ(recv_waits);
1871eb62cbbSBarry Smith   for ( i=0; i<nreceives; i++ ) {
188416022c9SBarry Smith     MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
1891eb62cbbSBarry Smith               comm,recv_waits+i);
1901eb62cbbSBarry Smith   }
1911eb62cbbSBarry Smith 
1921eb62cbbSBarry Smith   /* do sends:
1931eb62cbbSBarry Smith       1) starts[i] gives the starting index in svalues for stuff going to
1941eb62cbbSBarry Smith          the ith processor
1951eb62cbbSBarry Smith   */
1960452661fSBarry Smith   svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
1970452661fSBarry Smith   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
19878b31e54SBarry Smith   CHKPTRQ(send_waits);
1990452661fSBarry Smith   starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
2001eb62cbbSBarry Smith   starts[0] = 0;
20117699dbbSLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
2021eb62cbbSBarry Smith   for ( i=0; i<aij->stash.n; i++ ) {
2031eb62cbbSBarry Smith     svalues[3*starts[owner[i]]]       = (Scalar)  aij->stash.idx[i];
2041eb62cbbSBarry Smith     svalues[3*starts[owner[i]]+1]     = (Scalar)  aij->stash.idy[i];
2051eb62cbbSBarry Smith     svalues[3*(starts[owner[i]]++)+2] =  aij->stash.array[i];
2061eb62cbbSBarry Smith   }
2070452661fSBarry Smith   PetscFree(owner);
2081eb62cbbSBarry Smith   starts[0] = 0;
20917699dbbSLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
2101eb62cbbSBarry Smith   count = 0;
21117699dbbSLois Curfman McInnes   for ( i=0; i<size; i++ ) {
2121eb62cbbSBarry Smith     if (procs[i]) {
213416022c9SBarry Smith       MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
2141eb62cbbSBarry Smith                 comm,send_waits+count++);
2151eb62cbbSBarry Smith     }
2161eb62cbbSBarry Smith   }
2170452661fSBarry Smith   PetscFree(starts); PetscFree(nprocs);
2181eb62cbbSBarry Smith 
2191eb62cbbSBarry Smith   /* Free cache space */
2202191d07cSBarry Smith   PLogInfo(0,"[%d]MatAssemblyBegin_MPIAIJ:Number of off processor values %d\n",rank,aij->stash.n);
22178b31e54SBarry Smith   ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
2221eb62cbbSBarry Smith 
2231eb62cbbSBarry Smith   aij->svalues    = svalues;    aij->rvalues    = rvalues;
2241eb62cbbSBarry Smith   aij->nsends     = nsends;     aij->nrecvs     = nreceives;
2251eb62cbbSBarry Smith   aij->send_waits = send_waits; aij->recv_waits = recv_waits;
2261eb62cbbSBarry Smith   aij->rmax       = nmax;
2271eb62cbbSBarry Smith 
2281eb62cbbSBarry Smith   return 0;
2291eb62cbbSBarry Smith }
23044a69424SLois Curfman McInnes extern int MatSetUpMultiply_MPIAIJ(Mat);
2311eb62cbbSBarry Smith 
232fc76ce87SLois Curfman McInnes static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
2331eb62cbbSBarry Smith {
23444a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
235dbb450caSBarry Smith   Mat_SeqAIJ *C = (Mat_SeqAIJ *) aij->A->data;
2361eb62cbbSBarry Smith   MPI_Status  *send_status,recv_status;
237416022c9SBarry Smith   int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
238416022c9SBarry Smith   int         row,col,other_disassembled,shift = C->indexshift;
2391eb62cbbSBarry Smith   Scalar      *values,val;
2401eb62cbbSBarry Smith   InsertMode  addv = aij->insertmode;
2411eb62cbbSBarry Smith 
2421eb62cbbSBarry Smith   /*  wait on receives */
2431eb62cbbSBarry Smith   while (count) {
244d6dfbf8fSBarry Smith     MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
2451eb62cbbSBarry Smith     /* unpack receives into our local space */
246d6dfbf8fSBarry Smith     values = aij->rvalues + 3*imdex*aij->rmax;
247c60448a5SBarry Smith     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
2481eb62cbbSBarry Smith     n = n/3;
2491eb62cbbSBarry Smith     for ( i=0; i<n; i++ ) {
250227d817aSBarry Smith       row = (int) PetscReal(values[3*i]) - aij->rstart;
251227d817aSBarry Smith       col = (int) PetscReal(values[3*i+1]);
2521eb62cbbSBarry Smith       val = values[3*i+2];
2531eb62cbbSBarry Smith       if (col >= aij->cstart && col < aij->cend) {
2541eb62cbbSBarry Smith         col -= aij->cstart;
2551eb62cbbSBarry Smith         MatSetValues(aij->A,1,&row,1,&col,&val,addv);
2561eb62cbbSBarry Smith       }
2571eb62cbbSBarry Smith       else {
258227d817aSBarry Smith         if (mat->was_assembled) {
259b7c46309SBarry Smith           if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
260dbb450caSBarry Smith           col = aij->colmap[col] + shift;
261ec8511deSBarry Smith           if (col < 0  && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
2622493cbb0SBarry Smith             ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
263227d817aSBarry Smith             col = (int) PetscReal(values[3*i+1]);
264d6dfbf8fSBarry Smith           }
2659e25ed09SBarry Smith         }
2661eb62cbbSBarry Smith         MatSetValues(aij->B,1,&row,1,&col,&val,addv);
2671eb62cbbSBarry Smith       }
2681eb62cbbSBarry Smith     }
2691eb62cbbSBarry Smith     count--;
2701eb62cbbSBarry Smith   }
2710452661fSBarry Smith   PetscFree(aij->recv_waits); PetscFree(aij->rvalues);
2721eb62cbbSBarry Smith 
2731eb62cbbSBarry Smith   /* wait on sends */
2741eb62cbbSBarry Smith   if (aij->nsends) {
2750452661fSBarry Smith     send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));
27678b31e54SBarry Smith     CHKPTRQ(send_status);
2771eb62cbbSBarry Smith     MPI_Waitall(aij->nsends,aij->send_waits,send_status);
2780452661fSBarry Smith     PetscFree(send_status);
2791eb62cbbSBarry Smith   }
2800452661fSBarry Smith   PetscFree(aij->send_waits); PetscFree(aij->svalues);
2811eb62cbbSBarry Smith 
28264119d58SLois Curfman McInnes   aij->insertmode = NOT_SET_VALUES;
28378b31e54SBarry Smith   ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
28478b31e54SBarry Smith   ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);
2851eb62cbbSBarry Smith 
2862493cbb0SBarry Smith   /* determine if any processor has disassembled, if so we must
2872493cbb0SBarry Smith      also disassemble ourselfs, in order that we may reassemble. */
288227d817aSBarry Smith   MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
289227d817aSBarry Smith   if (mat->was_assembled && !other_disassembled) {
2902493cbb0SBarry Smith     ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
2912493cbb0SBarry Smith   }
2922493cbb0SBarry Smith 
2936d4a8577SBarry Smith   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
29478b31e54SBarry Smith     ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
2955e42470aSBarry Smith   }
29678b31e54SBarry Smith   ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
29778b31e54SBarry Smith   ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);
2985e42470aSBarry Smith 
2997a0afa10SBarry Smith   if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;}
3008a729477SBarry Smith   return 0;
3018a729477SBarry Smith }
3028a729477SBarry Smith 
3032ee70a88SLois Curfman McInnes static int MatZeroEntries_MPIAIJ(Mat A)
3041eb62cbbSBarry Smith {
30544a69424SLois Curfman McInnes   Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
306dbd7a890SLois Curfman McInnes   int        ierr;
30778b31e54SBarry Smith   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
30878b31e54SBarry Smith   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
3091eb62cbbSBarry Smith   return 0;
3101eb62cbbSBarry Smith }
3111eb62cbbSBarry Smith 
3121eb62cbbSBarry Smith /* the code does not do the diagonal entries correctly unless the
3131eb62cbbSBarry Smith    matrix is square and the column and row owerships are identical.
3141eb62cbbSBarry Smith    This is a BUG. The only way to fix it seems to be to access
3151eb62cbbSBarry Smith    aij->A and aij->B directly and not through the MatZeroRows()
3161eb62cbbSBarry Smith    routine.
3171eb62cbbSBarry Smith */
31844a69424SLois Curfman McInnes static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
3191eb62cbbSBarry Smith {
32044a69424SLois Curfman McInnes   Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
32117699dbbSLois Curfman McInnes   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
3226abc6512SBarry Smith   int            *procs,*nprocs,j,found,idx,nsends,*work;
32317699dbbSLois Curfman McInnes   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
3245392566eSBarry Smith   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
325d6dfbf8fSBarry Smith   int            *lens,imdex,*lrows,*values;
326d6dfbf8fSBarry Smith   MPI_Comm       comm = A->comm;
3271eb62cbbSBarry Smith   MPI_Request    *send_waits,*recv_waits;
3281eb62cbbSBarry Smith   MPI_Status     recv_status,*send_status;
3291eb62cbbSBarry Smith   IS             istmp;
3301eb62cbbSBarry Smith 
33177c4ece6SBarry Smith   ierr = ISGetSize(is,&N); CHKERRQ(ierr);
33278b31e54SBarry Smith   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
3331eb62cbbSBarry Smith 
3341eb62cbbSBarry Smith   /*  first count number of contributors to each processor */
3350452661fSBarry Smith   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
336cddf8d76SBarry Smith   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
3370452661fSBarry Smith   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
3381eb62cbbSBarry Smith   for ( i=0; i<N; i++ ) {
3391eb62cbbSBarry Smith     idx = rows[i];
3401eb62cbbSBarry Smith     found = 0;
34117699dbbSLois Curfman McInnes     for ( j=0; j<size; j++ ) {
3421eb62cbbSBarry Smith       if (idx >= owners[j] && idx < owners[j+1]) {
3431eb62cbbSBarry Smith         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
3441eb62cbbSBarry Smith       }
3451eb62cbbSBarry Smith     }
346bbb6d6a8SBarry Smith     if (!found) SETERRQ(1,"MatZeroRows_MPIAIJ:Index out of range");
3471eb62cbbSBarry Smith   }
34817699dbbSLois Curfman McInnes   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
3491eb62cbbSBarry Smith 
3501eb62cbbSBarry Smith   /* inform other processors of number of messages and max length*/
3510452661fSBarry Smith   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
35217699dbbSLois Curfman McInnes   MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
35317699dbbSLois Curfman McInnes   nrecvs = work[rank];
35417699dbbSLois Curfman McInnes   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
35517699dbbSLois Curfman McInnes   nmax = work[rank];
3560452661fSBarry Smith   PetscFree(work);
3571eb62cbbSBarry Smith 
3581eb62cbbSBarry Smith   /* post receives:   */
3590452661fSBarry Smith   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
36078b31e54SBarry Smith   CHKPTRQ(rvalues);
3610452661fSBarry Smith   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
36278b31e54SBarry Smith   CHKPTRQ(recv_waits);
3631eb62cbbSBarry Smith   for ( i=0; i<nrecvs; i++ ) {
364416022c9SBarry Smith     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
3651eb62cbbSBarry Smith   }
3661eb62cbbSBarry Smith 
3671eb62cbbSBarry Smith   /* do sends:
3681eb62cbbSBarry Smith       1) starts[i] gives the starting index in svalues for stuff going to
3691eb62cbbSBarry Smith          the ith processor
3701eb62cbbSBarry Smith   */
3710452661fSBarry Smith   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
3720452661fSBarry Smith   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
37378b31e54SBarry Smith   CHKPTRQ(send_waits);
3740452661fSBarry Smith   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
3751eb62cbbSBarry Smith   starts[0] = 0;
37617699dbbSLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
3771eb62cbbSBarry Smith   for ( i=0; i<N; i++ ) {
3781eb62cbbSBarry Smith     svalues[starts[owner[i]]++] = rows[i];
3791eb62cbbSBarry Smith   }
3801eb62cbbSBarry Smith   ISRestoreIndices(is,&rows);
3811eb62cbbSBarry Smith 
3821eb62cbbSBarry Smith   starts[0] = 0;
38317699dbbSLois Curfman McInnes   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
3841eb62cbbSBarry Smith   count = 0;
38517699dbbSLois Curfman McInnes   for ( i=0; i<size; i++ ) {
3861eb62cbbSBarry Smith     if (procs[i]) {
387416022c9SBarry Smith       MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
3881eb62cbbSBarry Smith     }
3891eb62cbbSBarry Smith   }
3900452661fSBarry Smith   PetscFree(starts);
3911eb62cbbSBarry Smith 
39217699dbbSLois Curfman McInnes   base = owners[rank];
3931eb62cbbSBarry Smith 
3941eb62cbbSBarry Smith   /*  wait on receives */
3950452661fSBarry Smith   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
3961eb62cbbSBarry Smith   source = lens + nrecvs;
3971eb62cbbSBarry Smith   count  = nrecvs; slen = 0;
3981eb62cbbSBarry Smith   while (count) {
399d6dfbf8fSBarry Smith     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
4001eb62cbbSBarry Smith     /* unpack receives into our local space */
4011eb62cbbSBarry Smith     MPI_Get_count(&recv_status,MPI_INT,&n);
402d6dfbf8fSBarry Smith     source[imdex]  = recv_status.MPI_SOURCE;
403d6dfbf8fSBarry Smith     lens[imdex]  = n;
4041eb62cbbSBarry Smith     slen += n;
4051eb62cbbSBarry Smith     count--;
4061eb62cbbSBarry Smith   }
4070452661fSBarry Smith   PetscFree(recv_waits);
4081eb62cbbSBarry Smith 
4091eb62cbbSBarry Smith   /* move the data into the send scatter */
4100452661fSBarry Smith   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
4111eb62cbbSBarry Smith   count = 0;
4121eb62cbbSBarry Smith   for ( i=0; i<nrecvs; i++ ) {
4131eb62cbbSBarry Smith     values = rvalues + i*nmax;
4141eb62cbbSBarry Smith     for ( j=0; j<lens[i]; j++ ) {
4151eb62cbbSBarry Smith       lrows[count++] = values[j] - base;
4161eb62cbbSBarry Smith     }
4171eb62cbbSBarry Smith   }
4180452661fSBarry Smith   PetscFree(rvalues); PetscFree(lens);
4190452661fSBarry Smith   PetscFree(owner); PetscFree(nprocs);
4201eb62cbbSBarry Smith 
4211eb62cbbSBarry Smith   /* actually zap the local rows */
422416022c9SBarry Smith   ierr = ISCreateSeq(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
423464493b3SBarry Smith   PLogObjectParent(A,istmp);
4240452661fSBarry Smith   PetscFree(lrows);
42578b31e54SBarry Smith   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
42678b31e54SBarry Smith   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
42778b31e54SBarry Smith   ierr = ISDestroy(istmp); CHKERRQ(ierr);
4281eb62cbbSBarry Smith 
4291eb62cbbSBarry Smith   /* wait on sends */
4301eb62cbbSBarry Smith   if (nsends) {
4310452661fSBarry Smith     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
43278b31e54SBarry Smith     CHKPTRQ(send_status);
4331eb62cbbSBarry Smith     MPI_Waitall(nsends,send_waits,send_status);
4340452661fSBarry Smith     PetscFree(send_status);
4351eb62cbbSBarry Smith   }
4360452661fSBarry Smith   PetscFree(send_waits); PetscFree(svalues);
4371eb62cbbSBarry Smith 
4381eb62cbbSBarry Smith   return 0;
4391eb62cbbSBarry Smith }
4401eb62cbbSBarry Smith 
441416022c9SBarry Smith static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
4421eb62cbbSBarry Smith {
443416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
444fbd6ef76SBarry Smith   int        ierr,nt;
445416022c9SBarry Smith 
446a2ce50c7SBarry Smith   ierr = VecGetLocalSize(xx,&nt);  CHKERRQ(ierr);
447fbd6ef76SBarry Smith   if (nt != a->n) {
44847b4a8eaSLois Curfman McInnes     SETERRQ(1,"MatMult_MPIAIJ:Incompatible parition of A and xx");
449fbd6ef76SBarry Smith   }
450a2ce50c7SBarry Smith   ierr = VecGetLocalSize(yy,&nt);  CHKERRQ(ierr);
451fbd6ef76SBarry Smith   if (nt != a->m) {
45247b4a8eaSLois Curfman McInnes     SETERRQ(1,"MatMult_MPIAIJ:Incompatible parition of A and yy");
453fbd6ef76SBarry Smith   }
45464119d58SLois Curfman McInnes   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx); CHKERRQ(ierr);
45538597bd4SSatish Balay   ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr);
45664119d58SLois Curfman McInnes   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx); CHKERRQ(ierr);
45738597bd4SSatish Balay   ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
4581eb62cbbSBarry Smith   return 0;
4591eb62cbbSBarry Smith }
4601eb62cbbSBarry Smith 
461416022c9SBarry Smith static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
462da3a660dSBarry Smith {
463416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
464da3a660dSBarry Smith   int        ierr;
46564119d58SLois Curfman McInnes   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
46638597bd4SSatish Balay   ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
46764119d58SLois Curfman McInnes   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
46838597bd4SSatish Balay   ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
469da3a660dSBarry Smith   return 0;
470da3a660dSBarry Smith }
471da3a660dSBarry Smith 
472416022c9SBarry Smith static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
473da3a660dSBarry Smith {
474416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
475da3a660dSBarry Smith   int        ierr;
476da3a660dSBarry Smith 
477da3a660dSBarry Smith   /* do nondiagonal part */
47838597bd4SSatish Balay   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
479da3a660dSBarry Smith   /* send it on its way */
480416022c9SBarry Smith   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,
48164119d58SLois Curfman McInnes                 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
482da3a660dSBarry Smith   /* do local part */
48338597bd4SSatish Balay   ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr);
484da3a660dSBarry Smith   /* receive remote parts: note this assumes the values are not actually */
485da3a660dSBarry Smith   /* inserted in yy until the next line, which is true for my implementation*/
486da3a660dSBarry Smith   /* but is not perhaps always true. */
487416022c9SBarry Smith   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,
48864119d58SLois Curfman McInnes                   (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
489da3a660dSBarry Smith   return 0;
490da3a660dSBarry Smith }
491da3a660dSBarry Smith 
492416022c9SBarry Smith static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
493da3a660dSBarry Smith {
494416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
495da3a660dSBarry Smith   int        ierr;
496da3a660dSBarry Smith 
497da3a660dSBarry Smith   /* do nondiagonal part */
49838597bd4SSatish Balay   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
499da3a660dSBarry Smith   /* send it on its way */
500416022c9SBarry Smith   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,
50164119d58SLois Curfman McInnes                  (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
502da3a660dSBarry Smith   /* do local part */
50338597bd4SSatish Balay   ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
504da3a660dSBarry Smith   /* receive remote parts: note this assumes the values are not actually */
505da3a660dSBarry Smith   /* inserted in yy until the next line, which is true for my implementation*/
506da3a660dSBarry Smith   /* but is not perhaps always true. */
507416022c9SBarry Smith   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,
50864119d58SLois Curfman McInnes                   (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
509da3a660dSBarry Smith   return 0;
510da3a660dSBarry Smith }
511da3a660dSBarry Smith 
5121eb62cbbSBarry Smith /*
5131eb62cbbSBarry Smith   This only works correctly for square matrices where the subblock A->A is the
5141eb62cbbSBarry Smith    diagonal block
5151eb62cbbSBarry Smith */
516416022c9SBarry Smith static int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
5171eb62cbbSBarry Smith {
518416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
51944cd7ae7SLois Curfman McInnes   if (a->M != a->N)
52044cd7ae7SLois Curfman McInnes     SETERRQ(1,"MatGetDiagonal_MPIAIJ:Supports only square matrix where A->A is diag block");
521416022c9SBarry Smith   return MatGetDiagonal(a->A,v);
5221eb62cbbSBarry Smith }
5231eb62cbbSBarry Smith 
524052efed2SBarry Smith static int MatScale_MPIAIJ(Scalar *aa,Mat A)
525052efed2SBarry Smith {
526052efed2SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
527052efed2SBarry Smith   int        ierr;
528052efed2SBarry Smith   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
529052efed2SBarry Smith   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
530052efed2SBarry Smith   return 0;
531052efed2SBarry Smith }
532052efed2SBarry Smith 
53344a69424SLois Curfman McInnes static int MatDestroy_MPIAIJ(PetscObject obj)
5341eb62cbbSBarry Smith {
5351eb62cbbSBarry Smith   Mat        mat = (Mat) obj;
53644a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
5371eb62cbbSBarry Smith   int        ierr;
538a5a9c739SBarry Smith #if defined(PETSC_LOG)
5396e35fa57SLois Curfman McInnes   PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N);
540a5a9c739SBarry Smith #endif
5410452661fSBarry Smith   PetscFree(aij->rowners);
54278b31e54SBarry Smith   ierr = MatDestroy(aij->A); CHKERRQ(ierr);
54378b31e54SBarry Smith   ierr = MatDestroy(aij->B); CHKERRQ(ierr);
5440452661fSBarry Smith   if (aij->colmap) PetscFree(aij->colmap);
5450452661fSBarry Smith   if (aij->garray) PetscFree(aij->garray);
5461eb62cbbSBarry Smith   if (aij->lvec)   VecDestroy(aij->lvec);
547a56f8943SBarry Smith   if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
5487a0afa10SBarry Smith   if (aij->rowvalues) PetscFree(aij->rowvalues);
5490452661fSBarry Smith   PetscFree(aij);
550a5a9c739SBarry Smith   PLogObjectDestroy(mat);
5510452661fSBarry Smith   PetscHeaderDestroy(mat);
5521eb62cbbSBarry Smith   return 0;
5531eb62cbbSBarry Smith }
5547857610eSBarry Smith #include "draw.h"
555b16a3bb1SBarry Smith #include "pinclude/pviewer.h"
556ee50ffe9SBarry Smith 
557416022c9SBarry Smith static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
5581eb62cbbSBarry Smith {
559416022c9SBarry Smith   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
560416022c9SBarry Smith   int         ierr;
561416022c9SBarry Smith 
56217699dbbSLois Curfman McInnes   if (aij->size == 1) {
563416022c9SBarry Smith     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
564416022c9SBarry Smith   }
565a523beb4SLois Curfman McInnes   else SETERRQ(1,"MatView_MPIAIJ_Binary:Only uniprocessor output supported");
566416022c9SBarry Smith   return 0;
567416022c9SBarry Smith }
568416022c9SBarry Smith 
569d7e8b826SBarry Smith static int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
570416022c9SBarry Smith {
57144a69424SLois Curfman McInnes   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
572dbb450caSBarry Smith   Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
573a56f8943SBarry Smith   int         ierr, format,shift = C->indexshift,rank;
574d636dbe3SBarry Smith   FILE        *fd;
57519bcc07fSBarry Smith   ViewerType  vtype;
576416022c9SBarry Smith 
57719bcc07fSBarry Smith   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
57819bcc07fSBarry Smith   if (vtype  == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
57990ace30eSBarry Smith     ierr = ViewerGetFormat(viewer,&format);
58090ace30eSBarry Smith     if (format == ASCII_FORMAT_INFO_DETAILED) {
58195e01e2fSLois Curfman McInnes       int nz, nzalloc, mem, flg;
582a56f8943SBarry Smith       MPI_Comm_rank(mat->comm,&rank);
58390ace30eSBarry Smith       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
584a56f8943SBarry Smith       ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem);
58595e01e2fSLois Curfman McInnes       ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr);
58677c4ece6SBarry Smith       PetscSequentialPhaseBegin(mat->comm,1);
58795e01e2fSLois Curfman McInnes       if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
58895e01e2fSLois Curfman McInnes          rank,aij->m,nz,nzalloc,mem);
58995e01e2fSLois Curfman McInnes       else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
59095e01e2fSLois Curfman McInnes          rank,aij->m,nz,nzalloc,mem);
59108480c60SBarry Smith       ierr = MatGetInfo(aij->A,MAT_LOCAL,&nz,&nzalloc,&mem);
59295e01e2fSLois Curfman McInnes       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,nz);
59308480c60SBarry Smith       ierr = MatGetInfo(aij->B,MAT_LOCAL,&nz,&nzalloc,&mem);
59495e01e2fSLois Curfman McInnes       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,nz);
595a56f8943SBarry Smith       fflush(fd);
59677c4ece6SBarry Smith       PetscSequentialPhaseEnd(mat->comm,1);
597a40aa06bSLois Curfman McInnes       ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr);
598416022c9SBarry Smith       return 0;
599416022c9SBarry Smith     }
60090ace30eSBarry Smith     else if (format == ASCII_FORMAT_INFO) {
60108480c60SBarry Smith       return 0;
60208480c60SBarry Smith     }
603416022c9SBarry Smith   }
604416022c9SBarry Smith 
60519bcc07fSBarry Smith   if (vtype == DRAW_VIEWER) {
60619bcc07fSBarry Smith     Draw       draw;
60719bcc07fSBarry Smith     PetscTruth isnull;
60819bcc07fSBarry Smith     ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
60919bcc07fSBarry Smith     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
61019bcc07fSBarry Smith   }
61119bcc07fSBarry Smith 
61219bcc07fSBarry Smith   if (vtype == ASCII_FILE_VIEWER) {
61390ace30eSBarry Smith     ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
61477c4ece6SBarry Smith     PetscSequentialPhaseBegin(mat->comm,1);
615d13ab20cSBarry Smith     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
61617699dbbSLois Curfman McInnes            aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
6171eb62cbbSBarry Smith            aij->cend);
61878b31e54SBarry Smith     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
61978b31e54SBarry Smith     ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
620d13ab20cSBarry Smith     fflush(fd);
62177c4ece6SBarry Smith     PetscSequentialPhaseEnd(mat->comm,1);
622d13ab20cSBarry Smith   }
623416022c9SBarry Smith   else {
624a56f8943SBarry Smith     int size = aij->size;
625a56f8943SBarry Smith     rank = aij->rank;
62617699dbbSLois Curfman McInnes     if (size == 1) {
62778b31e54SBarry Smith       ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
62895373324SBarry Smith     }
62995373324SBarry Smith     else {
63095373324SBarry Smith       /* assemble the entire matrix onto first processor. */
63195373324SBarry Smith       Mat         A;
632ec8511deSBarry Smith       Mat_SeqAIJ *Aloc;
6332eb8c8abSBarry Smith       int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
63495373324SBarry Smith       Scalar      *a;
6352ee70a88SLois Curfman McInnes 
63617699dbbSLois Curfman McInnes       if (!rank) {
637b4fd4287SBarry Smith         ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
638c451ab25SLois Curfman McInnes                CHKERRQ(ierr);
63995373324SBarry Smith       }
64095373324SBarry Smith       else {
641b4fd4287SBarry Smith         ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
642c451ab25SLois Curfman McInnes                CHKERRQ(ierr);
64395373324SBarry Smith       }
644464493b3SBarry Smith       PLogObjectParent(mat,A);
645416022c9SBarry Smith 
64695373324SBarry Smith       /* copy over the A part */
647ec8511deSBarry Smith       Aloc = (Mat_SeqAIJ*) aij->A->data;
6482ee70a88SLois Curfman McInnes       m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
64995373324SBarry Smith       row = aij->rstart;
650dbb450caSBarry Smith       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
65195373324SBarry Smith       for ( i=0; i<m; i++ ) {
652416022c9SBarry Smith         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
65395373324SBarry Smith         row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
65495373324SBarry Smith       }
6552ee70a88SLois Curfman McInnes       aj = Aloc->j;
656dbb450caSBarry Smith       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}
65795373324SBarry Smith 
65895373324SBarry Smith       /* copy over the B part */
659ec8511deSBarry Smith       Aloc = (Mat_SeqAIJ*) aij->B->data;
6602ee70a88SLois Curfman McInnes       m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
66195373324SBarry Smith       row = aij->rstart;
6620452661fSBarry Smith       ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
663dbb450caSBarry Smith       for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
66495373324SBarry Smith       for ( i=0; i<m; i++ ) {
665416022c9SBarry Smith         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
66695373324SBarry Smith         row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
66795373324SBarry Smith       }
6680452661fSBarry Smith       PetscFree(ct);
6696d4a8577SBarry Smith       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
6706d4a8577SBarry Smith       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
67117699dbbSLois Curfman McInnes       if (!rank) {
67278b31e54SBarry Smith         ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
67395373324SBarry Smith       }
67478b31e54SBarry Smith       ierr = MatDestroy(A); CHKERRQ(ierr);
67595373324SBarry Smith     }
67695373324SBarry Smith   }
6771eb62cbbSBarry Smith   return 0;
6781eb62cbbSBarry Smith }
6791eb62cbbSBarry Smith 
680416022c9SBarry Smith static int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
681416022c9SBarry Smith {
682416022c9SBarry Smith   Mat         mat = (Mat) obj;
683416022c9SBarry Smith   int         ierr;
68419bcc07fSBarry Smith   ViewerType  vtype;
685416022c9SBarry Smith 
68619bcc07fSBarry Smith   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
68719bcc07fSBarry Smith   if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
68819bcc07fSBarry Smith       vtype == DRAW_VIEWER       || vtype == MATLAB_VIEWER) {
689d7e8b826SBarry Smith     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
690416022c9SBarry Smith   }
69119bcc07fSBarry Smith   else if (vtype == BINARY_FILE_VIEWER) {
692416022c9SBarry Smith     return MatView_MPIAIJ_Binary(mat,viewer);
693416022c9SBarry Smith   }
694416022c9SBarry Smith   return 0;
695416022c9SBarry Smith }
696416022c9SBarry Smith 
6971eb62cbbSBarry Smith /*
6981eb62cbbSBarry Smith     This has to provide several versions.
6991eb62cbbSBarry Smith 
7001eb62cbbSBarry Smith      2) a) use only local smoothing updating outer values only once.
7011eb62cbbSBarry Smith         b) local smoothing updating outer values each inner iteration
702d6dfbf8fSBarry Smith      3) color updating out values betwen colors.
7031eb62cbbSBarry Smith */
704fc76ce87SLois Curfman McInnes static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
705dbb450caSBarry Smith                            double fshift,int its,Vec xx)
7068a729477SBarry Smith {
70744a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
708d6dfbf8fSBarry Smith   Mat        AA = mat->A, BB = mat->B;
709ec8511deSBarry Smith   Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
710*c16cb8f2SBarry Smith   Scalar     *b,*x,*xs,*ls,d,*v,sum;
7116abc6512SBarry Smith   int        ierr,*idx, *diag;
712416022c9SBarry Smith   int        n = mat->n, m = mat->m, i,shift = A->indexshift;
7138a729477SBarry Smith 
714d6dfbf8fSBarry Smith   VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
715dbb450caSBarry Smith   xs = x + shift; /* shift by one for index start of 1 */
716dbb450caSBarry Smith   ls = ls + shift;
717ec8511deSBarry Smith   if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;}
718d6dfbf8fSBarry Smith   diag = A->diag;
719*c16cb8f2SBarry Smith   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
720da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
72138597bd4SSatish Balay       return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
722da3a660dSBarry Smith     }
72364119d58SLois Curfman McInnes     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
72478b31e54SBarry Smith     CHKERRQ(ierr);
72564119d58SLois Curfman McInnes     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
72678b31e54SBarry Smith     CHKERRQ(ierr);
727d6dfbf8fSBarry Smith     while (its--) {
728d6dfbf8fSBarry Smith       /* go down through the rows */
729d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
730d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
731dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
732dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
733d6dfbf8fSBarry Smith         sum  = b[i];
734d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
735dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
736d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
737dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
738dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
739d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
740d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
741d6dfbf8fSBarry Smith       }
742d6dfbf8fSBarry Smith       /* come up through the rows */
743d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
744d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
745dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
746dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
747d6dfbf8fSBarry Smith         sum  = b[i];
748d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
749dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
750d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
751dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
752dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
753d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
754d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
755d6dfbf8fSBarry Smith       }
756d6dfbf8fSBarry Smith     }
757d6dfbf8fSBarry Smith   }
758d6dfbf8fSBarry Smith   else if (flag & SOR_LOCAL_FORWARD_SWEEP){
759da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
76038597bd4SSatish Balay       return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
761da3a660dSBarry Smith     }
76264119d58SLois Curfman McInnes     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
76378b31e54SBarry Smith     CHKERRQ(ierr);
76464119d58SLois Curfman McInnes     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
76578b31e54SBarry Smith     CHKERRQ(ierr);
766d6dfbf8fSBarry Smith     while (its--) {
767d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
768d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
769dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
770dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
771d6dfbf8fSBarry Smith         sum  = b[i];
772d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
773dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
774d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
775dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
776dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
777d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
778d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
779d6dfbf8fSBarry Smith       }
780d6dfbf8fSBarry Smith     }
781d6dfbf8fSBarry Smith   }
782d6dfbf8fSBarry Smith   else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
783da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
78438597bd4SSatish Balay       return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
785da3a660dSBarry Smith     }
78664119d58SLois Curfman McInnes     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,
78778b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
78864119d58SLois Curfman McInnes     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,
78978b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
790d6dfbf8fSBarry Smith     while (its--) {
791d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
792d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
793dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
794dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
795d6dfbf8fSBarry Smith         sum  = b[i];
796d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
797dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
798d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
799dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
800dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
801d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
802d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
803d6dfbf8fSBarry Smith       }
804d6dfbf8fSBarry Smith     }
805d6dfbf8fSBarry Smith   }
806*c16cb8f2SBarry Smith   else {
807*c16cb8f2SBarry Smith     SETERRQ(1,"MatRelax_MPIAIJ:Option not supported");
808*c16cb8f2SBarry Smith   }
8098a729477SBarry Smith   return 0;
8108a729477SBarry Smith }
811a66be287SLois Curfman McInnes 
812fc76ce87SLois Curfman McInnes static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,int *nz,
813fc76ce87SLois Curfman McInnes                              int *nzalloc,int *mem)
814a66be287SLois Curfman McInnes {
815a66be287SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
816a66be287SLois Curfman McInnes   Mat        A = mat->A, B = mat->B;
817a66be287SLois Curfman McInnes   int        ierr, isend[3], irecv[3], nzA, nzallocA, memA;
818a66be287SLois Curfman McInnes 
81978b31e54SBarry Smith   ierr = MatGetInfo(A,MAT_LOCAL,&nzA,&nzallocA,&memA); CHKERRQ(ierr);
82078b31e54SBarry Smith   ierr = MatGetInfo(B,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr);
821a66be287SLois Curfman McInnes   isend[0] += nzA; isend[1] += nzallocA; isend[2] += memA;
822a66be287SLois Curfman McInnes   if (flag == MAT_LOCAL) {
823bcd2baecSBarry Smith     if (nz)       *nz      = isend[0];
824bcd2baecSBarry Smith     if (nzalloc)  *nzalloc = isend[1];
825bcd2baecSBarry Smith     if (mem)      *mem     = isend[2];
826a66be287SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_MAX) {
827d65a2f8fSBarry Smith     MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_MAX,matin->comm);
828bcd2baecSBarry Smith     if (nz)      *nz      = irecv[0];
829bcd2baecSBarry Smith     if (nzalloc) *nzalloc = irecv[1];
830bcd2baecSBarry Smith     if (mem)     *mem     = irecv[2];
831a66be287SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_SUM) {
832d65a2f8fSBarry Smith     MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_SUM,matin->comm);
833bcd2baecSBarry Smith     if (nz)      *nz      = irecv[0];
834bcd2baecSBarry Smith     if (nzalloc) *nzalloc = irecv[1];
835bcd2baecSBarry Smith     if (mem)     *mem     = irecv[2];
836a66be287SLois Curfman McInnes   }
837a66be287SLois Curfman McInnes   return 0;
838a66be287SLois Curfman McInnes }
839a66be287SLois Curfman McInnes 
840299609e3SLois Curfman McInnes extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*);
841299609e3SLois Curfman McInnes extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*);
842299609e3SLois Curfman McInnes extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double);
843299609e3SLois Curfman McInnes extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *);
844299609e3SLois Curfman McInnes extern int MatSolve_MPIAIJ(Mat,Vec,Vec);
845299609e3SLois Curfman McInnes extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec);
846299609e3SLois Curfman McInnes extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec);
847299609e3SLois Curfman McInnes extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec);
848299609e3SLois Curfman McInnes 
849416022c9SBarry Smith static int MatSetOption_MPIAIJ(Mat A,MatOption op)
850c74985f6SBarry Smith {
851c0bbcb79SLois Curfman McInnes   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
852c74985f6SBarry Smith 
8536d4a8577SBarry Smith   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
8546d4a8577SBarry Smith       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
8556d4a8577SBarry Smith       op == MAT_COLUMNS_SORTED ||
8566d4a8577SBarry Smith       op == MAT_ROW_ORIENTED) {
857c0bbcb79SLois Curfman McInnes         MatSetOption(a->A,op);
858c0bbcb79SLois Curfman McInnes         MatSetOption(a->B,op);
859c74985f6SBarry Smith   }
8606d4a8577SBarry Smith   else if (op == MAT_ROWS_SORTED ||
8616d4a8577SBarry Smith            op == MAT_SYMMETRIC ||
8626d4a8577SBarry Smith            op == MAT_STRUCTURALLY_SYMMETRIC ||
8636d4a8577SBarry Smith            op == MAT_YES_NEW_DIAGONALS)
86494a424c1SBarry Smith     PLogInfo(A,"Info:MatSetOption_MPIAIJ:Option ignored\n");
8656d4a8577SBarry Smith   else if (op == MAT_COLUMN_ORIENTED) {
8664b0e389bSBarry Smith     a->roworiented = 0;
8674b0e389bSBarry Smith     MatSetOption(a->A,op);
8684b0e389bSBarry Smith     MatSetOption(a->B,op);
8694b0e389bSBarry Smith   }
8706d4a8577SBarry Smith   else if (op == MAT_NO_NEW_DIAGONALS)
8716d4a8577SBarry Smith     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:MAT_NO_NEW_DIAGONALS");}
872c0bbcb79SLois Curfman McInnes   else
8734d39ef84SLois Curfman McInnes     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:unknown option");}
874c74985f6SBarry Smith   return 0;
875c74985f6SBarry Smith }
876c74985f6SBarry Smith 
877d1710a03SLois Curfman McInnes static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
878c74985f6SBarry Smith {
87944a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
880c74985f6SBarry Smith   *m = mat->M; *n = mat->N;
881c74985f6SBarry Smith   return 0;
882c74985f6SBarry Smith }
883c74985f6SBarry Smith 
884d1710a03SLois Curfman McInnes static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
885c74985f6SBarry Smith {
88644a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
887b7c46309SBarry Smith   *m = mat->m; *n = mat->N;
888c74985f6SBarry Smith   return 0;
889c74985f6SBarry Smith }
890c74985f6SBarry Smith 
891d1710a03SLois Curfman McInnes static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
892c74985f6SBarry Smith {
89344a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
894c74985f6SBarry Smith   *m = mat->rstart; *n = mat->rend;
895c74985f6SBarry Smith   return 0;
896c74985f6SBarry Smith }
897c74985f6SBarry Smith 
8986d84be18SBarry Smith extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
8996d84be18SBarry Smith extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
9006d84be18SBarry Smith 
9016d84be18SBarry Smith int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
90239e00950SLois Curfman McInnes {
903154123eaSLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
90470f0671dSBarry Smith   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
905154123eaSLois Curfman McInnes   int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
906154123eaSLois Curfman McInnes   int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
90770f0671dSBarry Smith   int        *cmap, *idx_p;
90839e00950SLois Curfman McInnes 
9097a0afa10SBarry Smith   if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,"MatGetRow_MPIAIJ:Already active");
9107a0afa10SBarry Smith   mat->getrowactive = PETSC_TRUE;
9117a0afa10SBarry Smith 
91270f0671dSBarry Smith   if (!mat->rowvalues && (idx || v)) {
9137a0afa10SBarry Smith     /*
9147a0afa10SBarry Smith         allocate enough space to hold information from the longest row.
9157a0afa10SBarry Smith     */
9167a0afa10SBarry Smith     Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data;
917*c16cb8f2SBarry Smith     int     max = 1,m = mat->m,tmp;
918*c16cb8f2SBarry Smith     for ( i=0; i<m; i++ ) {
9197a0afa10SBarry Smith       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
9207a0afa10SBarry Smith       if (max < tmp) { max = tmp; }
9217a0afa10SBarry Smith     }
9227a0afa10SBarry Smith     mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar)));
9237a0afa10SBarry Smith     CHKPTRQ(mat->rowvalues);
9247a0afa10SBarry Smith     mat->rowindices = (int *) (mat->rowvalues + max);
9257a0afa10SBarry Smith   }
9267a0afa10SBarry Smith 
927b49de8d1SLois Curfman McInnes   if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIAIJ:Only local rows")
928abc0e9e4SLois Curfman McInnes   lrow = row - rstart;
92939e00950SLois Curfman McInnes 
930154123eaSLois Curfman McInnes   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
931154123eaSLois Curfman McInnes   if (!v)   {pvA = 0; pvB = 0;}
932154123eaSLois Curfman McInnes   if (!idx) {pcA = 0; if (!v) pcB = 0;}
93338597bd4SSatish Balay   ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
93438597bd4SSatish Balay   ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
935154123eaSLois Curfman McInnes   nztot = nzA + nzB;
936154123eaSLois Curfman McInnes 
93770f0671dSBarry Smith   cmap  = mat->garray;
938154123eaSLois Curfman McInnes   if (v  || idx) {
939154123eaSLois Curfman McInnes     if (nztot) {
940154123eaSLois Curfman McInnes       /* Sort by increasing column numbers, assuming A and B already sorted */
94170f0671dSBarry Smith       int imark = -1;
942154123eaSLois Curfman McInnes       if (v) {
94370f0671dSBarry Smith         *v = v_p = mat->rowvalues;
94439e00950SLois Curfman McInnes         for ( i=0; i<nzB; i++ ) {
94570f0671dSBarry Smith           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
946154123eaSLois Curfman McInnes           else break;
947154123eaSLois Curfman McInnes         }
948154123eaSLois Curfman McInnes         imark = i;
94970f0671dSBarry Smith         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
95070f0671dSBarry Smith         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
951154123eaSLois Curfman McInnes       }
952154123eaSLois Curfman McInnes       if (idx) {
95370f0671dSBarry Smith         *idx = idx_p = mat->rowindices;
95470f0671dSBarry Smith         if (imark > -1) {
95570f0671dSBarry Smith           for ( i=0; i<imark; i++ ) {
95670f0671dSBarry Smith             idx_p[i] = cmap[cworkB[i]];
95770f0671dSBarry Smith           }
95870f0671dSBarry Smith         } else {
959154123eaSLois Curfman McInnes           for ( i=0; i<nzB; i++ ) {
96070f0671dSBarry Smith             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
961154123eaSLois Curfman McInnes             else break;
962154123eaSLois Curfman McInnes           }
963154123eaSLois Curfman McInnes           imark = i;
96470f0671dSBarry Smith         }
96570f0671dSBarry Smith         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart + cworkA[i];
96670f0671dSBarry Smith         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]];
96739e00950SLois Curfman McInnes       }
96839e00950SLois Curfman McInnes     }
9691ca473b0SSatish Balay     else {
9701ca473b0SSatish Balay       if (idx) *idx = 0;
9711ca473b0SSatish Balay       if (v)   *v   = 0;
9721ca473b0SSatish Balay     }
973154123eaSLois Curfman McInnes   }
97439e00950SLois Curfman McInnes   *nz = nztot;
97538597bd4SSatish Balay   ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
97638597bd4SSatish Balay   ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
97739e00950SLois Curfman McInnes   return 0;
97839e00950SLois Curfman McInnes }
97939e00950SLois Curfman McInnes 
9806d84be18SBarry Smith int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
98139e00950SLois Curfman McInnes {
9827a0afa10SBarry Smith   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
9837a0afa10SBarry Smith   if (aij->getrowactive == PETSC_FALSE) {
9847a0afa10SBarry Smith     SETERRQ(1,"MatRestoreRow_MPIAIJ:MatGetRow not called");
9857a0afa10SBarry Smith   }
9867a0afa10SBarry Smith   aij->getrowactive = PETSC_FALSE;
98739e00950SLois Curfman McInnes   return 0;
98839e00950SLois Curfman McInnes }
98939e00950SLois Curfman McInnes 
990cddf8d76SBarry Smith static int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
991855ac2c5SLois Curfman McInnes {
992855ac2c5SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
993ec8511deSBarry Smith   Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
994416022c9SBarry Smith   int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
995416022c9SBarry Smith   double     sum = 0.0;
99604ca555eSLois Curfman McInnes   Scalar     *v;
99704ca555eSLois Curfman McInnes 
99817699dbbSLois Curfman McInnes   if (aij->size == 1) {
99914183eadSLois Curfman McInnes     ierr =  MatNorm(aij->A,type,norm); CHKERRQ(ierr);
100037fa93a5SLois Curfman McInnes   } else {
100104ca555eSLois Curfman McInnes     if (type == NORM_FROBENIUS) {
100204ca555eSLois Curfman McInnes       v = amat->a;
100304ca555eSLois Curfman McInnes       for (i=0; i<amat->nz; i++ ) {
100404ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX)
100504ca555eSLois Curfman McInnes         sum += real(conj(*v)*(*v)); v++;
100604ca555eSLois Curfman McInnes #else
100704ca555eSLois Curfman McInnes         sum += (*v)*(*v); v++;
100804ca555eSLois Curfman McInnes #endif
100904ca555eSLois Curfman McInnes       }
101004ca555eSLois Curfman McInnes       v = bmat->a;
101104ca555eSLois Curfman McInnes       for (i=0; i<bmat->nz; i++ ) {
101204ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX)
101304ca555eSLois Curfman McInnes         sum += real(conj(*v)*(*v)); v++;
101404ca555eSLois Curfman McInnes #else
101504ca555eSLois Curfman McInnes         sum += (*v)*(*v); v++;
101604ca555eSLois Curfman McInnes #endif
101704ca555eSLois Curfman McInnes       }
10186d84be18SBarry Smith       MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
101904ca555eSLois Curfman McInnes       *norm = sqrt(*norm);
102004ca555eSLois Curfman McInnes     }
102104ca555eSLois Curfman McInnes     else if (type == NORM_1) { /* max column norm */
102204ca555eSLois Curfman McInnes       double *tmp, *tmp2;
102304ca555eSLois Curfman McInnes       int    *jj, *garray = aij->garray;
10240452661fSBarry Smith       tmp  = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp);
10250452661fSBarry Smith       tmp2 = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp2);
1026cddf8d76SBarry Smith       PetscMemzero(tmp,aij->N*sizeof(double));
102704ca555eSLois Curfman McInnes       *norm = 0.0;
102804ca555eSLois Curfman McInnes       v = amat->a; jj = amat->j;
102904ca555eSLois Curfman McInnes       for ( j=0; j<amat->nz; j++ ) {
1030579c6b6fSBarry Smith         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
103104ca555eSLois Curfman McInnes       }
103204ca555eSLois Curfman McInnes       v = bmat->a; jj = bmat->j;
103304ca555eSLois Curfman McInnes       for ( j=0; j<bmat->nz; j++ ) {
1034579c6b6fSBarry Smith         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
103504ca555eSLois Curfman McInnes       }
10366d84be18SBarry Smith       MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);
103704ca555eSLois Curfman McInnes       for ( j=0; j<aij->N; j++ ) {
103804ca555eSLois Curfman McInnes         if (tmp2[j] > *norm) *norm = tmp2[j];
103904ca555eSLois Curfman McInnes       }
10400452661fSBarry Smith       PetscFree(tmp); PetscFree(tmp2);
104104ca555eSLois Curfman McInnes     }
104204ca555eSLois Curfman McInnes     else if (type == NORM_INFINITY) { /* max row norm */
1043515d9167SLois Curfman McInnes       double ntemp = 0.0;
104404ca555eSLois Curfman McInnes       for ( j=0; j<amat->m; j++ ) {
1045dbb450caSBarry Smith         v = amat->a + amat->i[j] + shift;
104604ca555eSLois Curfman McInnes         sum = 0.0;
104704ca555eSLois Curfman McInnes         for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
1048cddf8d76SBarry Smith           sum += PetscAbsScalar(*v); v++;
104904ca555eSLois Curfman McInnes         }
1050dbb450caSBarry Smith         v = bmat->a + bmat->i[j] + shift;
105104ca555eSLois Curfman McInnes         for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
1052cddf8d76SBarry Smith           sum += PetscAbsScalar(*v); v++;
105304ca555eSLois Curfman McInnes         }
1054515d9167SLois Curfman McInnes         if (sum > ntemp) ntemp = sum;
105504ca555eSLois Curfman McInnes       }
10566d84be18SBarry Smith       MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);
105704ca555eSLois Curfman McInnes     }
105804ca555eSLois Curfman McInnes     else {
1059515d9167SLois Curfman McInnes       SETERRQ(1,"MatNorm_MPIAIJ:No support for two norm");
106004ca555eSLois Curfman McInnes     }
106137fa93a5SLois Curfman McInnes   }
1062855ac2c5SLois Curfman McInnes   return 0;
1063855ac2c5SLois Curfman McInnes }
1064855ac2c5SLois Curfman McInnes 
10650de55854SLois Curfman McInnes static int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1066b7c46309SBarry Smith {
1067b7c46309SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1068dbb450caSBarry Smith   Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
1069416022c9SBarry Smith   int        ierr,shift = Aloc->indexshift;
1070416022c9SBarry Smith   Mat        B;
1071b7c46309SBarry Smith   int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
1072b7c46309SBarry Smith   Scalar     *array;
1073b7c46309SBarry Smith 
10743638b69dSLois Curfman McInnes   if (matout == PETSC_NULL && M != N)
10753638b69dSLois Curfman McInnes     SETERRQ(1,"MatTranspose_MPIAIJ:Square matrix only for in-place");
1076b4fd4287SBarry Smith   ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,
1077b4fd4287SBarry Smith          PETSC_NULL,&B); CHKERRQ(ierr);
1078b7c46309SBarry Smith 
1079b7c46309SBarry Smith   /* copy over the A part */
1080ec8511deSBarry Smith   Aloc = (Mat_SeqAIJ*) a->A->data;
1081b7c46309SBarry Smith   m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1082b7c46309SBarry Smith   row = a->rstart;
1083dbb450caSBarry Smith   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
1084b7c46309SBarry Smith   for ( i=0; i<m; i++ ) {
1085416022c9SBarry Smith     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1086b7c46309SBarry Smith     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1087b7c46309SBarry Smith   }
1088b7c46309SBarry Smith   aj = Aloc->j;
10894af08d9eSBarry Smith   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}
1090b7c46309SBarry Smith 
1091b7c46309SBarry Smith   /* copy over the B part */
1092ec8511deSBarry Smith   Aloc = (Mat_SeqAIJ*) a->B->data;
1093b7c46309SBarry Smith   m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1094b7c46309SBarry Smith   row = a->rstart;
10950452661fSBarry Smith   ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
1096dbb450caSBarry Smith   for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
1097b7c46309SBarry Smith   for ( i=0; i<m; i++ ) {
1098416022c9SBarry Smith     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1099b7c46309SBarry Smith     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1100b7c46309SBarry Smith   }
11010452661fSBarry Smith   PetscFree(ct);
11026d4a8577SBarry Smith   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
11036d4a8577SBarry Smith   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
11043638b69dSLois Curfman McInnes   if (matout != PETSC_NULL) {
11050de55854SLois Curfman McInnes     *matout = B;
11060de55854SLois Curfman McInnes   } else {
11070de55854SLois Curfman McInnes     /* This isn't really an in-place transpose .... but free data structures from a */
11080452661fSBarry Smith     PetscFree(a->rowners);
11090de55854SLois Curfman McInnes     ierr = MatDestroy(a->A); CHKERRQ(ierr);
11100de55854SLois Curfman McInnes     ierr = MatDestroy(a->B); CHKERRQ(ierr);
11110452661fSBarry Smith     if (a->colmap) PetscFree(a->colmap);
11120452661fSBarry Smith     if (a->garray) PetscFree(a->garray);
11130de55854SLois Curfman McInnes     if (a->lvec) VecDestroy(a->lvec);
1114a56f8943SBarry Smith     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
11150452661fSBarry Smith     PetscFree(a);
1116416022c9SBarry Smith     PetscMemcpy(A,B,sizeof(struct _Mat));
11170452661fSBarry Smith     PetscHeaderDestroy(B);
11180de55854SLois Curfman McInnes   }
1119b7c46309SBarry Smith   return 0;
1120b7c46309SBarry Smith }
1121b7c46309SBarry Smith 
1122682d7d0cSBarry Smith extern int MatPrintHelp_SeqAIJ(Mat);
1123682d7d0cSBarry Smith static int MatPrintHelp_MPIAIJ(Mat A)
1124682d7d0cSBarry Smith {
1125682d7d0cSBarry Smith   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1126682d7d0cSBarry Smith 
1127682d7d0cSBarry Smith   if (!a->rank) return MatPrintHelp_SeqAIJ(a->A);
1128682d7d0cSBarry Smith   else return 0;
1129682d7d0cSBarry Smith }
1130682d7d0cSBarry Smith 
11315a838052SSatish Balay static int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
11325a838052SSatish Balay {
11335a838052SSatish Balay   *bs = 1;
11345a838052SSatish Balay   return 0;
11355a838052SSatish Balay }
11365a838052SSatish Balay 
1137fc76ce87SLois Curfman McInnes extern int MatConvert_MPIAIJ(Mat,MatType,Mat *);
11383d1612f7SBarry Smith static int MatConvertSameType_MPIAIJ(Mat,Mat *,int);
11392f86bd48SSatish Balay extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int);
1140598137ffSSatish Balay int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **);
11418a729477SBarry Smith /* -------------------------------------------------------------------*/
11422ee70a88SLois Curfman McInnes static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
114339e00950SLois Curfman McInnes        MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
114444a69424SLois Curfman McInnes        MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
114544a69424SLois Curfman McInnes        MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
1146299609e3SLois Curfman McInnes        MatSolve_MPIAIJ,MatSolveAdd_MPIAIJ,
1147299609e3SLois Curfman McInnes        MatSolveTrans_MPIAIJ,MatSolveTransAdd_MPIAIJ,
1148299609e3SLois Curfman McInnes        MatLUFactor_MPIAIJ,0,
114944a69424SLois Curfman McInnes        MatRelax_MPIAIJ,
1150b7c46309SBarry Smith        MatTranspose_MPIAIJ,
1151a66be287SLois Curfman McInnes        MatGetInfo_MPIAIJ,0,
1152855ac2c5SLois Curfman McInnes        MatGetDiagonal_MPIAIJ,0,MatNorm_MPIAIJ,
1153ee50ffe9SBarry Smith        MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
11541eb62cbbSBarry Smith        0,
1155299609e3SLois Curfman McInnes        MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
1156299609e3SLois Curfman McInnes        MatGetReordering_MPIAIJ,
1157299609e3SLois Curfman McInnes        MatLUFactorSymbolic_MPIAIJ,MatLUFactorNumeric_MPIAIJ,0,0,
1158d1710a03SLois Curfman McInnes        MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
1159299609e3SLois Curfman McInnes        MatILUFactorSymbolic_MPIAIJ,0,
11603d1612f7SBarry Smith        0,0,MatConvert_MPIAIJ,0,0,MatConvertSameType_MPIAIJ,0,0,
1161b49de8d1SLois Curfman McInnes        0,0,0,
1162598137ffSSatish Balay        MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0,
1163052efed2SBarry Smith        MatPrintHelp_MPIAIJ,
11645a838052SSatish Balay        MatScale_MPIAIJ,0,0,0,MatGetBlockSize_MPIAIJ};
11658a729477SBarry Smith 
11661987afe7SBarry Smith /*@C
1167ff756334SLois Curfman McInnes    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
11683a511b96SLois Curfman McInnes    (the default parallel PETSc format).  For good matrix assembly performance
11693a511b96SLois Curfman McInnes    the user should preallocate the matrix storage by setting the parameters
11703a511b96SLois Curfman McInnes    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
11713a511b96SLois Curfman McInnes    performance can be increased by more than a factor of 50.
11728a729477SBarry Smith 
11738a729477SBarry Smith    Input Parameters:
11741eb62cbbSBarry Smith .  comm - MPI communicator
11757d3e4905SLois Curfman McInnes .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
117692e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
117792e8d321SLois Curfman McInnes            y vector for the matrix-vector product y = Ax.
117892e8d321SLois Curfman McInnes .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
117992e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
118092e8d321SLois Curfman McInnes            x vector for the matrix-vector product y = Ax.
11817d3e4905SLois Curfman McInnes .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
118292e8d321SLois Curfman McInnes .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
1183ab693e5aSLois Curfman McInnes .  d_nz - number of nonzeros per row in diagonal portion of local submatrix
1184ff756334SLois Curfman McInnes            (same for all local rows)
11852bd5e0b2SLois Curfman McInnes .  d_nzz - array containing the number of nonzeros in the various rows of the
118692e8d321SLois Curfman McInnes            diagonal portion of the local submatrix (possibly different for each row)
11872bd5e0b2SLois Curfman McInnes            or PETSC_NULL. You must leave room for the diagonal entry even if
11882bd5e0b2SLois Curfman McInnes            it is zero.
11892bd5e0b2SLois Curfman McInnes .  o_nz - number of nonzeros per row in the off-diagonal portion of local
1190ab693e5aSLois Curfman McInnes            submatrix (same for all local rows).
11912bd5e0b2SLois Curfman McInnes .  o_nzz - array containing the number of nonzeros in the various rows of the
11922bd5e0b2SLois Curfman McInnes            off-diagonal portion of the local submatrix (possibly different for
11932bd5e0b2SLois Curfman McInnes            each row) or PETSC_NULL.
11948a729477SBarry Smith 
1195ff756334SLois Curfman McInnes    Output Parameter:
119644cd7ae7SLois Curfman McInnes .  A - the matrix
11978a729477SBarry Smith 
1198ff756334SLois Curfman McInnes    Notes:
1199ff756334SLois Curfman McInnes    The AIJ format (also called the Yale sparse matrix format or
1200ff756334SLois Curfman McInnes    compressed row storage), is fully compatible with standard Fortran 77
12010002213bSLois Curfman McInnes    storage.  That is, the stored row and column indices can begin at
12020002213bSLois Curfman McInnes    either one (as in Fortran) or zero.  See the users manual for details.
1203ff756334SLois Curfman McInnes 
1204ff756334SLois Curfman McInnes    The user MUST specify either the local or global matrix dimensions
1205ff756334SLois Curfman McInnes    (possibly both).
1206ff756334SLois Curfman McInnes 
12075511cfe3SLois Curfman McInnes    By default, this format uses inodes (identical nodes) when possible.
12085511cfe3SLois Curfman McInnes    We search for consecutive rows with the same nonzero structure, thereby
12095511cfe3SLois Curfman McInnes    reusing matrix information to achieve increased efficiency.
12105511cfe3SLois Curfman McInnes 
12115511cfe3SLois Curfman McInnes    Options Database Keys:
12125511cfe3SLois Curfman McInnes $    -mat_aij_no_inode  - Do not use inodes
12136e62573dSLois Curfman McInnes $    -mat_aij_inode_limit <limit> - Set inode limit.
12146e62573dSLois Curfman McInnes $        (max limit=5)
12156e62573dSLois Curfman McInnes $    -mat_aij_oneindex - Internally use indexing starting at 1
12166e62573dSLois Curfman McInnes $        rather than 0.  Note: When calling MatSetValues(),
12176e62573dSLois Curfman McInnes $        the user still MUST index entries starting at 0!
12185511cfe3SLois Curfman McInnes 
1219e0245417SLois Curfman McInnes    Storage Information:
1220e0245417SLois Curfman McInnes    For a square global matrix we define each processor's diagonal portion
1221e0245417SLois Curfman McInnes    to be its local rows and the corresponding columns (a square submatrix);
1222e0245417SLois Curfman McInnes    each processor's off-diagonal portion encompasses the remainder of the
1223e0245417SLois Curfman McInnes    local matrix (a rectangular submatrix).
1224e0245417SLois Curfman McInnes 
1225e0245417SLois Curfman McInnes    The user can specify preallocated storage for the diagonal part of
12265ace5be8SLois Curfman McInnes    the local submatrix with either d_nz or d_nnz (not both).  Set
12275ace5be8SLois Curfman McInnes    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
12285ace5be8SLois Curfman McInnes    memory allocation.  Likewise, specify preallocated storage for the
12295ace5be8SLois Curfman McInnes    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1230ff756334SLois Curfman McInnes 
12315511cfe3SLois Curfman McInnes    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
12325511cfe3SLois Curfman McInnes    the figure below we depict these three local rows and all columns (0-11).
12332191d07cSBarry Smith 
1234b810aeb4SBarry Smith $          0 1 2 3 4 5 6 7 8 9 10 11
1235b810aeb4SBarry Smith $         -------------------
12365511cfe3SLois Curfman McInnes $  row 3  |  o o o d d d o o o o o o
12375511cfe3SLois Curfman McInnes $  row 4  |  o o o d d d o o o o o o
12385511cfe3SLois Curfman McInnes $  row 5  |  o o o d d d o o o o o o
12395511cfe3SLois Curfman McInnes $         -------------------
1240b810aeb4SBarry Smith $
1241b810aeb4SBarry Smith 
12425511cfe3SLois Curfman McInnes    Thus, any entries in the d locations are stored in the d (diagonal)
12435511cfe3SLois Curfman McInnes    submatrix, and any entries in the o locations are stored in the
12445511cfe3SLois Curfman McInnes    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
12455511cfe3SLois Curfman McInnes    stored simply in the MATSEQAIJ format for compressed row storage.
12465511cfe3SLois Curfman McInnes 
12475511cfe3SLois Curfman McInnes    Now d_nz should indicate the number of nonzeros per row in the d matrix,
12485511cfe3SLois Curfman McInnes    and o_nz should indicate the number of nonzeros per row in the o matrix.
12495511cfe3SLois Curfman McInnes    In general, for PDE problems in which most nonzeros are near the diagonal,
12503d323bbdSBarry Smith    one expects d_nz >> o_nz. For large problems you MUST preallocate memory
125192e8d321SLois Curfman McInnes    or you will get TERRIBLE performance; see the users' manual chapter on
12523d323bbdSBarry Smith    matrices and the file $(PETSC_DIR)/Performance.
12533a511b96SLois Curfman McInnes 
1254dbd7a890SLois Curfman McInnes .keywords: matrix, aij, compressed row, sparse, parallel
1255ff756334SLois Curfman McInnes 
1256fafbff53SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
12578a729477SBarry Smith @*/
12581eb62cbbSBarry Smith int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
125944cd7ae7SLois Curfman McInnes                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
12608a729477SBarry Smith {
126144cd7ae7SLois Curfman McInnes   Mat          B;
126244cd7ae7SLois Curfman McInnes   Mat_MPIAIJ   *b;
12636abc6512SBarry Smith   int          ierr, i,sum[2],work[2];
1264416022c9SBarry Smith 
126544cd7ae7SLois Curfman McInnes   *A = 0;
126644cd7ae7SLois Curfman McInnes   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIAIJ,comm);
126744cd7ae7SLois Curfman McInnes   PLogObjectCreate(B);
126844cd7ae7SLois Curfman McInnes   B->data       = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b);
126944cd7ae7SLois Curfman McInnes   PetscMemzero(b,sizeof(Mat_MPIAIJ));
127044cd7ae7SLois Curfman McInnes   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
127144cd7ae7SLois Curfman McInnes   B->destroy    = MatDestroy_MPIAIJ;
127244cd7ae7SLois Curfman McInnes   B->view       = MatView_MPIAIJ;
127344cd7ae7SLois Curfman McInnes   B->factor     = 0;
127444cd7ae7SLois Curfman McInnes   B->assembled  = PETSC_FALSE;
1275d6dfbf8fSBarry Smith 
127644cd7ae7SLois Curfman McInnes   b->insertmode = NOT_SET_VALUES;
127744cd7ae7SLois Curfman McInnes   MPI_Comm_rank(comm,&b->rank);
127844cd7ae7SLois Curfman McInnes   MPI_Comm_size(comm,&b->size);
12791eb62cbbSBarry Smith 
1280b4fd4287SBarry Smith   if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
12812e0155e0SLois Curfman McInnes     SETERRQ(1,"MatCreateMPIAIJ:Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
12821987afe7SBarry Smith 
1283dbd7a890SLois Curfman McInnes   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
12841eb62cbbSBarry Smith     work[0] = m; work[1] = n;
1285d65a2f8fSBarry Smith     MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
1286dbd7a890SLois Curfman McInnes     if (M == PETSC_DECIDE) M = sum[0];
1287dbd7a890SLois Curfman McInnes     if (N == PETSC_DECIDE) N = sum[1];
12881eb62cbbSBarry Smith   }
128944cd7ae7SLois Curfman McInnes   if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);}
129044cd7ae7SLois Curfman McInnes   if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);}
129144cd7ae7SLois Curfman McInnes   b->m = m; B->m = m;
129244cd7ae7SLois Curfman McInnes   b->n = n; B->n = n;
129344cd7ae7SLois Curfman McInnes   b->N = N; B->N = N;
129444cd7ae7SLois Curfman McInnes   b->M = M; B->M = M;
12951eb62cbbSBarry Smith 
12961eb62cbbSBarry Smith   /* build local table of row and column ownerships */
129744cd7ae7SLois Curfman McInnes   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
129844cd7ae7SLois Curfman McInnes   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
1299603f58a4SSatish Balay   b->cowners = b->rowners + b->size + 2;
130044cd7ae7SLois Curfman McInnes   MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);
130144cd7ae7SLois Curfman McInnes   b->rowners[0] = 0;
130244cd7ae7SLois Curfman McInnes   for ( i=2; i<=b->size; i++ ) {
130344cd7ae7SLois Curfman McInnes     b->rowners[i] += b->rowners[i-1];
13048a729477SBarry Smith   }
130544cd7ae7SLois Curfman McInnes   b->rstart = b->rowners[b->rank];
130644cd7ae7SLois Curfman McInnes   b->rend   = b->rowners[b->rank+1];
130744cd7ae7SLois Curfman McInnes   MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);
130844cd7ae7SLois Curfman McInnes   b->cowners[0] = 0;
130944cd7ae7SLois Curfman McInnes   for ( i=2; i<=b->size; i++ ) {
131044cd7ae7SLois Curfman McInnes     b->cowners[i] += b->cowners[i-1];
13118a729477SBarry Smith   }
131244cd7ae7SLois Curfman McInnes   b->cstart = b->cowners[b->rank];
131344cd7ae7SLois Curfman McInnes   b->cend   = b->cowners[b->rank+1];
13148a729477SBarry Smith 
13155ace5be8SLois Curfman McInnes   if (d_nz == PETSC_DEFAULT) d_nz = 5;
131644cd7ae7SLois Curfman McInnes   ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
131744cd7ae7SLois Curfman McInnes   PLogObjectParent(B,b->A);
13187b8455f0SLois Curfman McInnes   if (o_nz == PETSC_DEFAULT) o_nz = 0;
131944cd7ae7SLois Curfman McInnes   ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
132044cd7ae7SLois Curfman McInnes   PLogObjectParent(B,b->B);
13218a729477SBarry Smith 
13221eb62cbbSBarry Smith   /* build cache for off array entries formed */
132344cd7ae7SLois Curfman McInnes   ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
132444cd7ae7SLois Curfman McInnes   b->colmap      = 0;
132544cd7ae7SLois Curfman McInnes   b->garray      = 0;
132644cd7ae7SLois Curfman McInnes   b->roworiented = 1;
13278a729477SBarry Smith 
13281eb62cbbSBarry Smith   /* stuff used for matrix vector multiply */
132944cd7ae7SLois Curfman McInnes   b->lvec      = 0;
133044cd7ae7SLois Curfman McInnes   b->Mvctx     = 0;
13318a729477SBarry Smith 
13327a0afa10SBarry Smith   /* stuff for MatGetRow() */
133344cd7ae7SLois Curfman McInnes   b->rowindices   = 0;
133444cd7ae7SLois Curfman McInnes   b->rowvalues    = 0;
133544cd7ae7SLois Curfman McInnes   b->getrowactive = PETSC_FALSE;
13367a0afa10SBarry Smith 
133744cd7ae7SLois Curfman McInnes   *A = B;
1338d6dfbf8fSBarry Smith   return 0;
1339d6dfbf8fSBarry Smith }
1340c74985f6SBarry Smith 
13413d1612f7SBarry Smith static int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues)
1342d6dfbf8fSBarry Smith {
1343d6dfbf8fSBarry Smith   Mat        mat;
1344416022c9SBarry Smith   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
1345a1b97e82SLois Curfman McInnes   int        ierr, len=0, flg;
1346d6dfbf8fSBarry Smith 
1347416022c9SBarry Smith   *newmat       = 0;
13480452661fSBarry Smith   PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm);
1349a5a9c739SBarry Smith   PLogObjectCreate(mat);
13500452661fSBarry Smith   mat->data       = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
1351416022c9SBarry Smith   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
135244a69424SLois Curfman McInnes   mat->destroy    = MatDestroy_MPIAIJ;
135344a69424SLois Curfman McInnes   mat->view       = MatView_MPIAIJ;
1354d6dfbf8fSBarry Smith   mat->factor     = matin->factor;
1355c456f294SBarry Smith   mat->assembled  = PETSC_TRUE;
1356d6dfbf8fSBarry Smith 
135744cd7ae7SLois Curfman McInnes   a->m = mat->m   = oldmat->m;
135844cd7ae7SLois Curfman McInnes   a->n = mat->n   = oldmat->n;
135944cd7ae7SLois Curfman McInnes   a->M = mat->M   = oldmat->M;
136044cd7ae7SLois Curfman McInnes   a->N = mat->N   = oldmat->N;
1361d6dfbf8fSBarry Smith 
1362416022c9SBarry Smith   a->rstart       = oldmat->rstart;
1363416022c9SBarry Smith   a->rend         = oldmat->rend;
1364416022c9SBarry Smith   a->cstart       = oldmat->cstart;
1365416022c9SBarry Smith   a->cend         = oldmat->cend;
136617699dbbSLois Curfman McInnes   a->size         = oldmat->size;
136717699dbbSLois Curfman McInnes   a->rank         = oldmat->rank;
136864119d58SLois Curfman McInnes   a->insertmode   = NOT_SET_VALUES;
1369bcd2baecSBarry Smith   a->rowvalues    = 0;
1370bcd2baecSBarry Smith   a->getrowactive = PETSC_FALSE;
1371d6dfbf8fSBarry Smith 
1372603f58a4SSatish Balay   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
1373603f58a4SSatish Balay   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
1374603f58a4SSatish Balay   a->cowners = a->rowners + a->size + 2;
1375603f58a4SSatish Balay   PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
1376416022c9SBarry Smith   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
13772ee70a88SLois Curfman McInnes   if (oldmat->colmap) {
13780452661fSBarry Smith     a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
1379416022c9SBarry Smith     PLogObjectMemory(mat,(a->N)*sizeof(int));
1380416022c9SBarry Smith     PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
1381416022c9SBarry Smith   } else a->colmap = 0;
1382ec8511deSBarry Smith   if (oldmat->garray && (len = ((Mat_SeqAIJ *) (oldmat->B->data))->n)) {
13830452661fSBarry Smith     a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray);
1384464493b3SBarry Smith     PLogObjectMemory(mat,len*sizeof(int));
1385416022c9SBarry Smith     PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
1386416022c9SBarry Smith   } else a->garray = 0;
1387d6dfbf8fSBarry Smith 
1388416022c9SBarry Smith   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
1389416022c9SBarry Smith   PLogObjectParent(mat,a->lvec);
1390a56f8943SBarry Smith   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
1391416022c9SBarry Smith   PLogObjectParent(mat,a->Mvctx);
1392416022c9SBarry Smith   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
1393416022c9SBarry Smith   PLogObjectParent(mat,a->A);
1394416022c9SBarry Smith   ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
1395416022c9SBarry Smith   PLogObjectParent(mat,a->B);
13965dd7a6c7SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
139725cdf11fSBarry Smith   if (flg) {
1398682d7d0cSBarry Smith     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
1399682d7d0cSBarry Smith   }
14008a729477SBarry Smith   *newmat = mat;
14018a729477SBarry Smith   return 0;
14028a729477SBarry Smith }
1403416022c9SBarry Smith 
140477c4ece6SBarry Smith #include "sys.h"
1405416022c9SBarry Smith 
140619bcc07fSBarry Smith int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
1407416022c9SBarry Smith {
1408d65a2f8fSBarry Smith   Mat          A;
1409416022c9SBarry Smith   int          i, nz, ierr, j,rstart, rend, fd;
1410d65a2f8fSBarry Smith   Scalar       *vals,*svals;
141119bcc07fSBarry Smith   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1412416022c9SBarry Smith   MPI_Status   status;
141317699dbbSLois Curfman McInnes   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1414d65a2f8fSBarry Smith   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
141519bcc07fSBarry Smith   int          tag = ((PetscObject)viewer)->tag;
1416416022c9SBarry Smith 
141717699dbbSLois Curfman McInnes   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
141817699dbbSLois Curfman McInnes   if (!rank) {
141990ace30eSBarry Smith     ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
142077c4ece6SBarry Smith     ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr);
1421c456f294SBarry Smith     if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIAIJ:not matrix object");
1422416022c9SBarry Smith   }
1423416022c9SBarry Smith 
1424416022c9SBarry Smith   MPI_Bcast(header+1,3,MPI_INT,0,comm);
1425416022c9SBarry Smith   M = header[1]; N = header[2];
1426416022c9SBarry Smith   /* determine ownership of all rows */
142717699dbbSLois Curfman McInnes   m = M/size + ((M % size) > rank);
14280452661fSBarry Smith   rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
1429416022c9SBarry Smith   MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1430416022c9SBarry Smith   rowners[0] = 0;
143117699dbbSLois Curfman McInnes   for ( i=2; i<=size; i++ ) {
1432416022c9SBarry Smith     rowners[i] += rowners[i-1];
1433416022c9SBarry Smith   }
143417699dbbSLois Curfman McInnes   rstart = rowners[rank];
143517699dbbSLois Curfman McInnes   rend   = rowners[rank+1];
1436416022c9SBarry Smith 
1437416022c9SBarry Smith   /* distribute row lengths to all processors */
14380452661fSBarry Smith   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
1439416022c9SBarry Smith   offlens = ourlens + (rend-rstart);
144017699dbbSLois Curfman McInnes   if (!rank) {
14410452661fSBarry Smith     rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
144277c4ece6SBarry Smith     ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
14430452661fSBarry Smith     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
144417699dbbSLois Curfman McInnes     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
1445d65a2f8fSBarry Smith     MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
14460452661fSBarry Smith     PetscFree(sndcounts);
1447416022c9SBarry Smith   }
1448416022c9SBarry Smith   else {
1449416022c9SBarry Smith     MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
1450416022c9SBarry Smith   }
1451416022c9SBarry Smith 
145217699dbbSLois Curfman McInnes   if (!rank) {
1453416022c9SBarry Smith     /* calculate the number of nonzeros on each processor */
14540452661fSBarry Smith     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
1455cddf8d76SBarry Smith     PetscMemzero(procsnz,size*sizeof(int));
145617699dbbSLois Curfman McInnes     for ( i=0; i<size; i++ ) {
1457416022c9SBarry Smith       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
1458416022c9SBarry Smith         procsnz[i] += rowlengths[j];
1459416022c9SBarry Smith       }
1460416022c9SBarry Smith     }
14610452661fSBarry Smith     PetscFree(rowlengths);
1462416022c9SBarry Smith 
1463416022c9SBarry Smith     /* determine max buffer needed and allocate it */
1464416022c9SBarry Smith     maxnz = 0;
146517699dbbSLois Curfman McInnes     for ( i=0; i<size; i++ ) {
14660452661fSBarry Smith       maxnz = PetscMax(maxnz,procsnz[i]);
1467416022c9SBarry Smith     }
14680452661fSBarry Smith     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
1469416022c9SBarry Smith 
1470416022c9SBarry Smith     /* read in my part of the matrix column indices  */
1471416022c9SBarry Smith     nz = procsnz[0];
14720452661fSBarry Smith     mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
147377c4ece6SBarry Smith     ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr);
1474d65a2f8fSBarry Smith 
1475d65a2f8fSBarry Smith     /* read in every one elses and ship off */
147617699dbbSLois Curfman McInnes     for ( i=1; i<size; i++ ) {
1477d65a2f8fSBarry Smith       nz = procsnz[i];
147877c4ece6SBarry Smith       ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
1479d65a2f8fSBarry Smith       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
1480d65a2f8fSBarry Smith     }
14810452661fSBarry Smith     PetscFree(cols);
1482416022c9SBarry Smith   }
1483416022c9SBarry Smith   else {
1484416022c9SBarry Smith     /* determine buffer space needed for message */
1485416022c9SBarry Smith     nz = 0;
1486416022c9SBarry Smith     for ( i=0; i<m; i++ ) {
1487416022c9SBarry Smith       nz += ourlens[i];
1488416022c9SBarry Smith     }
14890452661fSBarry Smith     mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1490416022c9SBarry Smith 
1491416022c9SBarry Smith     /* receive message of column indices*/
1492d65a2f8fSBarry Smith     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
1493416022c9SBarry Smith     MPI_Get_count(&status,MPI_INT,&maxnz);
1494c456f294SBarry Smith     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file");
1495416022c9SBarry Smith   }
1496416022c9SBarry Smith 
1497416022c9SBarry Smith   /* loop over local rows, determining number of off diagonal entries */
1498cddf8d76SBarry Smith   PetscMemzero(offlens,m*sizeof(int));
1499416022c9SBarry Smith   jj = 0;
1500416022c9SBarry Smith   for ( i=0; i<m; i++ ) {
1501416022c9SBarry Smith     for ( j=0; j<ourlens[i]; j++ ) {
1502d65a2f8fSBarry Smith       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1503416022c9SBarry Smith       jj++;
1504416022c9SBarry Smith     }
1505416022c9SBarry Smith   }
1506d65a2f8fSBarry Smith 
1507d65a2f8fSBarry Smith   /* create our matrix */
1508416022c9SBarry Smith   for ( i=0; i<m; i++ ) {
1509416022c9SBarry Smith     ourlens[i] -= offlens[i];
1510416022c9SBarry Smith   }
1511d65a2f8fSBarry Smith   ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1512d65a2f8fSBarry Smith   A = *newmat;
15136d4a8577SBarry Smith   MatSetOption(A,MAT_COLUMNS_SORTED);
1514d65a2f8fSBarry Smith   for ( i=0; i<m; i++ ) {
1515d65a2f8fSBarry Smith     ourlens[i] += offlens[i];
1516d65a2f8fSBarry Smith   }
1517416022c9SBarry Smith 
151817699dbbSLois Curfman McInnes   if (!rank) {
15190452661fSBarry Smith     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);
1520416022c9SBarry Smith 
1521416022c9SBarry Smith     /* read in my part of the matrix numerical values  */
1522416022c9SBarry Smith     nz = procsnz[0];
152377c4ece6SBarry Smith     ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
1524d65a2f8fSBarry Smith 
1525d65a2f8fSBarry Smith     /* insert into matrix */
1526d65a2f8fSBarry Smith     jj      = rstart;
1527d65a2f8fSBarry Smith     smycols = mycols;
1528d65a2f8fSBarry Smith     svals   = vals;
1529d65a2f8fSBarry Smith     for ( i=0; i<m; i++ ) {
1530d65a2f8fSBarry Smith       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1531d65a2f8fSBarry Smith       smycols += ourlens[i];
1532d65a2f8fSBarry Smith       svals   += ourlens[i];
1533d65a2f8fSBarry Smith       jj++;
1534416022c9SBarry Smith     }
1535416022c9SBarry Smith 
1536d65a2f8fSBarry Smith     /* read in other processors and ship out */
153717699dbbSLois Curfman McInnes     for ( i=1; i<size; i++ ) {
1538416022c9SBarry Smith       nz = procsnz[i];
153977c4ece6SBarry Smith       ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
1540d65a2f8fSBarry Smith       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
1541416022c9SBarry Smith     }
15420452661fSBarry Smith     PetscFree(procsnz);
1543416022c9SBarry Smith   }
1544d65a2f8fSBarry Smith   else {
1545d65a2f8fSBarry Smith     /* receive numeric values */
15460452661fSBarry Smith     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);
1547416022c9SBarry Smith 
1548d65a2f8fSBarry Smith     /* receive message of values*/
1549d65a2f8fSBarry Smith     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
1550d65a2f8fSBarry Smith     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1551c456f294SBarry Smith     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file");
1552d65a2f8fSBarry Smith 
1553d65a2f8fSBarry Smith     /* insert into matrix */
1554d65a2f8fSBarry Smith     jj      = rstart;
1555d65a2f8fSBarry Smith     smycols = mycols;
1556d65a2f8fSBarry Smith     svals   = vals;
1557d65a2f8fSBarry Smith     for ( i=0; i<m; i++ ) {
1558d65a2f8fSBarry Smith       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1559d65a2f8fSBarry Smith       smycols += ourlens[i];
1560d65a2f8fSBarry Smith       svals   += ourlens[i];
1561d65a2f8fSBarry Smith       jj++;
1562d65a2f8fSBarry Smith     }
1563d65a2f8fSBarry Smith   }
15640452661fSBarry Smith   PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);
1565d65a2f8fSBarry Smith 
15666d4a8577SBarry Smith   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
15676d4a8577SBarry Smith   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1568416022c9SBarry Smith   return 0;
1569416022c9SBarry Smith }
1570