xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 70f55243aafb320636e2a54ff30cab5d1e8d3d7b)
1905e6a2fSBarry Smith 
2cb512458SBarry Smith #ifndef lint
3*70f55243SBarry Smith static char vcid[] = "$Id: mpiaij.c,v 1.159 1996/08/06 16:51:19 balay Exp bsmith $";
4cb512458SBarry Smith #endif
58a729477SBarry Smith 
6*70f55243SBarry Smith #include "src/mat/impls/aij/mpi/mpiaij.h"
7f5eb4b81SSatish Balay #include "src/vec/vecimpl.h"
8d9942c19SSatish Balay #include "src/inline/spops.h"
98a729477SBarry Smith 
109e25ed09SBarry Smith /* local utility routine that creates a mapping from the global column
119e25ed09SBarry Smith number to the local number in the off-diagonal part of the local
129e25ed09SBarry Smith storage of the matrix.  This is done in a non scable way since the
139e25ed09SBarry Smith length of colmap equals the global matrix length.
149e25ed09SBarry Smith */
15905e6a2fSBarry Smith static int CreateColmap_MPIAIJ_Private(Mat mat)
169e25ed09SBarry Smith {
1744a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
18ec8511deSBarry Smith   Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
19905e6a2fSBarry Smith   int        n = B->n,i;
20dbb450caSBarry Smith 
210452661fSBarry Smith   aij->colmap = (int *) PetscMalloc(aij->N*sizeof(int));CHKPTRQ(aij->colmap);
22464493b3SBarry Smith   PLogObjectMemory(mat,aij->N*sizeof(int));
23cddf8d76SBarry Smith   PetscMemzero(aij->colmap,aij->N*sizeof(int));
24905e6a2fSBarry Smith   for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
259e25ed09SBarry Smith   return 0;
269e25ed09SBarry Smith }
279e25ed09SBarry Smith 
282493cbb0SBarry Smith extern int DisAssemble_MPIAIJ(Mat);
292493cbb0SBarry Smith 
3070423df4SBarry Smith static int MatGetReordering_MPIAIJ(Mat mat,MatReordering type,IS *rperm,IS *cperm)
31299609e3SLois Curfman McInnes {
32299609e3SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
33299609e3SLois Curfman McInnes   int        ierr;
3417699dbbSLois Curfman McInnes   if (aij->size == 1) {
3551c98ccdSLois Curfman McInnes     ierr = MatGetReordering(aij->A,type,rperm,cperm); CHKERRQ(ierr);
3648d91487SBarry Smith   } else SETERRQ(1,"MatGetReordering_MPIAIJ:not supported in parallel");
37299609e3SLois Curfman McInnes   return 0;
38299609e3SLois Curfman McInnes }
39299609e3SLois Curfman McInnes 
404b0e389bSBarry Smith static int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
418a729477SBarry Smith {
4244a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
434b0e389bSBarry Smith   Scalar     value;
441eb62cbbSBarry Smith   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
451eb62cbbSBarry Smith   int        cstart = aij->cstart, cend = aij->cend,row,col;
46905e6a2fSBarry Smith   int        roworiented = aij->roworiented;
478a729477SBarry Smith 
4864119d58SLois Curfman McInnes   if (aij->insertmode != NOT_SET_VALUES && aij->insertmode != addv) {
4948d91487SBarry Smith     SETERRQ(1,"MatSetValues_MPIAIJ:Cannot mix inserts and adds");
508a729477SBarry Smith   }
511eb62cbbSBarry Smith   aij->insertmode = addv;
528a729477SBarry Smith   for ( i=0; i<m; i++ ) {
534b0e389bSBarry Smith     if (im[i] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative row");
544b0e389bSBarry Smith     if (im[i] >= aij->M) SETERRQ(1,"MatSetValues_MPIAIJ:Row too large");
554b0e389bSBarry Smith     if (im[i] >= rstart && im[i] < rend) {
564b0e389bSBarry Smith       row = im[i] - rstart;
571eb62cbbSBarry Smith       for ( j=0; j<n; j++ ) {
584b0e389bSBarry Smith         if (in[j] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative column");
594b0e389bSBarry Smith         if (in[j] >= aij->N) SETERRQ(1,"MatSetValues_MPIAIJ:Col too large");
604b0e389bSBarry Smith         if (in[j] >= cstart && in[j] < cend){
614b0e389bSBarry Smith           col = in[j] - cstart;
624b0e389bSBarry Smith           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
634b0e389bSBarry Smith           ierr = MatSetValues(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
641eb62cbbSBarry Smith         }
651eb62cbbSBarry Smith         else {
66227d817aSBarry Smith           if (mat->was_assembled) {
67905e6a2fSBarry Smith             if (!aij->colmap) {
68905e6a2fSBarry Smith               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
69905e6a2fSBarry Smith             }
70905e6a2fSBarry Smith             col = aij->colmap[in[j]] - 1;
71ec8511deSBarry Smith             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
722493cbb0SBarry Smith               ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
734b0e389bSBarry Smith               col =  in[j];
74d6dfbf8fSBarry Smith             }
759e25ed09SBarry Smith           }
764b0e389bSBarry Smith           else col = in[j];
774b0e389bSBarry Smith           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
784b0e389bSBarry Smith           ierr = MatSetValues(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
791eb62cbbSBarry Smith         }
801eb62cbbSBarry Smith       }
811eb62cbbSBarry Smith     }
821eb62cbbSBarry Smith     else {
834b0e389bSBarry Smith       if (roworiented) {
844b0e389bSBarry Smith         ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
854b0e389bSBarry Smith       }
864b0e389bSBarry Smith       else {
874b0e389bSBarry Smith         row = im[i];
884b0e389bSBarry Smith         for ( j=0; j<n; j++ ) {
894b0e389bSBarry Smith           ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
904b0e389bSBarry Smith         }
914b0e389bSBarry Smith       }
921eb62cbbSBarry Smith     }
938a729477SBarry Smith   }
948a729477SBarry Smith   return 0;
958a729477SBarry Smith }
968a729477SBarry Smith 
97b49de8d1SLois Curfman McInnes static int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
98b49de8d1SLois Curfman McInnes {
99b49de8d1SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
100b49de8d1SLois Curfman McInnes   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
101b49de8d1SLois Curfman McInnes   int        cstart = aij->cstart, cend = aij->cend,row,col;
102b49de8d1SLois Curfman McInnes 
103b49de8d1SLois Curfman McInnes   for ( i=0; i<m; i++ ) {
104b49de8d1SLois Curfman McInnes     if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative row");
105b49de8d1SLois Curfman McInnes     if (idxm[i] >= aij->M) SETERRQ(1,"MatGetValues_MPIAIJ:Row too large");
106b49de8d1SLois Curfman McInnes     if (idxm[i] >= rstart && idxm[i] < rend) {
107b49de8d1SLois Curfman McInnes       row = idxm[i] - rstart;
108b49de8d1SLois Curfman McInnes       for ( j=0; j<n; j++ ) {
109b49de8d1SLois Curfman McInnes         if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative column");
110b49de8d1SLois Curfman McInnes         if (idxn[j] >= aij->N) SETERRQ(1,"MatGetValues_MPIAIJ:Col too large");
111b49de8d1SLois Curfman McInnes         if (idxn[j] >= cstart && idxn[j] < cend){
112b49de8d1SLois Curfman McInnes           col = idxn[j] - cstart;
113b49de8d1SLois Curfman McInnes           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
114b49de8d1SLois Curfman McInnes         }
115b49de8d1SLois Curfman McInnes         else {
116905e6a2fSBarry Smith           if (!aij->colmap) {
117905e6a2fSBarry Smith             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
118905e6a2fSBarry Smith           }
119905e6a2fSBarry Smith           col = aij->colmap[idxn[j]] - 1;
120e60e1c95SSatish Balay           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
121d9d09a02SSatish Balay           else {
122b49de8d1SLois Curfman McInnes             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
123b49de8d1SLois Curfman McInnes           }
124b49de8d1SLois Curfman McInnes         }
125b49de8d1SLois Curfman McInnes       }
126d9d09a02SSatish Balay     }
127b49de8d1SLois Curfman McInnes     else {
128b49de8d1SLois Curfman McInnes       SETERRQ(1,"MatGetValues_MPIAIJ:Only local values currently supported");
129b49de8d1SLois Curfman McInnes     }
130b49de8d1SLois Curfman McInnes   }
131b49de8d1SLois Curfman McInnes   return 0;
132b49de8d1SLois Curfman McInnes }
133b49de8d1SLois Curfman McInnes 
134fc76ce87SLois Curfman McInnes static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
1358a729477SBarry Smith {
13644a69424SLois Curfman McInnes   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
137d6dfbf8fSBarry Smith   MPI_Comm    comm = mat->comm;
13817699dbbSLois Curfman McInnes   int         size = aij->size, *owners = aij->rowners;
13917699dbbSLois Curfman McInnes   int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
1401eb62cbbSBarry Smith   MPI_Request *send_waits,*recv_waits;
1416abc6512SBarry Smith   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
1421eb62cbbSBarry Smith   InsertMode  addv;
1431eb62cbbSBarry Smith   Scalar      *rvalues,*svalues;
1441eb62cbbSBarry Smith 
1451eb62cbbSBarry Smith   /* make sure all processors are either in INSERTMODE or ADDMODE */
146d65a2f8fSBarry Smith   MPI_Allreduce(&aij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
147dbb450caSBarry Smith   if (addv == (ADD_VALUES|INSERT_VALUES)) {
148bbb6d6a8SBarry Smith     SETERRQ(1,"MatAssemblyBegin_MPIAIJ:Some processors inserted others added");
1491eb62cbbSBarry Smith   }
1501eb62cbbSBarry Smith   aij->insertmode = addv; /* in case this processor had no cache */
1511eb62cbbSBarry Smith 
1521eb62cbbSBarry Smith   /*  first count number of contributors to each processor */
1530452661fSBarry Smith   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
154cddf8d76SBarry Smith   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
1550452661fSBarry Smith   owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
1561eb62cbbSBarry Smith   for ( i=0; i<aij->stash.n; i++ ) {
1571eb62cbbSBarry Smith     idx = aij->stash.idx[i];
15817699dbbSLois Curfman McInnes     for ( j=0; j<size; j++ ) {
1591eb62cbbSBarry Smith       if (idx >= owners[j] && idx < owners[j+1]) {
1601eb62cbbSBarry Smith         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
1618a729477SBarry Smith       }
1628a729477SBarry Smith     }
1638a729477SBarry Smith   }
16417699dbbSLois Curfman McInnes   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
1651eb62cbbSBarry Smith 
1661eb62cbbSBarry Smith   /* inform other processors of number of messages and max length*/
1670452661fSBarry Smith   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
16817699dbbSLois Curfman McInnes   MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);
16917699dbbSLois Curfman McInnes   nreceives = work[rank];
17017699dbbSLois Curfman McInnes   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
17117699dbbSLois Curfman McInnes   nmax = work[rank];
1720452661fSBarry Smith   PetscFree(work);
1731eb62cbbSBarry Smith 
1741eb62cbbSBarry Smith   /* post receives:
1751eb62cbbSBarry Smith        1) each message will consist of ordered pairs
1761eb62cbbSBarry Smith      (global index,value) we store the global index as a double
177d6dfbf8fSBarry Smith      to simplify the message passing.
1781eb62cbbSBarry Smith        2) since we don't know how long each individual message is we
1791eb62cbbSBarry Smith      allocate the largest needed buffer for each receive. Potentially
1801eb62cbbSBarry Smith      this is a lot of wasted space.
1811eb62cbbSBarry Smith 
1821eb62cbbSBarry Smith 
1831eb62cbbSBarry Smith        This could be done better.
1841eb62cbbSBarry Smith   */
1850452661fSBarry Smith   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
18678b31e54SBarry Smith   CHKPTRQ(rvalues);
1870452661fSBarry Smith   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
18878b31e54SBarry Smith   CHKPTRQ(recv_waits);
1891eb62cbbSBarry Smith   for ( i=0; i<nreceives; i++ ) {
190416022c9SBarry Smith     MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
1911eb62cbbSBarry Smith               comm,recv_waits+i);
1921eb62cbbSBarry Smith   }
1931eb62cbbSBarry Smith 
1941eb62cbbSBarry Smith   /* do sends:
1951eb62cbbSBarry Smith       1) starts[i] gives the starting index in svalues for stuff going to
1961eb62cbbSBarry Smith          the ith processor
1971eb62cbbSBarry Smith   */
1980452661fSBarry Smith   svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
1990452661fSBarry Smith   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
20078b31e54SBarry Smith   CHKPTRQ(send_waits);
2010452661fSBarry Smith   starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
2021eb62cbbSBarry Smith   starts[0] = 0;
20317699dbbSLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
2041eb62cbbSBarry Smith   for ( i=0; i<aij->stash.n; i++ ) {
2051eb62cbbSBarry Smith     svalues[3*starts[owner[i]]]       = (Scalar)  aij->stash.idx[i];
2061eb62cbbSBarry Smith     svalues[3*starts[owner[i]]+1]     = (Scalar)  aij->stash.idy[i];
2071eb62cbbSBarry Smith     svalues[3*(starts[owner[i]]++)+2] =  aij->stash.array[i];
2081eb62cbbSBarry Smith   }
2090452661fSBarry Smith   PetscFree(owner);
2101eb62cbbSBarry Smith   starts[0] = 0;
21117699dbbSLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
2121eb62cbbSBarry Smith   count = 0;
21317699dbbSLois Curfman McInnes   for ( i=0; i<size; i++ ) {
2141eb62cbbSBarry Smith     if (procs[i]) {
215416022c9SBarry Smith       MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
2161eb62cbbSBarry Smith                 comm,send_waits+count++);
2171eb62cbbSBarry Smith     }
2181eb62cbbSBarry Smith   }
2190452661fSBarry Smith   PetscFree(starts); PetscFree(nprocs);
2201eb62cbbSBarry Smith 
2211eb62cbbSBarry Smith   /* Free cache space */
2222191d07cSBarry Smith   PLogInfo(0,"[%d]MatAssemblyBegin_MPIAIJ:Number of off processor values %d\n",rank,aij->stash.n);
22378b31e54SBarry Smith   ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
2241eb62cbbSBarry Smith 
2251eb62cbbSBarry Smith   aij->svalues    = svalues;    aij->rvalues    = rvalues;
2261eb62cbbSBarry Smith   aij->nsends     = nsends;     aij->nrecvs     = nreceives;
2271eb62cbbSBarry Smith   aij->send_waits = send_waits; aij->recv_waits = recv_waits;
2281eb62cbbSBarry Smith   aij->rmax       = nmax;
2291eb62cbbSBarry Smith 
2301eb62cbbSBarry Smith   return 0;
2311eb62cbbSBarry Smith }
23244a69424SLois Curfman McInnes extern int MatSetUpMultiply_MPIAIJ(Mat);
2331eb62cbbSBarry Smith 
234fc76ce87SLois Curfman McInnes static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
2351eb62cbbSBarry Smith {
23644a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
2371eb62cbbSBarry Smith   MPI_Status  *send_status,recv_status;
238416022c9SBarry Smith   int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
239905e6a2fSBarry Smith   int         row,col,other_disassembled;
2401eb62cbbSBarry Smith   Scalar      *values,val;
2411eb62cbbSBarry Smith   InsertMode  addv = aij->insertmode;
2421eb62cbbSBarry Smith 
2431eb62cbbSBarry Smith   /*  wait on receives */
2441eb62cbbSBarry Smith   while (count) {
245d6dfbf8fSBarry Smith     MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
2461eb62cbbSBarry Smith     /* unpack receives into our local space */
247d6dfbf8fSBarry Smith     values = aij->rvalues + 3*imdex*aij->rmax;
248c60448a5SBarry Smith     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
2491eb62cbbSBarry Smith     n = n/3;
2501eb62cbbSBarry Smith     for ( i=0; i<n; i++ ) {
251227d817aSBarry Smith       row = (int) PetscReal(values[3*i]) - aij->rstart;
252227d817aSBarry Smith       col = (int) PetscReal(values[3*i+1]);
2531eb62cbbSBarry Smith       val = values[3*i+2];
2541eb62cbbSBarry Smith       if (col >= aij->cstart && col < aij->cend) {
2551eb62cbbSBarry Smith         col -= aij->cstart;
2561eb62cbbSBarry Smith         MatSetValues(aij->A,1,&row,1,&col,&val,addv);
2571eb62cbbSBarry Smith       }
2581eb62cbbSBarry Smith       else {
259227d817aSBarry Smith         if (mat->was_assembled) {
260905e6a2fSBarry Smith           if (!aij->colmap) {
261905e6a2fSBarry Smith             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
262905e6a2fSBarry Smith           }
263905e6a2fSBarry Smith           col = aij->colmap[col] - 1;
264ec8511deSBarry Smith           if (col < 0  && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
2652493cbb0SBarry Smith             ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
266227d817aSBarry Smith             col = (int) PetscReal(values[3*i+1]);
267d6dfbf8fSBarry Smith           }
2689e25ed09SBarry Smith         }
2691eb62cbbSBarry Smith         MatSetValues(aij->B,1,&row,1,&col,&val,addv);
2701eb62cbbSBarry Smith       }
2711eb62cbbSBarry Smith     }
2721eb62cbbSBarry Smith     count--;
2731eb62cbbSBarry Smith   }
2740452661fSBarry Smith   PetscFree(aij->recv_waits); PetscFree(aij->rvalues);
2751eb62cbbSBarry Smith 
2761eb62cbbSBarry Smith   /* wait on sends */
2771eb62cbbSBarry Smith   if (aij->nsends) {
2780452661fSBarry Smith     send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));
27978b31e54SBarry Smith     CHKPTRQ(send_status);
2801eb62cbbSBarry Smith     MPI_Waitall(aij->nsends,aij->send_waits,send_status);
2810452661fSBarry Smith     PetscFree(send_status);
2821eb62cbbSBarry Smith   }
2830452661fSBarry Smith   PetscFree(aij->send_waits); PetscFree(aij->svalues);
2841eb62cbbSBarry Smith 
28564119d58SLois Curfman McInnes   aij->insertmode = NOT_SET_VALUES;
28678b31e54SBarry Smith   ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
28778b31e54SBarry Smith   ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);
2881eb62cbbSBarry Smith 
2892493cbb0SBarry Smith   /* determine if any processor has disassembled, if so we must
2902493cbb0SBarry Smith      also disassemble ourselfs, in order that we may reassemble. */
291227d817aSBarry Smith   MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
292227d817aSBarry Smith   if (mat->was_assembled && !other_disassembled) {
2932493cbb0SBarry Smith     ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
2942493cbb0SBarry Smith   }
2952493cbb0SBarry Smith 
2966d4a8577SBarry Smith   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
29778b31e54SBarry Smith     ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
2985e42470aSBarry Smith   }
29978b31e54SBarry Smith   ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
30078b31e54SBarry Smith   ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);
3015e42470aSBarry Smith 
3027a0afa10SBarry Smith   if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;}
3038a729477SBarry Smith   return 0;
3048a729477SBarry Smith }
3058a729477SBarry Smith 
3062ee70a88SLois Curfman McInnes static int MatZeroEntries_MPIAIJ(Mat A)
3071eb62cbbSBarry Smith {
30844a69424SLois Curfman McInnes   Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
309dbd7a890SLois Curfman McInnes   int        ierr;
31078b31e54SBarry Smith   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
31178b31e54SBarry Smith   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
3121eb62cbbSBarry Smith   return 0;
3131eb62cbbSBarry Smith }
3141eb62cbbSBarry Smith 
3151eb62cbbSBarry Smith /* the code does not do the diagonal entries correctly unless the
3161eb62cbbSBarry Smith    matrix is square and the column and row owerships are identical.
3171eb62cbbSBarry Smith    This is a BUG. The only way to fix it seems to be to access
3181eb62cbbSBarry Smith    aij->A and aij->B directly and not through the MatZeroRows()
3191eb62cbbSBarry Smith    routine.
3201eb62cbbSBarry Smith */
32144a69424SLois Curfman McInnes static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
3221eb62cbbSBarry Smith {
32344a69424SLois Curfman McInnes   Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
32417699dbbSLois Curfman McInnes   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
3256abc6512SBarry Smith   int            *procs,*nprocs,j,found,idx,nsends,*work;
32617699dbbSLois Curfman McInnes   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
3275392566eSBarry Smith   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
328d6dfbf8fSBarry Smith   int            *lens,imdex,*lrows,*values;
329d6dfbf8fSBarry Smith   MPI_Comm       comm = A->comm;
3301eb62cbbSBarry Smith   MPI_Request    *send_waits,*recv_waits;
3311eb62cbbSBarry Smith   MPI_Status     recv_status,*send_status;
3321eb62cbbSBarry Smith   IS             istmp;
3331eb62cbbSBarry Smith 
33477c4ece6SBarry Smith   ierr = ISGetSize(is,&N); CHKERRQ(ierr);
33578b31e54SBarry Smith   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
3361eb62cbbSBarry Smith 
3371eb62cbbSBarry Smith   /*  first count number of contributors to each processor */
3380452661fSBarry Smith   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
339cddf8d76SBarry Smith   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
3400452661fSBarry Smith   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
3411eb62cbbSBarry Smith   for ( i=0; i<N; i++ ) {
3421eb62cbbSBarry Smith     idx = rows[i];
3431eb62cbbSBarry Smith     found = 0;
34417699dbbSLois Curfman McInnes     for ( j=0; j<size; j++ ) {
3451eb62cbbSBarry Smith       if (idx >= owners[j] && idx < owners[j+1]) {
3461eb62cbbSBarry Smith         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
3471eb62cbbSBarry Smith       }
3481eb62cbbSBarry Smith     }
349bbb6d6a8SBarry Smith     if (!found) SETERRQ(1,"MatZeroRows_MPIAIJ:Index out of range");
3501eb62cbbSBarry Smith   }
35117699dbbSLois Curfman McInnes   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
3521eb62cbbSBarry Smith 
3531eb62cbbSBarry Smith   /* inform other processors of number of messages and max length*/
3540452661fSBarry Smith   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
35517699dbbSLois Curfman McInnes   MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
35617699dbbSLois Curfman McInnes   nrecvs = work[rank];
35717699dbbSLois Curfman McInnes   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
35817699dbbSLois Curfman McInnes   nmax = work[rank];
3590452661fSBarry Smith   PetscFree(work);
3601eb62cbbSBarry Smith 
3611eb62cbbSBarry Smith   /* post receives:   */
3620452661fSBarry Smith   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
36378b31e54SBarry Smith   CHKPTRQ(rvalues);
3640452661fSBarry Smith   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
36578b31e54SBarry Smith   CHKPTRQ(recv_waits);
3661eb62cbbSBarry Smith   for ( i=0; i<nrecvs; i++ ) {
367416022c9SBarry Smith     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
3681eb62cbbSBarry Smith   }
3691eb62cbbSBarry Smith 
3701eb62cbbSBarry Smith   /* do sends:
3711eb62cbbSBarry Smith       1) starts[i] gives the starting index in svalues for stuff going to
3721eb62cbbSBarry Smith          the ith processor
3731eb62cbbSBarry Smith   */
3740452661fSBarry Smith   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
3750452661fSBarry Smith   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
37678b31e54SBarry Smith   CHKPTRQ(send_waits);
3770452661fSBarry Smith   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
3781eb62cbbSBarry Smith   starts[0] = 0;
37917699dbbSLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
3801eb62cbbSBarry Smith   for ( i=0; i<N; i++ ) {
3811eb62cbbSBarry Smith     svalues[starts[owner[i]]++] = rows[i];
3821eb62cbbSBarry Smith   }
3831eb62cbbSBarry Smith   ISRestoreIndices(is,&rows);
3841eb62cbbSBarry Smith 
3851eb62cbbSBarry Smith   starts[0] = 0;
38617699dbbSLois Curfman McInnes   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
3871eb62cbbSBarry Smith   count = 0;
38817699dbbSLois Curfman McInnes   for ( i=0; i<size; i++ ) {
3891eb62cbbSBarry Smith     if (procs[i]) {
390416022c9SBarry Smith       MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
3911eb62cbbSBarry Smith     }
3921eb62cbbSBarry Smith   }
3930452661fSBarry Smith   PetscFree(starts);
3941eb62cbbSBarry Smith 
39517699dbbSLois Curfman McInnes   base = owners[rank];
3961eb62cbbSBarry Smith 
3971eb62cbbSBarry Smith   /*  wait on receives */
3980452661fSBarry Smith   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
3991eb62cbbSBarry Smith   source = lens + nrecvs;
4001eb62cbbSBarry Smith   count  = nrecvs; slen = 0;
4011eb62cbbSBarry Smith   while (count) {
402d6dfbf8fSBarry Smith     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
4031eb62cbbSBarry Smith     /* unpack receives into our local space */
4041eb62cbbSBarry Smith     MPI_Get_count(&recv_status,MPI_INT,&n);
405d6dfbf8fSBarry Smith     source[imdex]  = recv_status.MPI_SOURCE;
406d6dfbf8fSBarry Smith     lens[imdex]  = n;
4071eb62cbbSBarry Smith     slen += n;
4081eb62cbbSBarry Smith     count--;
4091eb62cbbSBarry Smith   }
4100452661fSBarry Smith   PetscFree(recv_waits);
4111eb62cbbSBarry Smith 
4121eb62cbbSBarry Smith   /* move the data into the send scatter */
4130452661fSBarry Smith   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
4141eb62cbbSBarry Smith   count = 0;
4151eb62cbbSBarry Smith   for ( i=0; i<nrecvs; i++ ) {
4161eb62cbbSBarry Smith     values = rvalues + i*nmax;
4171eb62cbbSBarry Smith     for ( j=0; j<lens[i]; j++ ) {
4181eb62cbbSBarry Smith       lrows[count++] = values[j] - base;
4191eb62cbbSBarry Smith     }
4201eb62cbbSBarry Smith   }
4210452661fSBarry Smith   PetscFree(rvalues); PetscFree(lens);
4220452661fSBarry Smith   PetscFree(owner); PetscFree(nprocs);
4231eb62cbbSBarry Smith 
4241eb62cbbSBarry Smith   /* actually zap the local rows */
425416022c9SBarry Smith   ierr = ISCreateSeq(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
426464493b3SBarry Smith   PLogObjectParent(A,istmp);
4270452661fSBarry Smith   PetscFree(lrows);
42878b31e54SBarry Smith   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
42978b31e54SBarry Smith   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
43078b31e54SBarry Smith   ierr = ISDestroy(istmp); CHKERRQ(ierr);
4311eb62cbbSBarry Smith 
4321eb62cbbSBarry Smith   /* wait on sends */
4331eb62cbbSBarry Smith   if (nsends) {
4340452661fSBarry Smith     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
43578b31e54SBarry Smith     CHKPTRQ(send_status);
4361eb62cbbSBarry Smith     MPI_Waitall(nsends,send_waits,send_status);
4370452661fSBarry Smith     PetscFree(send_status);
4381eb62cbbSBarry Smith   }
4390452661fSBarry Smith   PetscFree(send_waits); PetscFree(svalues);
4401eb62cbbSBarry Smith 
4411eb62cbbSBarry Smith   return 0;
4421eb62cbbSBarry Smith }
4431eb62cbbSBarry Smith 
444416022c9SBarry Smith static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
4451eb62cbbSBarry Smith {
446416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
447fbd6ef76SBarry Smith   int        ierr,nt;
448416022c9SBarry Smith 
449a2ce50c7SBarry Smith   ierr = VecGetLocalSize(xx,&nt);  CHKERRQ(ierr);
450fbd6ef76SBarry Smith   if (nt != a->n) {
45147b4a8eaSLois Curfman McInnes     SETERRQ(1,"MatMult_MPIAIJ:Incompatible parition of A and xx");
452fbd6ef76SBarry Smith   }
453a2ce50c7SBarry Smith   ierr = VecGetLocalSize(yy,&nt);  CHKERRQ(ierr);
454fbd6ef76SBarry Smith   if (nt != a->m) {
45547b4a8eaSLois Curfman McInnes     SETERRQ(1,"MatMult_MPIAIJ:Incompatible parition of A and yy");
456fbd6ef76SBarry Smith   }
45764119d58SLois Curfman McInnes   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx); CHKERRQ(ierr);
45838597bd4SSatish Balay   ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr);
45964119d58SLois Curfman McInnes   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx); CHKERRQ(ierr);
46038597bd4SSatish Balay   ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
4611eb62cbbSBarry Smith   return 0;
4621eb62cbbSBarry Smith }
4631eb62cbbSBarry Smith 
464416022c9SBarry Smith static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
465da3a660dSBarry Smith {
466416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
467da3a660dSBarry Smith   int        ierr;
46864119d58SLois Curfman McInnes   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
46938597bd4SSatish Balay   ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
47064119d58SLois Curfman McInnes   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
47138597bd4SSatish Balay   ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
472da3a660dSBarry Smith   return 0;
473da3a660dSBarry Smith }
474da3a660dSBarry Smith 
475416022c9SBarry Smith static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
476da3a660dSBarry Smith {
477416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
478da3a660dSBarry Smith   int        ierr;
479da3a660dSBarry Smith 
480da3a660dSBarry Smith   /* do nondiagonal part */
48138597bd4SSatish Balay   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
482da3a660dSBarry Smith   /* send it on its way */
483416022c9SBarry Smith   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,
48464119d58SLois Curfman McInnes                 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
485da3a660dSBarry Smith   /* do local part */
48638597bd4SSatish Balay   ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr);
487da3a660dSBarry Smith   /* receive remote parts: note this assumes the values are not actually */
488da3a660dSBarry Smith   /* inserted in yy until the next line, which is true for my implementation*/
489da3a660dSBarry Smith   /* but is not perhaps always true. */
490416022c9SBarry Smith   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,
49164119d58SLois Curfman McInnes                   (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
492da3a660dSBarry Smith   return 0;
493da3a660dSBarry Smith }
494da3a660dSBarry Smith 
495416022c9SBarry Smith static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
496da3a660dSBarry Smith {
497416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
498da3a660dSBarry Smith   int        ierr;
499da3a660dSBarry Smith 
500da3a660dSBarry Smith   /* do nondiagonal part */
50138597bd4SSatish Balay   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
502da3a660dSBarry Smith   /* send it on its way */
503416022c9SBarry Smith   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,
50464119d58SLois Curfman McInnes                  (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
505da3a660dSBarry Smith   /* do local part */
50638597bd4SSatish Balay   ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
507da3a660dSBarry Smith   /* receive remote parts: note this assumes the values are not actually */
508da3a660dSBarry Smith   /* inserted in yy until the next line, which is true for my implementation*/
509da3a660dSBarry Smith   /* but is not perhaps always true. */
510416022c9SBarry Smith   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,
51164119d58SLois Curfman McInnes                   (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
512da3a660dSBarry Smith   return 0;
513da3a660dSBarry Smith }
514da3a660dSBarry Smith 
5151eb62cbbSBarry Smith /*
5161eb62cbbSBarry Smith   This only works correctly for square matrices where the subblock A->A is the
5171eb62cbbSBarry Smith    diagonal block
5181eb62cbbSBarry Smith */
519416022c9SBarry Smith static int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
5201eb62cbbSBarry Smith {
521416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
52244cd7ae7SLois Curfman McInnes   if (a->M != a->N)
52344cd7ae7SLois Curfman McInnes     SETERRQ(1,"MatGetDiagonal_MPIAIJ:Supports only square matrix where A->A is diag block");
524416022c9SBarry Smith   return MatGetDiagonal(a->A,v);
5251eb62cbbSBarry Smith }
5261eb62cbbSBarry Smith 
527052efed2SBarry Smith static int MatScale_MPIAIJ(Scalar *aa,Mat A)
528052efed2SBarry Smith {
529052efed2SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
530052efed2SBarry Smith   int        ierr;
531052efed2SBarry Smith   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
532052efed2SBarry Smith   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
533052efed2SBarry Smith   return 0;
534052efed2SBarry Smith }
535052efed2SBarry Smith 
53644a69424SLois Curfman McInnes static int MatDestroy_MPIAIJ(PetscObject obj)
5371eb62cbbSBarry Smith {
5381eb62cbbSBarry Smith   Mat        mat = (Mat) obj;
53944a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
5401eb62cbbSBarry Smith   int        ierr;
541a5a9c739SBarry Smith #if defined(PETSC_LOG)
5426e35fa57SLois Curfman McInnes   PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N);
543a5a9c739SBarry Smith #endif
5440452661fSBarry Smith   PetscFree(aij->rowners);
54578b31e54SBarry Smith   ierr = MatDestroy(aij->A); CHKERRQ(ierr);
54678b31e54SBarry Smith   ierr = MatDestroy(aij->B); CHKERRQ(ierr);
5470452661fSBarry Smith   if (aij->colmap) PetscFree(aij->colmap);
5480452661fSBarry Smith   if (aij->garray) PetscFree(aij->garray);
5491eb62cbbSBarry Smith   if (aij->lvec)   VecDestroy(aij->lvec);
550a56f8943SBarry Smith   if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
5517a0afa10SBarry Smith   if (aij->rowvalues) PetscFree(aij->rowvalues);
5520452661fSBarry Smith   PetscFree(aij);
553a5a9c739SBarry Smith   PLogObjectDestroy(mat);
5540452661fSBarry Smith   PetscHeaderDestroy(mat);
5551eb62cbbSBarry Smith   return 0;
5561eb62cbbSBarry Smith }
5577857610eSBarry Smith #include "draw.h"
558b16a3bb1SBarry Smith #include "pinclude/pviewer.h"
559ee50ffe9SBarry Smith 
560416022c9SBarry Smith static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
5611eb62cbbSBarry Smith {
562416022c9SBarry Smith   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
563416022c9SBarry Smith   int         ierr;
564416022c9SBarry Smith 
56517699dbbSLois Curfman McInnes   if (aij->size == 1) {
566416022c9SBarry Smith     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
567416022c9SBarry Smith   }
568a523beb4SLois Curfman McInnes   else SETERRQ(1,"MatView_MPIAIJ_Binary:Only uniprocessor output supported");
569416022c9SBarry Smith   return 0;
570416022c9SBarry Smith }
571416022c9SBarry Smith 
572d7e8b826SBarry Smith static int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
573416022c9SBarry Smith {
57444a69424SLois Curfman McInnes   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
575dbb450caSBarry Smith   Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
576a56f8943SBarry Smith   int         ierr, format,shift = C->indexshift,rank;
577d636dbe3SBarry Smith   FILE        *fd;
57819bcc07fSBarry Smith   ViewerType  vtype;
579416022c9SBarry Smith 
58019bcc07fSBarry Smith   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
58119bcc07fSBarry Smith   if (vtype  == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
58290ace30eSBarry Smith     ierr = ViewerGetFormat(viewer,&format);
58390ace30eSBarry Smith     if (format == ASCII_FORMAT_INFO_DETAILED) {
58495e01e2fSLois Curfman McInnes       int nz, nzalloc, mem, flg;
585a56f8943SBarry Smith       MPI_Comm_rank(mat->comm,&rank);
58690ace30eSBarry Smith       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
587a56f8943SBarry Smith       ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem);
58895e01e2fSLois Curfman McInnes       ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr);
58977c4ece6SBarry Smith       PetscSequentialPhaseBegin(mat->comm,1);
59095e01e2fSLois Curfman McInnes       if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
59195e01e2fSLois Curfman McInnes          rank,aij->m,nz,nzalloc,mem);
59295e01e2fSLois Curfman McInnes       else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
59395e01e2fSLois Curfman McInnes          rank,aij->m,nz,nzalloc,mem);
59408480c60SBarry Smith       ierr = MatGetInfo(aij->A,MAT_LOCAL,&nz,&nzalloc,&mem);
59595e01e2fSLois Curfman McInnes       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,nz);
59608480c60SBarry Smith       ierr = MatGetInfo(aij->B,MAT_LOCAL,&nz,&nzalloc,&mem);
59795e01e2fSLois Curfman McInnes       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,nz);
598a56f8943SBarry Smith       fflush(fd);
59977c4ece6SBarry Smith       PetscSequentialPhaseEnd(mat->comm,1);
600a40aa06bSLois Curfman McInnes       ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr);
601416022c9SBarry Smith       return 0;
602416022c9SBarry Smith     }
60390ace30eSBarry Smith     else if (format == ASCII_FORMAT_INFO) {
60408480c60SBarry Smith       return 0;
60508480c60SBarry Smith     }
606416022c9SBarry Smith   }
607416022c9SBarry Smith 
60819bcc07fSBarry Smith   if (vtype == DRAW_VIEWER) {
60919bcc07fSBarry Smith     Draw       draw;
61019bcc07fSBarry Smith     PetscTruth isnull;
61119bcc07fSBarry Smith     ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
61219bcc07fSBarry Smith     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
61319bcc07fSBarry Smith   }
61419bcc07fSBarry Smith 
61519bcc07fSBarry Smith   if (vtype == ASCII_FILE_VIEWER) {
61690ace30eSBarry Smith     ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
61777c4ece6SBarry Smith     PetscSequentialPhaseBegin(mat->comm,1);
618d13ab20cSBarry Smith     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
61917699dbbSLois Curfman McInnes            aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
6201eb62cbbSBarry Smith            aij->cend);
62178b31e54SBarry Smith     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
62278b31e54SBarry Smith     ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
623d13ab20cSBarry Smith     fflush(fd);
62477c4ece6SBarry Smith     PetscSequentialPhaseEnd(mat->comm,1);
625d13ab20cSBarry Smith   }
626416022c9SBarry Smith   else {
627a56f8943SBarry Smith     int size = aij->size;
628a56f8943SBarry Smith     rank = aij->rank;
62917699dbbSLois Curfman McInnes     if (size == 1) {
63078b31e54SBarry Smith       ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
63195373324SBarry Smith     }
63295373324SBarry Smith     else {
63395373324SBarry Smith       /* assemble the entire matrix onto first processor. */
63495373324SBarry Smith       Mat         A;
635ec8511deSBarry Smith       Mat_SeqAIJ *Aloc;
6362eb8c8abSBarry Smith       int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
63795373324SBarry Smith       Scalar      *a;
6382ee70a88SLois Curfman McInnes 
63917699dbbSLois Curfman McInnes       if (!rank) {
640b4fd4287SBarry Smith         ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
641c451ab25SLois Curfman McInnes                CHKERRQ(ierr);
64295373324SBarry Smith       }
64395373324SBarry Smith       else {
644b4fd4287SBarry Smith         ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
645c451ab25SLois Curfman McInnes                CHKERRQ(ierr);
64695373324SBarry Smith       }
647464493b3SBarry Smith       PLogObjectParent(mat,A);
648416022c9SBarry Smith 
64995373324SBarry Smith       /* copy over the A part */
650ec8511deSBarry Smith       Aloc = (Mat_SeqAIJ*) aij->A->data;
6512ee70a88SLois Curfman McInnes       m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
65295373324SBarry Smith       row = aij->rstart;
653dbb450caSBarry Smith       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
65495373324SBarry Smith       for ( i=0; i<m; i++ ) {
655416022c9SBarry Smith         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
65695373324SBarry Smith         row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
65795373324SBarry Smith       }
6582ee70a88SLois Curfman McInnes       aj = Aloc->j;
659dbb450caSBarry Smith       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}
66095373324SBarry Smith 
66195373324SBarry Smith       /* copy over the B part */
662ec8511deSBarry Smith       Aloc = (Mat_SeqAIJ*) aij->B->data;
6632ee70a88SLois Curfman McInnes       m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
66495373324SBarry Smith       row = aij->rstart;
6650452661fSBarry Smith       ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
666dbb450caSBarry Smith       for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
66795373324SBarry Smith       for ( i=0; i<m; i++ ) {
668416022c9SBarry Smith         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
66995373324SBarry Smith         row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
67095373324SBarry Smith       }
6710452661fSBarry Smith       PetscFree(ct);
6726d4a8577SBarry Smith       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
6736d4a8577SBarry Smith       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
67417699dbbSLois Curfman McInnes       if (!rank) {
67578b31e54SBarry Smith         ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
67695373324SBarry Smith       }
67778b31e54SBarry Smith       ierr = MatDestroy(A); CHKERRQ(ierr);
67895373324SBarry Smith     }
67995373324SBarry Smith   }
6801eb62cbbSBarry Smith   return 0;
6811eb62cbbSBarry Smith }
6821eb62cbbSBarry Smith 
683416022c9SBarry Smith static int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
684416022c9SBarry Smith {
685416022c9SBarry Smith   Mat         mat = (Mat) obj;
686416022c9SBarry Smith   int         ierr;
68719bcc07fSBarry Smith   ViewerType  vtype;
688416022c9SBarry Smith 
68919bcc07fSBarry Smith   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
69019bcc07fSBarry Smith   if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
69119bcc07fSBarry Smith       vtype == DRAW_VIEWER       || vtype == MATLAB_VIEWER) {
692d7e8b826SBarry Smith     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
693416022c9SBarry Smith   }
69419bcc07fSBarry Smith   else if (vtype == BINARY_FILE_VIEWER) {
695416022c9SBarry Smith     return MatView_MPIAIJ_Binary(mat,viewer);
696416022c9SBarry Smith   }
697416022c9SBarry Smith   return 0;
698416022c9SBarry Smith }
699416022c9SBarry Smith 
7001eb62cbbSBarry Smith /*
7011eb62cbbSBarry Smith     This has to provide several versions.
7021eb62cbbSBarry Smith 
7031eb62cbbSBarry Smith      2) a) use only local smoothing updating outer values only once.
7041eb62cbbSBarry Smith         b) local smoothing updating outer values each inner iteration
705d6dfbf8fSBarry Smith      3) color updating out values betwen colors.
7061eb62cbbSBarry Smith */
707fc76ce87SLois Curfman McInnes static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
708dbb450caSBarry Smith                            double fshift,int its,Vec xx)
7098a729477SBarry Smith {
71044a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
711d6dfbf8fSBarry Smith   Mat        AA = mat->A, BB = mat->B;
712ec8511deSBarry Smith   Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
713c16cb8f2SBarry Smith   Scalar     *b,*x,*xs,*ls,d,*v,sum;
7146abc6512SBarry Smith   int        ierr,*idx, *diag;
715416022c9SBarry Smith   int        n = mat->n, m = mat->m, i,shift = A->indexshift;
7168a729477SBarry Smith 
717d6dfbf8fSBarry Smith   VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
718dbb450caSBarry Smith   xs = x + shift; /* shift by one for index start of 1 */
719dbb450caSBarry Smith   ls = ls + shift;
720ec8511deSBarry Smith   if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;}
721d6dfbf8fSBarry Smith   diag = A->diag;
722c16cb8f2SBarry Smith   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
723da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
72438597bd4SSatish Balay       return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
725da3a660dSBarry Smith     }
72664119d58SLois Curfman McInnes     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
72778b31e54SBarry Smith     CHKERRQ(ierr);
72864119d58SLois Curfman McInnes     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
72978b31e54SBarry Smith     CHKERRQ(ierr);
730d6dfbf8fSBarry Smith     while (its--) {
731d6dfbf8fSBarry Smith       /* go down through the rows */
732d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
733d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
734dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
735dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
736d6dfbf8fSBarry Smith         sum  = b[i];
737d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
738dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
739d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
740dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
741dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
742d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
743d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
744d6dfbf8fSBarry Smith       }
745d6dfbf8fSBarry Smith       /* come up through the rows */
746d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
747d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
748dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
749dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
750d6dfbf8fSBarry Smith         sum  = b[i];
751d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
752dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
753d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
754dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
755dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
756d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
757d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
758d6dfbf8fSBarry Smith       }
759d6dfbf8fSBarry Smith     }
760d6dfbf8fSBarry Smith   }
761d6dfbf8fSBarry Smith   else if (flag & SOR_LOCAL_FORWARD_SWEEP){
762da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
76338597bd4SSatish Balay       return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
764da3a660dSBarry Smith     }
76564119d58SLois Curfman McInnes     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
76678b31e54SBarry Smith     CHKERRQ(ierr);
76764119d58SLois Curfman McInnes     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
76878b31e54SBarry Smith     CHKERRQ(ierr);
769d6dfbf8fSBarry Smith     while (its--) {
770d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
771d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
772dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
773dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
774d6dfbf8fSBarry Smith         sum  = b[i];
775d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
776dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
777d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
778dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
779dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
780d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
781d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
782d6dfbf8fSBarry Smith       }
783d6dfbf8fSBarry Smith     }
784d6dfbf8fSBarry Smith   }
785d6dfbf8fSBarry Smith   else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
786da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
78738597bd4SSatish Balay       return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
788da3a660dSBarry Smith     }
78964119d58SLois Curfman McInnes     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,
79078b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
79164119d58SLois Curfman McInnes     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,
79278b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
793d6dfbf8fSBarry Smith     while (its--) {
794d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
795d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
796dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
797dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
798d6dfbf8fSBarry Smith         sum  = b[i];
799d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
800dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
801d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
802dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
803dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
804d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
805d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
806d6dfbf8fSBarry Smith       }
807d6dfbf8fSBarry Smith     }
808d6dfbf8fSBarry Smith   }
809c16cb8f2SBarry Smith   else {
810c16cb8f2SBarry Smith     SETERRQ(1,"MatRelax_MPIAIJ:Option not supported");
811c16cb8f2SBarry Smith   }
8128a729477SBarry Smith   return 0;
8138a729477SBarry Smith }
814a66be287SLois Curfman McInnes 
815fc76ce87SLois Curfman McInnes static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,int *nz,
816fc76ce87SLois Curfman McInnes                              int *nzalloc,int *mem)
817a66be287SLois Curfman McInnes {
818a66be287SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
819a66be287SLois Curfman McInnes   Mat        A = mat->A, B = mat->B;
820a66be287SLois Curfman McInnes   int        ierr, isend[3], irecv[3], nzA, nzallocA, memA;
821a66be287SLois Curfman McInnes 
82278b31e54SBarry Smith   ierr = MatGetInfo(A,MAT_LOCAL,&nzA,&nzallocA,&memA); CHKERRQ(ierr);
82378b31e54SBarry Smith   ierr = MatGetInfo(B,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr);
824a66be287SLois Curfman McInnes   isend[0] += nzA; isend[1] += nzallocA; isend[2] += memA;
825a66be287SLois Curfman McInnes   if (flag == MAT_LOCAL) {
826bcd2baecSBarry Smith     if (nz)       *nz      = isend[0];
827bcd2baecSBarry Smith     if (nzalloc)  *nzalloc = isend[1];
828bcd2baecSBarry Smith     if (mem)      *mem     = isend[2];
829a66be287SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_MAX) {
830d65a2f8fSBarry Smith     MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_MAX,matin->comm);
831bcd2baecSBarry Smith     if (nz)      *nz      = irecv[0];
832bcd2baecSBarry Smith     if (nzalloc) *nzalloc = irecv[1];
833bcd2baecSBarry Smith     if (mem)     *mem     = irecv[2];
834a66be287SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_SUM) {
835d65a2f8fSBarry Smith     MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_SUM,matin->comm);
836bcd2baecSBarry Smith     if (nz)      *nz      = irecv[0];
837bcd2baecSBarry Smith     if (nzalloc) *nzalloc = irecv[1];
838bcd2baecSBarry Smith     if (mem)     *mem     = irecv[2];
839a66be287SLois Curfman McInnes   }
840a66be287SLois Curfman McInnes   return 0;
841a66be287SLois Curfman McInnes }
842a66be287SLois Curfman McInnes 
843299609e3SLois Curfman McInnes extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*);
844299609e3SLois Curfman McInnes extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*);
845299609e3SLois Curfman McInnes extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double);
846299609e3SLois Curfman McInnes extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *);
847299609e3SLois Curfman McInnes extern int MatSolve_MPIAIJ(Mat,Vec,Vec);
848299609e3SLois Curfman McInnes extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec);
849299609e3SLois Curfman McInnes extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec);
850299609e3SLois Curfman McInnes extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec);
851299609e3SLois Curfman McInnes 
852416022c9SBarry Smith static int MatSetOption_MPIAIJ(Mat A,MatOption op)
853c74985f6SBarry Smith {
854c0bbcb79SLois Curfman McInnes   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
855c74985f6SBarry Smith 
8566d4a8577SBarry Smith   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
8576d4a8577SBarry Smith       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
8586d4a8577SBarry Smith       op == MAT_COLUMNS_SORTED ||
8596d4a8577SBarry Smith       op == MAT_ROW_ORIENTED) {
860c0bbcb79SLois Curfman McInnes         MatSetOption(a->A,op);
861c0bbcb79SLois Curfman McInnes         MatSetOption(a->B,op);
862c74985f6SBarry Smith   }
8636d4a8577SBarry Smith   else if (op == MAT_ROWS_SORTED ||
8646d4a8577SBarry Smith            op == MAT_SYMMETRIC ||
8656d4a8577SBarry Smith            op == MAT_STRUCTURALLY_SYMMETRIC ||
8666d4a8577SBarry Smith            op == MAT_YES_NEW_DIAGONALS)
86794a424c1SBarry Smith     PLogInfo(A,"Info:MatSetOption_MPIAIJ:Option ignored\n");
8686d4a8577SBarry Smith   else if (op == MAT_COLUMN_ORIENTED) {
8694b0e389bSBarry Smith     a->roworiented = 0;
8704b0e389bSBarry Smith     MatSetOption(a->A,op);
8714b0e389bSBarry Smith     MatSetOption(a->B,op);
8724b0e389bSBarry Smith   }
8736d4a8577SBarry Smith   else if (op == MAT_NO_NEW_DIAGONALS)
8746d4a8577SBarry Smith     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:MAT_NO_NEW_DIAGONALS");}
875c0bbcb79SLois Curfman McInnes   else
8764d39ef84SLois Curfman McInnes     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:unknown option");}
877c74985f6SBarry Smith   return 0;
878c74985f6SBarry Smith }
879c74985f6SBarry Smith 
880d1710a03SLois Curfman McInnes static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
881c74985f6SBarry Smith {
88244a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
883c74985f6SBarry Smith   *m = mat->M; *n = mat->N;
884c74985f6SBarry Smith   return 0;
885c74985f6SBarry Smith }
886c74985f6SBarry Smith 
887d1710a03SLois Curfman McInnes static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
888c74985f6SBarry Smith {
88944a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
890b7c46309SBarry Smith   *m = mat->m; *n = mat->N;
891c74985f6SBarry Smith   return 0;
892c74985f6SBarry Smith }
893c74985f6SBarry Smith 
894d1710a03SLois Curfman McInnes static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
895c74985f6SBarry Smith {
89644a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
897c74985f6SBarry Smith   *m = mat->rstart; *n = mat->rend;
898c74985f6SBarry Smith   return 0;
899c74985f6SBarry Smith }
900c74985f6SBarry Smith 
9016d84be18SBarry Smith extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
9026d84be18SBarry Smith extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
9036d84be18SBarry Smith 
9046d84be18SBarry Smith int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
90539e00950SLois Curfman McInnes {
906154123eaSLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
90770f0671dSBarry Smith   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
908154123eaSLois Curfman McInnes   int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
909154123eaSLois Curfman McInnes   int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
91070f0671dSBarry Smith   int        *cmap, *idx_p;
91139e00950SLois Curfman McInnes 
9127a0afa10SBarry Smith   if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,"MatGetRow_MPIAIJ:Already active");
9137a0afa10SBarry Smith   mat->getrowactive = PETSC_TRUE;
9147a0afa10SBarry Smith 
91570f0671dSBarry Smith   if (!mat->rowvalues && (idx || v)) {
9167a0afa10SBarry Smith     /*
9177a0afa10SBarry Smith         allocate enough space to hold information from the longest row.
9187a0afa10SBarry Smith     */
9197a0afa10SBarry Smith     Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data;
920c16cb8f2SBarry Smith     int     max = 1,m = mat->m,tmp;
921c16cb8f2SBarry Smith     for ( i=0; i<m; i++ ) {
9227a0afa10SBarry Smith       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
9237a0afa10SBarry Smith       if (max < tmp) { max = tmp; }
9247a0afa10SBarry Smith     }
9257a0afa10SBarry Smith     mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar)));
9267a0afa10SBarry Smith     CHKPTRQ(mat->rowvalues);
9277a0afa10SBarry Smith     mat->rowindices = (int *) (mat->rowvalues + max);
9287a0afa10SBarry Smith   }
9297a0afa10SBarry Smith 
930b49de8d1SLois Curfman McInnes   if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIAIJ:Only local rows")
931abc0e9e4SLois Curfman McInnes   lrow = row - rstart;
93239e00950SLois Curfman McInnes 
933154123eaSLois Curfman McInnes   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
934154123eaSLois Curfman McInnes   if (!v)   {pvA = 0; pvB = 0;}
935154123eaSLois Curfman McInnes   if (!idx) {pcA = 0; if (!v) pcB = 0;}
93638597bd4SSatish Balay   ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
93738597bd4SSatish Balay   ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
938154123eaSLois Curfman McInnes   nztot = nzA + nzB;
939154123eaSLois Curfman McInnes 
94070f0671dSBarry Smith   cmap  = mat->garray;
941154123eaSLois Curfman McInnes   if (v  || idx) {
942154123eaSLois Curfman McInnes     if (nztot) {
943154123eaSLois Curfman McInnes       /* Sort by increasing column numbers, assuming A and B already sorted */
94470f0671dSBarry Smith       int imark = -1;
945154123eaSLois Curfman McInnes       if (v) {
94670f0671dSBarry Smith         *v = v_p = mat->rowvalues;
94739e00950SLois Curfman McInnes         for ( i=0; i<nzB; i++ ) {
94870f0671dSBarry Smith           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
949154123eaSLois Curfman McInnes           else break;
950154123eaSLois Curfman McInnes         }
951154123eaSLois Curfman McInnes         imark = i;
95270f0671dSBarry Smith         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
95370f0671dSBarry Smith         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
954154123eaSLois Curfman McInnes       }
955154123eaSLois Curfman McInnes       if (idx) {
95670f0671dSBarry Smith         *idx = idx_p = mat->rowindices;
95770f0671dSBarry Smith         if (imark > -1) {
95870f0671dSBarry Smith           for ( i=0; i<imark; i++ ) {
95970f0671dSBarry Smith             idx_p[i] = cmap[cworkB[i]];
96070f0671dSBarry Smith           }
96170f0671dSBarry Smith         } else {
962154123eaSLois Curfman McInnes           for ( i=0; i<nzB; i++ ) {
96370f0671dSBarry Smith             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
964154123eaSLois Curfman McInnes             else break;
965154123eaSLois Curfman McInnes           }
966154123eaSLois Curfman McInnes           imark = i;
96770f0671dSBarry Smith         }
96870f0671dSBarry Smith         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart + cworkA[i];
96970f0671dSBarry Smith         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]];
97039e00950SLois Curfman McInnes       }
97139e00950SLois Curfman McInnes     }
9721ca473b0SSatish Balay     else {
9731ca473b0SSatish Balay       if (idx) *idx = 0;
9741ca473b0SSatish Balay       if (v)   *v   = 0;
9751ca473b0SSatish Balay     }
976154123eaSLois Curfman McInnes   }
97739e00950SLois Curfman McInnes   *nz = nztot;
97838597bd4SSatish Balay   ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
97938597bd4SSatish Balay   ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
98039e00950SLois Curfman McInnes   return 0;
98139e00950SLois Curfman McInnes }
98239e00950SLois Curfman McInnes 
9836d84be18SBarry Smith int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
98439e00950SLois Curfman McInnes {
9857a0afa10SBarry Smith   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
9867a0afa10SBarry Smith   if (aij->getrowactive == PETSC_FALSE) {
9877a0afa10SBarry Smith     SETERRQ(1,"MatRestoreRow_MPIAIJ:MatGetRow not called");
9887a0afa10SBarry Smith   }
9897a0afa10SBarry Smith   aij->getrowactive = PETSC_FALSE;
99039e00950SLois Curfman McInnes   return 0;
99139e00950SLois Curfman McInnes }
99239e00950SLois Curfman McInnes 
993cddf8d76SBarry Smith static int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
994855ac2c5SLois Curfman McInnes {
995855ac2c5SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
996ec8511deSBarry Smith   Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
997416022c9SBarry Smith   int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
998416022c9SBarry Smith   double     sum = 0.0;
99904ca555eSLois Curfman McInnes   Scalar     *v;
100004ca555eSLois Curfman McInnes 
100117699dbbSLois Curfman McInnes   if (aij->size == 1) {
100214183eadSLois Curfman McInnes     ierr =  MatNorm(aij->A,type,norm); CHKERRQ(ierr);
100337fa93a5SLois Curfman McInnes   } else {
100404ca555eSLois Curfman McInnes     if (type == NORM_FROBENIUS) {
100504ca555eSLois Curfman McInnes       v = amat->a;
100604ca555eSLois Curfman McInnes       for (i=0; i<amat->nz; i++ ) {
100704ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX)
100804ca555eSLois Curfman McInnes         sum += real(conj(*v)*(*v)); v++;
100904ca555eSLois Curfman McInnes #else
101004ca555eSLois Curfman McInnes         sum += (*v)*(*v); v++;
101104ca555eSLois Curfman McInnes #endif
101204ca555eSLois Curfman McInnes       }
101304ca555eSLois Curfman McInnes       v = bmat->a;
101404ca555eSLois Curfman McInnes       for (i=0; i<bmat->nz; i++ ) {
101504ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX)
101604ca555eSLois Curfman McInnes         sum += real(conj(*v)*(*v)); v++;
101704ca555eSLois Curfman McInnes #else
101804ca555eSLois Curfman McInnes         sum += (*v)*(*v); v++;
101904ca555eSLois Curfman McInnes #endif
102004ca555eSLois Curfman McInnes       }
10216d84be18SBarry Smith       MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
102204ca555eSLois Curfman McInnes       *norm = sqrt(*norm);
102304ca555eSLois Curfman McInnes     }
102404ca555eSLois Curfman McInnes     else if (type == NORM_1) { /* max column norm */
102504ca555eSLois Curfman McInnes       double *tmp, *tmp2;
102604ca555eSLois Curfman McInnes       int    *jj, *garray = aij->garray;
10270452661fSBarry Smith       tmp  = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp);
10280452661fSBarry Smith       tmp2 = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp2);
1029cddf8d76SBarry Smith       PetscMemzero(tmp,aij->N*sizeof(double));
103004ca555eSLois Curfman McInnes       *norm = 0.0;
103104ca555eSLois Curfman McInnes       v = amat->a; jj = amat->j;
103204ca555eSLois Curfman McInnes       for ( j=0; j<amat->nz; j++ ) {
1033579c6b6fSBarry Smith         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
103404ca555eSLois Curfman McInnes       }
103504ca555eSLois Curfman McInnes       v = bmat->a; jj = bmat->j;
103604ca555eSLois Curfman McInnes       for ( j=0; j<bmat->nz; j++ ) {
1037579c6b6fSBarry Smith         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
103804ca555eSLois Curfman McInnes       }
10396d84be18SBarry Smith       MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);
104004ca555eSLois Curfman McInnes       for ( j=0; j<aij->N; j++ ) {
104104ca555eSLois Curfman McInnes         if (tmp2[j] > *norm) *norm = tmp2[j];
104204ca555eSLois Curfman McInnes       }
10430452661fSBarry Smith       PetscFree(tmp); PetscFree(tmp2);
104404ca555eSLois Curfman McInnes     }
104504ca555eSLois Curfman McInnes     else if (type == NORM_INFINITY) { /* max row norm */
1046515d9167SLois Curfman McInnes       double ntemp = 0.0;
104704ca555eSLois Curfman McInnes       for ( j=0; j<amat->m; j++ ) {
1048dbb450caSBarry Smith         v = amat->a + amat->i[j] + shift;
104904ca555eSLois Curfman McInnes         sum = 0.0;
105004ca555eSLois Curfman McInnes         for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
1051cddf8d76SBarry Smith           sum += PetscAbsScalar(*v); v++;
105204ca555eSLois Curfman McInnes         }
1053dbb450caSBarry Smith         v = bmat->a + bmat->i[j] + shift;
105404ca555eSLois Curfman McInnes         for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
1055cddf8d76SBarry Smith           sum += PetscAbsScalar(*v); v++;
105604ca555eSLois Curfman McInnes         }
1057515d9167SLois Curfman McInnes         if (sum > ntemp) ntemp = sum;
105804ca555eSLois Curfman McInnes       }
10596d84be18SBarry Smith       MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);
106004ca555eSLois Curfman McInnes     }
106104ca555eSLois Curfman McInnes     else {
1062515d9167SLois Curfman McInnes       SETERRQ(1,"MatNorm_MPIAIJ:No support for two norm");
106304ca555eSLois Curfman McInnes     }
106437fa93a5SLois Curfman McInnes   }
1065855ac2c5SLois Curfman McInnes   return 0;
1066855ac2c5SLois Curfman McInnes }
1067855ac2c5SLois Curfman McInnes 
10680de55854SLois Curfman McInnes static int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1069b7c46309SBarry Smith {
1070b7c46309SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1071dbb450caSBarry Smith   Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
1072416022c9SBarry Smith   int        ierr,shift = Aloc->indexshift;
1073416022c9SBarry Smith   Mat        B;
1074b7c46309SBarry Smith   int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
1075b7c46309SBarry Smith   Scalar     *array;
1076b7c46309SBarry Smith 
10773638b69dSLois Curfman McInnes   if (matout == PETSC_NULL && M != N)
10783638b69dSLois Curfman McInnes     SETERRQ(1,"MatTranspose_MPIAIJ:Square matrix only for in-place");
1079b4fd4287SBarry Smith   ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,
1080b4fd4287SBarry Smith          PETSC_NULL,&B); CHKERRQ(ierr);
1081b7c46309SBarry Smith 
1082b7c46309SBarry Smith   /* copy over the A part */
1083ec8511deSBarry Smith   Aloc = (Mat_SeqAIJ*) a->A->data;
1084b7c46309SBarry Smith   m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1085b7c46309SBarry Smith   row = a->rstart;
1086dbb450caSBarry Smith   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
1087b7c46309SBarry Smith   for ( i=0; i<m; i++ ) {
1088416022c9SBarry Smith     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1089b7c46309SBarry Smith     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1090b7c46309SBarry Smith   }
1091b7c46309SBarry Smith   aj = Aloc->j;
10924af08d9eSBarry Smith   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}
1093b7c46309SBarry Smith 
1094b7c46309SBarry Smith   /* copy over the B part */
1095ec8511deSBarry Smith   Aloc = (Mat_SeqAIJ*) a->B->data;
1096b7c46309SBarry Smith   m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1097b7c46309SBarry Smith   row = a->rstart;
10980452661fSBarry Smith   ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
1099dbb450caSBarry Smith   for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
1100b7c46309SBarry Smith   for ( i=0; i<m; i++ ) {
1101416022c9SBarry Smith     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1102b7c46309SBarry Smith     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1103b7c46309SBarry Smith   }
11040452661fSBarry Smith   PetscFree(ct);
11056d4a8577SBarry Smith   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
11066d4a8577SBarry Smith   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
11073638b69dSLois Curfman McInnes   if (matout != PETSC_NULL) {
11080de55854SLois Curfman McInnes     *matout = B;
11090de55854SLois Curfman McInnes   } else {
11100de55854SLois Curfman McInnes     /* This isn't really an in-place transpose .... but free data structures from a */
11110452661fSBarry Smith     PetscFree(a->rowners);
11120de55854SLois Curfman McInnes     ierr = MatDestroy(a->A); CHKERRQ(ierr);
11130de55854SLois Curfman McInnes     ierr = MatDestroy(a->B); CHKERRQ(ierr);
11140452661fSBarry Smith     if (a->colmap) PetscFree(a->colmap);
11150452661fSBarry Smith     if (a->garray) PetscFree(a->garray);
11160de55854SLois Curfman McInnes     if (a->lvec) VecDestroy(a->lvec);
1117a56f8943SBarry Smith     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
11180452661fSBarry Smith     PetscFree(a);
1119416022c9SBarry Smith     PetscMemcpy(A,B,sizeof(struct _Mat));
11200452661fSBarry Smith     PetscHeaderDestroy(B);
11210de55854SLois Curfman McInnes   }
1122b7c46309SBarry Smith   return 0;
1123b7c46309SBarry Smith }
1124b7c46309SBarry Smith 
1125682d7d0cSBarry Smith extern int MatPrintHelp_SeqAIJ(Mat);
1126682d7d0cSBarry Smith static int MatPrintHelp_MPIAIJ(Mat A)
1127682d7d0cSBarry Smith {
1128682d7d0cSBarry Smith   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1129682d7d0cSBarry Smith 
1130682d7d0cSBarry Smith   if (!a->rank) return MatPrintHelp_SeqAIJ(a->A);
1131682d7d0cSBarry Smith   else return 0;
1132682d7d0cSBarry Smith }
1133682d7d0cSBarry Smith 
11345a838052SSatish Balay static int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
11355a838052SSatish Balay {
11365a838052SSatish Balay   *bs = 1;
11375a838052SSatish Balay   return 0;
11385a838052SSatish Balay }
11395a838052SSatish Balay 
1140fc76ce87SLois Curfman McInnes extern int MatConvert_MPIAIJ(Mat,MatType,Mat *);
11413d1612f7SBarry Smith static int MatConvertSameType_MPIAIJ(Mat,Mat *,int);
11422f86bd48SSatish Balay extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int);
1143598137ffSSatish Balay int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **);
11448a729477SBarry Smith /* -------------------------------------------------------------------*/
11452ee70a88SLois Curfman McInnes static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
114639e00950SLois Curfman McInnes        MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
114744a69424SLois Curfman McInnes        MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
114844a69424SLois Curfman McInnes        MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
1149299609e3SLois Curfman McInnes        MatSolve_MPIAIJ,MatSolveAdd_MPIAIJ,
1150299609e3SLois Curfman McInnes        MatSolveTrans_MPIAIJ,MatSolveTransAdd_MPIAIJ,
1151299609e3SLois Curfman McInnes        MatLUFactor_MPIAIJ,0,
115244a69424SLois Curfman McInnes        MatRelax_MPIAIJ,
1153b7c46309SBarry Smith        MatTranspose_MPIAIJ,
1154a66be287SLois Curfman McInnes        MatGetInfo_MPIAIJ,0,
1155855ac2c5SLois Curfman McInnes        MatGetDiagonal_MPIAIJ,0,MatNorm_MPIAIJ,
1156ee50ffe9SBarry Smith        MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
11571eb62cbbSBarry Smith        0,
1158299609e3SLois Curfman McInnes        MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
1159299609e3SLois Curfman McInnes        MatGetReordering_MPIAIJ,
1160299609e3SLois Curfman McInnes        MatLUFactorSymbolic_MPIAIJ,MatLUFactorNumeric_MPIAIJ,0,0,
1161d1710a03SLois Curfman McInnes        MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
1162299609e3SLois Curfman McInnes        MatILUFactorSymbolic_MPIAIJ,0,
11633e85bedfSBarry Smith        0,0,MatConvert_MPIAIJ,MatConvertSameType_MPIAIJ,0,0,
1164b49de8d1SLois Curfman McInnes        0,0,0,
1165598137ffSSatish Balay        MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0,
1166052efed2SBarry Smith        MatPrintHelp_MPIAIJ,
11675a838052SSatish Balay        MatScale_MPIAIJ,0,0,0,MatGetBlockSize_MPIAIJ};
11688a729477SBarry Smith 
11691987afe7SBarry Smith /*@C
1170ff756334SLois Curfman McInnes    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
11713a511b96SLois Curfman McInnes    (the default parallel PETSc format).  For good matrix assembly performance
11723a511b96SLois Curfman McInnes    the user should preallocate the matrix storage by setting the parameters
11733a511b96SLois Curfman McInnes    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
11743a511b96SLois Curfman McInnes    performance can be increased by more than a factor of 50.
11758a729477SBarry Smith 
11768a729477SBarry Smith    Input Parameters:
11771eb62cbbSBarry Smith .  comm - MPI communicator
11787d3e4905SLois Curfman McInnes .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
117992e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
118092e8d321SLois Curfman McInnes            y vector for the matrix-vector product y = Ax.
118192e8d321SLois Curfman McInnes .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
118292e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
118392e8d321SLois Curfman McInnes            x vector for the matrix-vector product y = Ax.
11847d3e4905SLois Curfman McInnes .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
118592e8d321SLois Curfman McInnes .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
1186ab693e5aSLois Curfman McInnes .  d_nz - number of nonzeros per row in diagonal portion of local submatrix
1187ff756334SLois Curfman McInnes            (same for all local rows)
11882bd5e0b2SLois Curfman McInnes .  d_nzz - array containing the number of nonzeros in the various rows of the
118992e8d321SLois Curfman McInnes            diagonal portion of the local submatrix (possibly different for each row)
11902bd5e0b2SLois Curfman McInnes            or PETSC_NULL. You must leave room for the diagonal entry even if
11912bd5e0b2SLois Curfman McInnes            it is zero.
11922bd5e0b2SLois Curfman McInnes .  o_nz - number of nonzeros per row in the off-diagonal portion of local
1193ab693e5aSLois Curfman McInnes            submatrix (same for all local rows).
11942bd5e0b2SLois Curfman McInnes .  o_nzz - array containing the number of nonzeros in the various rows of the
11952bd5e0b2SLois Curfman McInnes            off-diagonal portion of the local submatrix (possibly different for
11962bd5e0b2SLois Curfman McInnes            each row) or PETSC_NULL.
11978a729477SBarry Smith 
1198ff756334SLois Curfman McInnes    Output Parameter:
119944cd7ae7SLois Curfman McInnes .  A - the matrix
12008a729477SBarry Smith 
1201ff756334SLois Curfman McInnes    Notes:
1202ff756334SLois Curfman McInnes    The AIJ format (also called the Yale sparse matrix format or
1203ff756334SLois Curfman McInnes    compressed row storage), is fully compatible with standard Fortran 77
12040002213bSLois Curfman McInnes    storage.  That is, the stored row and column indices can begin at
12050002213bSLois Curfman McInnes    either one (as in Fortran) or zero.  See the users manual for details.
1206ff756334SLois Curfman McInnes 
1207ff756334SLois Curfman McInnes    The user MUST specify either the local or global matrix dimensions
1208ff756334SLois Curfman McInnes    (possibly both).
1209ff756334SLois Curfman McInnes 
12105511cfe3SLois Curfman McInnes    By default, this format uses inodes (identical nodes) when possible.
12115511cfe3SLois Curfman McInnes    We search for consecutive rows with the same nonzero structure, thereby
12125511cfe3SLois Curfman McInnes    reusing matrix information to achieve increased efficiency.
12135511cfe3SLois Curfman McInnes 
12145511cfe3SLois Curfman McInnes    Options Database Keys:
12155511cfe3SLois Curfman McInnes $    -mat_aij_no_inode  - Do not use inodes
12166e62573dSLois Curfman McInnes $    -mat_aij_inode_limit <limit> - Set inode limit.
12176e62573dSLois Curfman McInnes $        (max limit=5)
12186e62573dSLois Curfman McInnes $    -mat_aij_oneindex - Internally use indexing starting at 1
12196e62573dSLois Curfman McInnes $        rather than 0.  Note: When calling MatSetValues(),
12206e62573dSLois Curfman McInnes $        the user still MUST index entries starting at 0!
12215511cfe3SLois Curfman McInnes 
1222e0245417SLois Curfman McInnes    Storage Information:
1223e0245417SLois Curfman McInnes    For a square global matrix we define each processor's diagonal portion
1224e0245417SLois Curfman McInnes    to be its local rows and the corresponding columns (a square submatrix);
1225e0245417SLois Curfman McInnes    each processor's off-diagonal portion encompasses the remainder of the
1226e0245417SLois Curfman McInnes    local matrix (a rectangular submatrix).
1227e0245417SLois Curfman McInnes 
1228e0245417SLois Curfman McInnes    The user can specify preallocated storage for the diagonal part of
12295ace5be8SLois Curfman McInnes    the local submatrix with either d_nz or d_nnz (not both).  Set
12305ace5be8SLois Curfman McInnes    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
12315ace5be8SLois Curfman McInnes    memory allocation.  Likewise, specify preallocated storage for the
12325ace5be8SLois Curfman McInnes    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1233ff756334SLois Curfman McInnes 
12345511cfe3SLois Curfman McInnes    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
12355511cfe3SLois Curfman McInnes    the figure below we depict these three local rows and all columns (0-11).
12362191d07cSBarry Smith 
1237b810aeb4SBarry Smith $          0 1 2 3 4 5 6 7 8 9 10 11
1238b810aeb4SBarry Smith $         -------------------
12395511cfe3SLois Curfman McInnes $  row 3  |  o o o d d d o o o o o o
12405511cfe3SLois Curfman McInnes $  row 4  |  o o o d d d o o o o o o
12415511cfe3SLois Curfman McInnes $  row 5  |  o o o d d d o o o o o o
12425511cfe3SLois Curfman McInnes $         -------------------
1243b810aeb4SBarry Smith $
1244b810aeb4SBarry Smith 
12455511cfe3SLois Curfman McInnes    Thus, any entries in the d locations are stored in the d (diagonal)
12465511cfe3SLois Curfman McInnes    submatrix, and any entries in the o locations are stored in the
12475511cfe3SLois Curfman McInnes    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
12485511cfe3SLois Curfman McInnes    stored simply in the MATSEQAIJ format for compressed row storage.
12495511cfe3SLois Curfman McInnes 
12505511cfe3SLois Curfman McInnes    Now d_nz should indicate the number of nonzeros per row in the d matrix,
12515511cfe3SLois Curfman McInnes    and o_nz should indicate the number of nonzeros per row in the o matrix.
12525511cfe3SLois Curfman McInnes    In general, for PDE problems in which most nonzeros are near the diagonal,
12533d323bbdSBarry Smith    one expects d_nz >> o_nz. For large problems you MUST preallocate memory
125492e8d321SLois Curfman McInnes    or you will get TERRIBLE performance; see the users' manual chapter on
12553d323bbdSBarry Smith    matrices and the file $(PETSC_DIR)/Performance.
12563a511b96SLois Curfman McInnes 
1257dbd7a890SLois Curfman McInnes .keywords: matrix, aij, compressed row, sparse, parallel
1258ff756334SLois Curfman McInnes 
1259fafbff53SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
12608a729477SBarry Smith @*/
12611eb62cbbSBarry Smith int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
126244cd7ae7SLois Curfman McInnes                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
12638a729477SBarry Smith {
126444cd7ae7SLois Curfman McInnes   Mat          B;
126544cd7ae7SLois Curfman McInnes   Mat_MPIAIJ   *b;
12666abc6512SBarry Smith   int          ierr, i,sum[2],work[2];
1267416022c9SBarry Smith 
126844cd7ae7SLois Curfman McInnes   *A = 0;
126944cd7ae7SLois Curfman McInnes   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIAIJ,comm);
127044cd7ae7SLois Curfman McInnes   PLogObjectCreate(B);
127144cd7ae7SLois Curfman McInnes   B->data       = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b);
127244cd7ae7SLois Curfman McInnes   PetscMemzero(b,sizeof(Mat_MPIAIJ));
127344cd7ae7SLois Curfman McInnes   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
127444cd7ae7SLois Curfman McInnes   B->destroy    = MatDestroy_MPIAIJ;
127544cd7ae7SLois Curfman McInnes   B->view       = MatView_MPIAIJ;
127644cd7ae7SLois Curfman McInnes   B->factor     = 0;
127744cd7ae7SLois Curfman McInnes   B->assembled  = PETSC_FALSE;
1278d6dfbf8fSBarry Smith 
127944cd7ae7SLois Curfman McInnes   b->insertmode = NOT_SET_VALUES;
128044cd7ae7SLois Curfman McInnes   MPI_Comm_rank(comm,&b->rank);
128144cd7ae7SLois Curfman McInnes   MPI_Comm_size(comm,&b->size);
12821eb62cbbSBarry Smith 
1283b4fd4287SBarry Smith   if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
12842e0155e0SLois Curfman McInnes     SETERRQ(1,"MatCreateMPIAIJ:Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
12851987afe7SBarry Smith 
1286dbd7a890SLois Curfman McInnes   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
12871eb62cbbSBarry Smith     work[0] = m; work[1] = n;
1288d65a2f8fSBarry Smith     MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
1289dbd7a890SLois Curfman McInnes     if (M == PETSC_DECIDE) M = sum[0];
1290dbd7a890SLois Curfman McInnes     if (N == PETSC_DECIDE) N = sum[1];
12911eb62cbbSBarry Smith   }
129244cd7ae7SLois Curfman McInnes   if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);}
129344cd7ae7SLois Curfman McInnes   if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);}
129444cd7ae7SLois Curfman McInnes   b->m = m; B->m = m;
129544cd7ae7SLois Curfman McInnes   b->n = n; B->n = n;
129644cd7ae7SLois Curfman McInnes   b->N = N; B->N = N;
129744cd7ae7SLois Curfman McInnes   b->M = M; B->M = M;
12981eb62cbbSBarry Smith 
12991eb62cbbSBarry Smith   /* build local table of row and column ownerships */
130044cd7ae7SLois Curfman McInnes   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
130144cd7ae7SLois Curfman McInnes   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
1302603f58a4SSatish Balay   b->cowners = b->rowners + b->size + 2;
130344cd7ae7SLois Curfman McInnes   MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);
130444cd7ae7SLois Curfman McInnes   b->rowners[0] = 0;
130544cd7ae7SLois Curfman McInnes   for ( i=2; i<=b->size; i++ ) {
130644cd7ae7SLois Curfman McInnes     b->rowners[i] += b->rowners[i-1];
13078a729477SBarry Smith   }
130844cd7ae7SLois Curfman McInnes   b->rstart = b->rowners[b->rank];
130944cd7ae7SLois Curfman McInnes   b->rend   = b->rowners[b->rank+1];
131044cd7ae7SLois Curfman McInnes   MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);
131144cd7ae7SLois Curfman McInnes   b->cowners[0] = 0;
131244cd7ae7SLois Curfman McInnes   for ( i=2; i<=b->size; i++ ) {
131344cd7ae7SLois Curfman McInnes     b->cowners[i] += b->cowners[i-1];
13148a729477SBarry Smith   }
131544cd7ae7SLois Curfman McInnes   b->cstart = b->cowners[b->rank];
131644cd7ae7SLois Curfman McInnes   b->cend   = b->cowners[b->rank+1];
13178a729477SBarry Smith 
13185ace5be8SLois Curfman McInnes   if (d_nz == PETSC_DEFAULT) d_nz = 5;
131944cd7ae7SLois Curfman McInnes   ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
132044cd7ae7SLois Curfman McInnes   PLogObjectParent(B,b->A);
13217b8455f0SLois Curfman McInnes   if (o_nz == PETSC_DEFAULT) o_nz = 0;
132244cd7ae7SLois Curfman McInnes   ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
132344cd7ae7SLois Curfman McInnes   PLogObjectParent(B,b->B);
13248a729477SBarry Smith 
13251eb62cbbSBarry Smith   /* build cache for off array entries formed */
132644cd7ae7SLois Curfman McInnes   ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
132744cd7ae7SLois Curfman McInnes   b->colmap      = 0;
132844cd7ae7SLois Curfman McInnes   b->garray      = 0;
132944cd7ae7SLois Curfman McInnes   b->roworiented = 1;
13308a729477SBarry Smith 
13311eb62cbbSBarry Smith   /* stuff used for matrix vector multiply */
133244cd7ae7SLois Curfman McInnes   b->lvec      = 0;
133344cd7ae7SLois Curfman McInnes   b->Mvctx     = 0;
13348a729477SBarry Smith 
13357a0afa10SBarry Smith   /* stuff for MatGetRow() */
133644cd7ae7SLois Curfman McInnes   b->rowindices   = 0;
133744cd7ae7SLois Curfman McInnes   b->rowvalues    = 0;
133844cd7ae7SLois Curfman McInnes   b->getrowactive = PETSC_FALSE;
13397a0afa10SBarry Smith 
134044cd7ae7SLois Curfman McInnes   *A = B;
1341d6dfbf8fSBarry Smith   return 0;
1342d6dfbf8fSBarry Smith }
1343c74985f6SBarry Smith 
13443d1612f7SBarry Smith static int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues)
1345d6dfbf8fSBarry Smith {
1346d6dfbf8fSBarry Smith   Mat        mat;
1347416022c9SBarry Smith   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
1348a1b97e82SLois Curfman McInnes   int        ierr, len=0, flg;
1349d6dfbf8fSBarry Smith 
1350416022c9SBarry Smith   *newmat       = 0;
13510452661fSBarry Smith   PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm);
1352a5a9c739SBarry Smith   PLogObjectCreate(mat);
13530452661fSBarry Smith   mat->data       = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
1354416022c9SBarry Smith   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
135544a69424SLois Curfman McInnes   mat->destroy    = MatDestroy_MPIAIJ;
135644a69424SLois Curfman McInnes   mat->view       = MatView_MPIAIJ;
1357d6dfbf8fSBarry Smith   mat->factor     = matin->factor;
1358c456f294SBarry Smith   mat->assembled  = PETSC_TRUE;
1359d6dfbf8fSBarry Smith 
136044cd7ae7SLois Curfman McInnes   a->m = mat->m   = oldmat->m;
136144cd7ae7SLois Curfman McInnes   a->n = mat->n   = oldmat->n;
136244cd7ae7SLois Curfman McInnes   a->M = mat->M   = oldmat->M;
136344cd7ae7SLois Curfman McInnes   a->N = mat->N   = oldmat->N;
1364d6dfbf8fSBarry Smith 
1365416022c9SBarry Smith   a->rstart       = oldmat->rstart;
1366416022c9SBarry Smith   a->rend         = oldmat->rend;
1367416022c9SBarry Smith   a->cstart       = oldmat->cstart;
1368416022c9SBarry Smith   a->cend         = oldmat->cend;
136917699dbbSLois Curfman McInnes   a->size         = oldmat->size;
137017699dbbSLois Curfman McInnes   a->rank         = oldmat->rank;
137164119d58SLois Curfman McInnes   a->insertmode   = NOT_SET_VALUES;
1372bcd2baecSBarry Smith   a->rowvalues    = 0;
1373bcd2baecSBarry Smith   a->getrowactive = PETSC_FALSE;
1374d6dfbf8fSBarry Smith 
1375603f58a4SSatish Balay   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
1376603f58a4SSatish Balay   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
1377603f58a4SSatish Balay   a->cowners = a->rowners + a->size + 2;
1378603f58a4SSatish Balay   PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
1379416022c9SBarry Smith   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
13802ee70a88SLois Curfman McInnes   if (oldmat->colmap) {
13810452661fSBarry Smith     a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
1382416022c9SBarry Smith     PLogObjectMemory(mat,(a->N)*sizeof(int));
1383416022c9SBarry Smith     PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
1384416022c9SBarry Smith   } else a->colmap = 0;
1385ec8511deSBarry Smith   if (oldmat->garray && (len = ((Mat_SeqAIJ *) (oldmat->B->data))->n)) {
13860452661fSBarry Smith     a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray);
1387464493b3SBarry Smith     PLogObjectMemory(mat,len*sizeof(int));
1388416022c9SBarry Smith     PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
1389416022c9SBarry Smith   } else a->garray = 0;
1390d6dfbf8fSBarry Smith 
1391416022c9SBarry Smith   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
1392416022c9SBarry Smith   PLogObjectParent(mat,a->lvec);
1393a56f8943SBarry Smith   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
1394416022c9SBarry Smith   PLogObjectParent(mat,a->Mvctx);
1395416022c9SBarry Smith   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
1396416022c9SBarry Smith   PLogObjectParent(mat,a->A);
1397416022c9SBarry Smith   ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
1398416022c9SBarry Smith   PLogObjectParent(mat,a->B);
13995dd7a6c7SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
140025cdf11fSBarry Smith   if (flg) {
1401682d7d0cSBarry Smith     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
1402682d7d0cSBarry Smith   }
14038a729477SBarry Smith   *newmat = mat;
14048a729477SBarry Smith   return 0;
14058a729477SBarry Smith }
1406416022c9SBarry Smith 
140777c4ece6SBarry Smith #include "sys.h"
1408416022c9SBarry Smith 
140919bcc07fSBarry Smith int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
1410416022c9SBarry Smith {
1411d65a2f8fSBarry Smith   Mat          A;
1412416022c9SBarry Smith   int          i, nz, ierr, j,rstart, rend, fd;
1413d65a2f8fSBarry Smith   Scalar       *vals,*svals;
141419bcc07fSBarry Smith   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1415416022c9SBarry Smith   MPI_Status   status;
141617699dbbSLois Curfman McInnes   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1417d65a2f8fSBarry Smith   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
141819bcc07fSBarry Smith   int          tag = ((PetscObject)viewer)->tag;
1419416022c9SBarry Smith 
142017699dbbSLois Curfman McInnes   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
142117699dbbSLois Curfman McInnes   if (!rank) {
142290ace30eSBarry Smith     ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
142377c4ece6SBarry Smith     ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr);
1424c456f294SBarry Smith     if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIAIJ:not matrix object");
1425416022c9SBarry Smith   }
1426416022c9SBarry Smith 
1427416022c9SBarry Smith   MPI_Bcast(header+1,3,MPI_INT,0,comm);
1428416022c9SBarry Smith   M = header[1]; N = header[2];
1429416022c9SBarry Smith   /* determine ownership of all rows */
143017699dbbSLois Curfman McInnes   m = M/size + ((M % size) > rank);
14310452661fSBarry Smith   rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
1432416022c9SBarry Smith   MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1433416022c9SBarry Smith   rowners[0] = 0;
143417699dbbSLois Curfman McInnes   for ( i=2; i<=size; i++ ) {
1435416022c9SBarry Smith     rowners[i] += rowners[i-1];
1436416022c9SBarry Smith   }
143717699dbbSLois Curfman McInnes   rstart = rowners[rank];
143817699dbbSLois Curfman McInnes   rend   = rowners[rank+1];
1439416022c9SBarry Smith 
1440416022c9SBarry Smith   /* distribute row lengths to all processors */
14410452661fSBarry Smith   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
1442416022c9SBarry Smith   offlens = ourlens + (rend-rstart);
144317699dbbSLois Curfman McInnes   if (!rank) {
14440452661fSBarry Smith     rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
144577c4ece6SBarry Smith     ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
14460452661fSBarry Smith     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
144717699dbbSLois Curfman McInnes     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
1448d65a2f8fSBarry Smith     MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
14490452661fSBarry Smith     PetscFree(sndcounts);
1450416022c9SBarry Smith   }
1451416022c9SBarry Smith   else {
1452416022c9SBarry Smith     MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
1453416022c9SBarry Smith   }
1454416022c9SBarry Smith 
145517699dbbSLois Curfman McInnes   if (!rank) {
1456416022c9SBarry Smith     /* calculate the number of nonzeros on each processor */
14570452661fSBarry Smith     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
1458cddf8d76SBarry Smith     PetscMemzero(procsnz,size*sizeof(int));
145917699dbbSLois Curfman McInnes     for ( i=0; i<size; i++ ) {
1460416022c9SBarry Smith       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
1461416022c9SBarry Smith         procsnz[i] += rowlengths[j];
1462416022c9SBarry Smith       }
1463416022c9SBarry Smith     }
14640452661fSBarry Smith     PetscFree(rowlengths);
1465416022c9SBarry Smith 
1466416022c9SBarry Smith     /* determine max buffer needed and allocate it */
1467416022c9SBarry Smith     maxnz = 0;
146817699dbbSLois Curfman McInnes     for ( i=0; i<size; i++ ) {
14690452661fSBarry Smith       maxnz = PetscMax(maxnz,procsnz[i]);
1470416022c9SBarry Smith     }
14710452661fSBarry Smith     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
1472416022c9SBarry Smith 
1473416022c9SBarry Smith     /* read in my part of the matrix column indices  */
1474416022c9SBarry Smith     nz = procsnz[0];
14750452661fSBarry Smith     mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
147677c4ece6SBarry Smith     ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr);
1477d65a2f8fSBarry Smith 
1478d65a2f8fSBarry Smith     /* read in every one elses and ship off */
147917699dbbSLois Curfman McInnes     for ( i=1; i<size; i++ ) {
1480d65a2f8fSBarry Smith       nz = procsnz[i];
148177c4ece6SBarry Smith       ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
1482d65a2f8fSBarry Smith       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
1483d65a2f8fSBarry Smith     }
14840452661fSBarry Smith     PetscFree(cols);
1485416022c9SBarry Smith   }
1486416022c9SBarry Smith   else {
1487416022c9SBarry Smith     /* determine buffer space needed for message */
1488416022c9SBarry Smith     nz = 0;
1489416022c9SBarry Smith     for ( i=0; i<m; i++ ) {
1490416022c9SBarry Smith       nz += ourlens[i];
1491416022c9SBarry Smith     }
14920452661fSBarry Smith     mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1493416022c9SBarry Smith 
1494416022c9SBarry Smith     /* receive message of column indices*/
1495d65a2f8fSBarry Smith     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
1496416022c9SBarry Smith     MPI_Get_count(&status,MPI_INT,&maxnz);
1497c456f294SBarry Smith     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file");
1498416022c9SBarry Smith   }
1499416022c9SBarry Smith 
1500416022c9SBarry Smith   /* loop over local rows, determining number of off diagonal entries */
1501cddf8d76SBarry Smith   PetscMemzero(offlens,m*sizeof(int));
1502416022c9SBarry Smith   jj = 0;
1503416022c9SBarry Smith   for ( i=0; i<m; i++ ) {
1504416022c9SBarry Smith     for ( j=0; j<ourlens[i]; j++ ) {
1505d65a2f8fSBarry Smith       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1506416022c9SBarry Smith       jj++;
1507416022c9SBarry Smith     }
1508416022c9SBarry Smith   }
1509d65a2f8fSBarry Smith 
1510d65a2f8fSBarry Smith   /* create our matrix */
1511416022c9SBarry Smith   for ( i=0; i<m; i++ ) {
1512416022c9SBarry Smith     ourlens[i] -= offlens[i];
1513416022c9SBarry Smith   }
1514d65a2f8fSBarry Smith   ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1515d65a2f8fSBarry Smith   A = *newmat;
15166d4a8577SBarry Smith   MatSetOption(A,MAT_COLUMNS_SORTED);
1517d65a2f8fSBarry Smith   for ( i=0; i<m; i++ ) {
1518d65a2f8fSBarry Smith     ourlens[i] += offlens[i];
1519d65a2f8fSBarry Smith   }
1520416022c9SBarry Smith 
152117699dbbSLois Curfman McInnes   if (!rank) {
15220452661fSBarry Smith     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);
1523416022c9SBarry Smith 
1524416022c9SBarry Smith     /* read in my part of the matrix numerical values  */
1525416022c9SBarry Smith     nz = procsnz[0];
152677c4ece6SBarry Smith     ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
1527d65a2f8fSBarry Smith 
1528d65a2f8fSBarry Smith     /* insert into matrix */
1529d65a2f8fSBarry Smith     jj      = rstart;
1530d65a2f8fSBarry Smith     smycols = mycols;
1531d65a2f8fSBarry Smith     svals   = vals;
1532d65a2f8fSBarry Smith     for ( i=0; i<m; i++ ) {
1533d65a2f8fSBarry Smith       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1534d65a2f8fSBarry Smith       smycols += ourlens[i];
1535d65a2f8fSBarry Smith       svals   += ourlens[i];
1536d65a2f8fSBarry Smith       jj++;
1537416022c9SBarry Smith     }
1538416022c9SBarry Smith 
1539d65a2f8fSBarry Smith     /* read in other processors and ship out */
154017699dbbSLois Curfman McInnes     for ( i=1; i<size; i++ ) {
1541416022c9SBarry Smith       nz = procsnz[i];
154277c4ece6SBarry Smith       ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
1543d65a2f8fSBarry Smith       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
1544416022c9SBarry Smith     }
15450452661fSBarry Smith     PetscFree(procsnz);
1546416022c9SBarry Smith   }
1547d65a2f8fSBarry Smith   else {
1548d65a2f8fSBarry Smith     /* receive numeric values */
15490452661fSBarry Smith     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);
1550416022c9SBarry Smith 
1551d65a2f8fSBarry Smith     /* receive message of values*/
1552d65a2f8fSBarry Smith     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
1553d65a2f8fSBarry Smith     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1554c456f294SBarry Smith     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file");
1555d65a2f8fSBarry Smith 
1556d65a2f8fSBarry Smith     /* insert into matrix */
1557d65a2f8fSBarry Smith     jj      = rstart;
1558d65a2f8fSBarry Smith     smycols = mycols;
1559d65a2f8fSBarry Smith     svals   = vals;
1560d65a2f8fSBarry Smith     for ( i=0; i<m; i++ ) {
1561d65a2f8fSBarry Smith       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1562d65a2f8fSBarry Smith       smycols += ourlens[i];
1563d65a2f8fSBarry Smith       svals   += ourlens[i];
1564d65a2f8fSBarry Smith       jj++;
1565d65a2f8fSBarry Smith     }
1566d65a2f8fSBarry Smith   }
15670452661fSBarry Smith   PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);
1568d65a2f8fSBarry Smith 
15696d4a8577SBarry Smith   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
15706d4a8577SBarry Smith   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1571416022c9SBarry Smith   return 0;
1572416022c9SBarry Smith }
1573