xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 36ce49901d62a7ac85629aac7e4131974f34d8c2)
1905e6a2fSBarry Smith 
2cb512458SBarry Smith #ifndef lint
3*36ce4990SBarry Smith static char vcid[] = "$Id: mpiaij.c,v 1.169 1996/09/23 18:20:49 bsmith Exp bsmith $";
4cb512458SBarry Smith #endif
58a729477SBarry Smith 
670f55243SBarry Smith #include "src/mat/impls/aij/mpi/mpiaij.h"
7f5eb4b81SSatish Balay #include "src/vec/vecimpl.h"
8d9942c19SSatish Balay #include "src/inline/spops.h"
98a729477SBarry Smith 
109e25ed09SBarry Smith /* local utility routine that creates a mapping from the global column
119e25ed09SBarry Smith number to the local number in the off-diagonal part of the local
129e25ed09SBarry Smith storage of the matrix.  This is done in a non scable way since the
139e25ed09SBarry Smith length of colmap equals the global matrix length.
149e25ed09SBarry Smith */
15905e6a2fSBarry Smith static int CreateColmap_MPIAIJ_Private(Mat mat)
169e25ed09SBarry Smith {
1744a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
18ec8511deSBarry Smith   Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
19905e6a2fSBarry Smith   int        n = B->n,i;
20dbb450caSBarry Smith 
210452661fSBarry Smith   aij->colmap = (int *) PetscMalloc(aij->N*sizeof(int));CHKPTRQ(aij->colmap);
22464493b3SBarry Smith   PLogObjectMemory(mat,aij->N*sizeof(int));
23cddf8d76SBarry Smith   PetscMemzero(aij->colmap,aij->N*sizeof(int));
24905e6a2fSBarry Smith   for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
259e25ed09SBarry Smith   return 0;
269e25ed09SBarry Smith }
279e25ed09SBarry Smith 
282493cbb0SBarry Smith extern int DisAssemble_MPIAIJ(Mat);
292493cbb0SBarry Smith 
303b2fbd54SBarry Smith static int MatGetRowIJ_MPIAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
313b2fbd54SBarry Smith                            PetscTruth *done)
32299609e3SLois Curfman McInnes {
33299609e3SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
34299609e3SLois Curfman McInnes   int        ierr;
3517699dbbSLois Curfman McInnes   if (aij->size == 1) {
363b2fbd54SBarry Smith     ierr = MatGetRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
373b2fbd54SBarry Smith   } else SETERRQ(1,"MatGetRowIJ_MPIAIJ:not supported in parallel");
383b2fbd54SBarry Smith   return 0;
393b2fbd54SBarry Smith }
403b2fbd54SBarry Smith 
413b2fbd54SBarry Smith static int MatRestoreRowIJ_MPIAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
423b2fbd54SBarry Smith                                PetscTruth *done)
433b2fbd54SBarry Smith {
443b2fbd54SBarry Smith   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
453b2fbd54SBarry Smith   int        ierr;
463b2fbd54SBarry Smith   if (aij->size == 1) {
473b2fbd54SBarry Smith     ierr = MatRestoreRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
483b2fbd54SBarry Smith   } else SETERRQ(1,"MatRestoreRowIJ_MPIAIJ:not supported in parallel");
49299609e3SLois Curfman McInnes   return 0;
50299609e3SLois Curfman McInnes }
51299609e3SLois Curfman McInnes 
524b0e389bSBarry Smith static int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
538a729477SBarry Smith {
5444a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
554b0e389bSBarry Smith   Scalar     value;
561eb62cbbSBarry Smith   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
571eb62cbbSBarry Smith   int        cstart = aij->cstart, cend = aij->cend,row,col;
58905e6a2fSBarry Smith   int        roworiented = aij->roworiented;
598a729477SBarry Smith 
6064119d58SLois Curfman McInnes   if (aij->insertmode != NOT_SET_VALUES && aij->insertmode != addv) {
6148d91487SBarry Smith     SETERRQ(1,"MatSetValues_MPIAIJ:Cannot mix inserts and adds");
628a729477SBarry Smith   }
631eb62cbbSBarry Smith   aij->insertmode = addv;
648a729477SBarry Smith   for ( i=0; i<m; i++ ) {
654b0e389bSBarry Smith     if (im[i] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative row");
664b0e389bSBarry Smith     if (im[i] >= aij->M) SETERRQ(1,"MatSetValues_MPIAIJ:Row too large");
674b0e389bSBarry Smith     if (im[i] >= rstart && im[i] < rend) {
684b0e389bSBarry Smith       row = im[i] - rstart;
691eb62cbbSBarry Smith       for ( j=0; j<n; j++ ) {
704b0e389bSBarry Smith         if (in[j] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative column");
714b0e389bSBarry Smith         if (in[j] >= aij->N) SETERRQ(1,"MatSetValues_MPIAIJ:Col too large");
724b0e389bSBarry Smith         if (in[j] >= cstart && in[j] < cend){
734b0e389bSBarry Smith           col = in[j] - cstart;
744b0e389bSBarry Smith           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
754b0e389bSBarry Smith           ierr = MatSetValues(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
761eb62cbbSBarry Smith         }
771eb62cbbSBarry Smith         else {
78227d817aSBarry Smith           if (mat->was_assembled) {
79905e6a2fSBarry Smith             if (!aij->colmap) {
80905e6a2fSBarry Smith               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
81905e6a2fSBarry Smith             }
82905e6a2fSBarry Smith             col = aij->colmap[in[j]] - 1;
83ec8511deSBarry Smith             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
842493cbb0SBarry Smith               ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
854b0e389bSBarry Smith               col =  in[j];
86d6dfbf8fSBarry Smith             }
879e25ed09SBarry Smith           }
884b0e389bSBarry Smith           else col = in[j];
894b0e389bSBarry Smith           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
904b0e389bSBarry Smith           ierr = MatSetValues(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
911eb62cbbSBarry Smith         }
921eb62cbbSBarry Smith       }
931eb62cbbSBarry Smith     }
941eb62cbbSBarry Smith     else {
954b0e389bSBarry Smith       if (roworiented) {
964b0e389bSBarry Smith         ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
974b0e389bSBarry Smith       }
984b0e389bSBarry Smith       else {
994b0e389bSBarry Smith         row = im[i];
1004b0e389bSBarry Smith         for ( j=0; j<n; j++ ) {
1014b0e389bSBarry Smith           ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
1024b0e389bSBarry Smith         }
1034b0e389bSBarry Smith       }
1041eb62cbbSBarry Smith     }
1058a729477SBarry Smith   }
1068a729477SBarry Smith   return 0;
1078a729477SBarry Smith }
1088a729477SBarry Smith 
109b49de8d1SLois Curfman McInnes static int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
110b49de8d1SLois Curfman McInnes {
111b49de8d1SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
112b49de8d1SLois Curfman McInnes   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
113b49de8d1SLois Curfman McInnes   int        cstart = aij->cstart, cend = aij->cend,row,col;
114b49de8d1SLois Curfman McInnes 
115b49de8d1SLois Curfman McInnes   for ( i=0; i<m; i++ ) {
116b49de8d1SLois Curfman McInnes     if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative row");
117b49de8d1SLois Curfman McInnes     if (idxm[i] >= aij->M) SETERRQ(1,"MatGetValues_MPIAIJ:Row too large");
118b49de8d1SLois Curfman McInnes     if (idxm[i] >= rstart && idxm[i] < rend) {
119b49de8d1SLois Curfman McInnes       row = idxm[i] - rstart;
120b49de8d1SLois Curfman McInnes       for ( j=0; j<n; j++ ) {
121b49de8d1SLois Curfman McInnes         if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative column");
122b49de8d1SLois Curfman McInnes         if (idxn[j] >= aij->N) SETERRQ(1,"MatGetValues_MPIAIJ:Col too large");
123b49de8d1SLois Curfman McInnes         if (idxn[j] >= cstart && idxn[j] < cend){
124b49de8d1SLois Curfman McInnes           col = idxn[j] - cstart;
125b49de8d1SLois Curfman McInnes           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
126b49de8d1SLois Curfman McInnes         }
127b49de8d1SLois Curfman McInnes         else {
128905e6a2fSBarry Smith           if (!aij->colmap) {
129905e6a2fSBarry Smith             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
130905e6a2fSBarry Smith           }
131905e6a2fSBarry Smith           col = aij->colmap[idxn[j]] - 1;
132e60e1c95SSatish Balay           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
133d9d09a02SSatish Balay           else {
134b49de8d1SLois Curfman McInnes             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
135b49de8d1SLois Curfman McInnes           }
136b49de8d1SLois Curfman McInnes         }
137b49de8d1SLois Curfman McInnes       }
138d9d09a02SSatish Balay     }
139b49de8d1SLois Curfman McInnes     else {
140b49de8d1SLois Curfman McInnes       SETERRQ(1,"MatGetValues_MPIAIJ:Only local values currently supported");
141b49de8d1SLois Curfman McInnes     }
142b49de8d1SLois Curfman McInnes   }
143b49de8d1SLois Curfman McInnes   return 0;
144b49de8d1SLois Curfman McInnes }
145b49de8d1SLois Curfman McInnes 
146fc76ce87SLois Curfman McInnes static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
1478a729477SBarry Smith {
14844a69424SLois Curfman McInnes   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
149d6dfbf8fSBarry Smith   MPI_Comm    comm = mat->comm;
15017699dbbSLois Curfman McInnes   int         size = aij->size, *owners = aij->rowners;
15117699dbbSLois Curfman McInnes   int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
1521eb62cbbSBarry Smith   MPI_Request *send_waits,*recv_waits;
1536abc6512SBarry Smith   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
1541eb62cbbSBarry Smith   InsertMode  addv;
1551eb62cbbSBarry Smith   Scalar      *rvalues,*svalues;
1561eb62cbbSBarry Smith 
1571eb62cbbSBarry Smith   /* make sure all processors are either in INSERTMODE or ADDMODE */
158d65a2f8fSBarry Smith   MPI_Allreduce(&aij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
159dbb450caSBarry Smith   if (addv == (ADD_VALUES|INSERT_VALUES)) {
160bbb6d6a8SBarry Smith     SETERRQ(1,"MatAssemblyBegin_MPIAIJ:Some processors inserted others added");
1611eb62cbbSBarry Smith   }
1621eb62cbbSBarry Smith   aij->insertmode = addv; /* in case this processor had no cache */
1631eb62cbbSBarry Smith 
1641eb62cbbSBarry Smith   /*  first count number of contributors to each processor */
1650452661fSBarry Smith   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
166cddf8d76SBarry Smith   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
1670452661fSBarry Smith   owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
1681eb62cbbSBarry Smith   for ( i=0; i<aij->stash.n; i++ ) {
1691eb62cbbSBarry Smith     idx = aij->stash.idx[i];
17017699dbbSLois Curfman McInnes     for ( j=0; j<size; j++ ) {
1711eb62cbbSBarry Smith       if (idx >= owners[j] && idx < owners[j+1]) {
1721eb62cbbSBarry Smith         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
1738a729477SBarry Smith       }
1748a729477SBarry Smith     }
1758a729477SBarry Smith   }
17617699dbbSLois Curfman McInnes   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
1771eb62cbbSBarry Smith 
1781eb62cbbSBarry Smith   /* inform other processors of number of messages and max length*/
1790452661fSBarry Smith   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
18017699dbbSLois Curfman McInnes   MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);
18117699dbbSLois Curfman McInnes   nreceives = work[rank];
18217699dbbSLois Curfman McInnes   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
18317699dbbSLois Curfman McInnes   nmax = work[rank];
1840452661fSBarry Smith   PetscFree(work);
1851eb62cbbSBarry Smith 
1861eb62cbbSBarry Smith   /* post receives:
1871eb62cbbSBarry Smith        1) each message will consist of ordered pairs
1881eb62cbbSBarry Smith      (global index,value) we store the global index as a double
189d6dfbf8fSBarry Smith      to simplify the message passing.
1901eb62cbbSBarry Smith        2) since we don't know how long each individual message is we
1911eb62cbbSBarry Smith      allocate the largest needed buffer for each receive. Potentially
1921eb62cbbSBarry Smith      this is a lot of wasted space.
1931eb62cbbSBarry Smith 
1941eb62cbbSBarry Smith 
1951eb62cbbSBarry Smith        This could be done better.
1961eb62cbbSBarry Smith   */
1970452661fSBarry Smith   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
19878b31e54SBarry Smith   CHKPTRQ(rvalues);
1990452661fSBarry Smith   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
20078b31e54SBarry Smith   CHKPTRQ(recv_waits);
2011eb62cbbSBarry Smith   for ( i=0; i<nreceives; i++ ) {
202416022c9SBarry Smith     MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
2031eb62cbbSBarry Smith               comm,recv_waits+i);
2041eb62cbbSBarry Smith   }
2051eb62cbbSBarry Smith 
2061eb62cbbSBarry Smith   /* do sends:
2071eb62cbbSBarry Smith       1) starts[i] gives the starting index in svalues for stuff going to
2081eb62cbbSBarry Smith          the ith processor
2091eb62cbbSBarry Smith   */
2100452661fSBarry Smith   svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
2110452661fSBarry Smith   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
21278b31e54SBarry Smith   CHKPTRQ(send_waits);
2130452661fSBarry Smith   starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
2141eb62cbbSBarry Smith   starts[0] = 0;
21517699dbbSLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
2161eb62cbbSBarry Smith   for ( i=0; i<aij->stash.n; i++ ) {
2171eb62cbbSBarry Smith     svalues[3*starts[owner[i]]]       = (Scalar)  aij->stash.idx[i];
2181eb62cbbSBarry Smith     svalues[3*starts[owner[i]]+1]     = (Scalar)  aij->stash.idy[i];
2191eb62cbbSBarry Smith     svalues[3*(starts[owner[i]]++)+2] =  aij->stash.array[i];
2201eb62cbbSBarry Smith   }
2210452661fSBarry Smith   PetscFree(owner);
2221eb62cbbSBarry Smith   starts[0] = 0;
22317699dbbSLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
2241eb62cbbSBarry Smith   count = 0;
22517699dbbSLois Curfman McInnes   for ( i=0; i<size; i++ ) {
2261eb62cbbSBarry Smith     if (procs[i]) {
227416022c9SBarry Smith       MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
2281eb62cbbSBarry Smith                 comm,send_waits+count++);
2291eb62cbbSBarry Smith     }
2301eb62cbbSBarry Smith   }
2310452661fSBarry Smith   PetscFree(starts); PetscFree(nprocs);
2321eb62cbbSBarry Smith 
2331eb62cbbSBarry Smith   /* Free cache space */
2342191d07cSBarry Smith   PLogInfo(0,"[%d]MatAssemblyBegin_MPIAIJ:Number of off processor values %d\n",rank,aij->stash.n);
23578b31e54SBarry Smith   ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
2361eb62cbbSBarry Smith 
2371eb62cbbSBarry Smith   aij->svalues    = svalues;    aij->rvalues    = rvalues;
2381eb62cbbSBarry Smith   aij->nsends     = nsends;     aij->nrecvs     = nreceives;
2391eb62cbbSBarry Smith   aij->send_waits = send_waits; aij->recv_waits = recv_waits;
2401eb62cbbSBarry Smith   aij->rmax       = nmax;
2411eb62cbbSBarry Smith 
2421eb62cbbSBarry Smith   return 0;
2431eb62cbbSBarry Smith }
24444a69424SLois Curfman McInnes extern int MatSetUpMultiply_MPIAIJ(Mat);
2451eb62cbbSBarry Smith 
246fc76ce87SLois Curfman McInnes static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
2471eb62cbbSBarry Smith {
24844a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
2491eb62cbbSBarry Smith   MPI_Status  *send_status,recv_status;
250416022c9SBarry Smith   int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
251905e6a2fSBarry Smith   int         row,col,other_disassembled;
2521eb62cbbSBarry Smith   Scalar      *values,val;
2531eb62cbbSBarry Smith   InsertMode  addv = aij->insertmode;
2541eb62cbbSBarry Smith 
2551eb62cbbSBarry Smith   /*  wait on receives */
2561eb62cbbSBarry Smith   while (count) {
257d6dfbf8fSBarry Smith     MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
2581eb62cbbSBarry Smith     /* unpack receives into our local space */
259d6dfbf8fSBarry Smith     values = aij->rvalues + 3*imdex*aij->rmax;
260c60448a5SBarry Smith     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
2611eb62cbbSBarry Smith     n = n/3;
2621eb62cbbSBarry Smith     for ( i=0; i<n; i++ ) {
263227d817aSBarry Smith       row = (int) PetscReal(values[3*i]) - aij->rstart;
264227d817aSBarry Smith       col = (int) PetscReal(values[3*i+1]);
2651eb62cbbSBarry Smith       val = values[3*i+2];
2661eb62cbbSBarry Smith       if (col >= aij->cstart && col < aij->cend) {
2671eb62cbbSBarry Smith         col -= aij->cstart;
2681eb62cbbSBarry Smith         MatSetValues(aij->A,1,&row,1,&col,&val,addv);
2691eb62cbbSBarry Smith       }
2701eb62cbbSBarry Smith       else {
27155a1b374SBarry Smith         if (mat->was_assembled || mat->assembled) {
272905e6a2fSBarry Smith           if (!aij->colmap) {
273905e6a2fSBarry Smith             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
274905e6a2fSBarry Smith           }
275905e6a2fSBarry Smith           col = aij->colmap[col] - 1;
276ec8511deSBarry Smith           if (col < 0  && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
2772493cbb0SBarry Smith             ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
278227d817aSBarry Smith             col = (int) PetscReal(values[3*i+1]);
279d6dfbf8fSBarry Smith           }
2809e25ed09SBarry Smith         }
2811eb62cbbSBarry Smith         MatSetValues(aij->B,1,&row,1,&col,&val,addv);
2821eb62cbbSBarry Smith       }
2831eb62cbbSBarry Smith     }
2841eb62cbbSBarry Smith     count--;
2851eb62cbbSBarry Smith   }
2860452661fSBarry Smith   PetscFree(aij->recv_waits); PetscFree(aij->rvalues);
2871eb62cbbSBarry Smith 
2881eb62cbbSBarry Smith   /* wait on sends */
2891eb62cbbSBarry Smith   if (aij->nsends) {
2900452661fSBarry Smith     send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));
29178b31e54SBarry Smith     CHKPTRQ(send_status);
2921eb62cbbSBarry Smith     MPI_Waitall(aij->nsends,aij->send_waits,send_status);
2930452661fSBarry Smith     PetscFree(send_status);
2941eb62cbbSBarry Smith   }
2950452661fSBarry Smith   PetscFree(aij->send_waits); PetscFree(aij->svalues);
2961eb62cbbSBarry Smith 
29764119d58SLois Curfman McInnes   aij->insertmode = NOT_SET_VALUES;
29878b31e54SBarry Smith   ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
29978b31e54SBarry Smith   ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);
3001eb62cbbSBarry Smith 
3012493cbb0SBarry Smith   /* determine if any processor has disassembled, if so we must
3022493cbb0SBarry Smith      also disassemble ourselfs, in order that we may reassemble. */
303227d817aSBarry Smith   MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
304227d817aSBarry Smith   if (mat->was_assembled && !other_disassembled) {
3052493cbb0SBarry Smith     ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
3062493cbb0SBarry Smith   }
3072493cbb0SBarry Smith 
3086d4a8577SBarry Smith   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
30978b31e54SBarry Smith     ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
3105e42470aSBarry Smith   }
31178b31e54SBarry Smith   ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
31278b31e54SBarry Smith   ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);
3135e42470aSBarry Smith 
3147a0afa10SBarry Smith   if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;}
3158a729477SBarry Smith   return 0;
3168a729477SBarry Smith }
3178a729477SBarry Smith 
3182ee70a88SLois Curfman McInnes static int MatZeroEntries_MPIAIJ(Mat A)
3191eb62cbbSBarry Smith {
32044a69424SLois Curfman McInnes   Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
321dbd7a890SLois Curfman McInnes   int        ierr;
32278b31e54SBarry Smith   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
32378b31e54SBarry Smith   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
3241eb62cbbSBarry Smith   return 0;
3251eb62cbbSBarry Smith }
3261eb62cbbSBarry Smith 
3271eb62cbbSBarry Smith /* the code does not do the diagonal entries correctly unless the
3281eb62cbbSBarry Smith    matrix is square and the column and row owerships are identical.
3291eb62cbbSBarry Smith    This is a BUG. The only way to fix it seems to be to access
3301eb62cbbSBarry Smith    aij->A and aij->B directly and not through the MatZeroRows()
3311eb62cbbSBarry Smith    routine.
3321eb62cbbSBarry Smith */
33344a69424SLois Curfman McInnes static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
3341eb62cbbSBarry Smith {
33544a69424SLois Curfman McInnes   Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
33617699dbbSLois Curfman McInnes   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
3376abc6512SBarry Smith   int            *procs,*nprocs,j,found,idx,nsends,*work;
33817699dbbSLois Curfman McInnes   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
3395392566eSBarry Smith   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
340d6dfbf8fSBarry Smith   int            *lens,imdex,*lrows,*values;
341d6dfbf8fSBarry Smith   MPI_Comm       comm = A->comm;
3421eb62cbbSBarry Smith   MPI_Request    *send_waits,*recv_waits;
3431eb62cbbSBarry Smith   MPI_Status     recv_status,*send_status;
3441eb62cbbSBarry Smith   IS             istmp;
3451eb62cbbSBarry Smith 
34677c4ece6SBarry Smith   ierr = ISGetSize(is,&N); CHKERRQ(ierr);
34778b31e54SBarry Smith   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
3481eb62cbbSBarry Smith 
3491eb62cbbSBarry Smith   /*  first count number of contributors to each processor */
3500452661fSBarry Smith   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
351cddf8d76SBarry Smith   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
3520452661fSBarry Smith   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
3531eb62cbbSBarry Smith   for ( i=0; i<N; i++ ) {
3541eb62cbbSBarry Smith     idx = rows[i];
3551eb62cbbSBarry Smith     found = 0;
35617699dbbSLois Curfman McInnes     for ( j=0; j<size; j++ ) {
3571eb62cbbSBarry Smith       if (idx >= owners[j] && idx < owners[j+1]) {
3581eb62cbbSBarry Smith         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
3591eb62cbbSBarry Smith       }
3601eb62cbbSBarry Smith     }
361bbb6d6a8SBarry Smith     if (!found) SETERRQ(1,"MatZeroRows_MPIAIJ:Index out of range");
3621eb62cbbSBarry Smith   }
36317699dbbSLois Curfman McInnes   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
3641eb62cbbSBarry Smith 
3651eb62cbbSBarry Smith   /* inform other processors of number of messages and max length*/
3660452661fSBarry Smith   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
36717699dbbSLois Curfman McInnes   MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
36817699dbbSLois Curfman McInnes   nrecvs = work[rank];
36917699dbbSLois Curfman McInnes   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
37017699dbbSLois Curfman McInnes   nmax = work[rank];
3710452661fSBarry Smith   PetscFree(work);
3721eb62cbbSBarry Smith 
3731eb62cbbSBarry Smith   /* post receives:   */
3740452661fSBarry Smith   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
37578b31e54SBarry Smith   CHKPTRQ(rvalues);
3760452661fSBarry Smith   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
37778b31e54SBarry Smith   CHKPTRQ(recv_waits);
3781eb62cbbSBarry Smith   for ( i=0; i<nrecvs; i++ ) {
379416022c9SBarry Smith     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
3801eb62cbbSBarry Smith   }
3811eb62cbbSBarry Smith 
3821eb62cbbSBarry Smith   /* do sends:
3831eb62cbbSBarry Smith       1) starts[i] gives the starting index in svalues for stuff going to
3841eb62cbbSBarry Smith          the ith processor
3851eb62cbbSBarry Smith   */
3860452661fSBarry Smith   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
3870452661fSBarry Smith   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
38878b31e54SBarry Smith   CHKPTRQ(send_waits);
3890452661fSBarry Smith   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
3901eb62cbbSBarry Smith   starts[0] = 0;
39117699dbbSLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
3921eb62cbbSBarry Smith   for ( i=0; i<N; i++ ) {
3931eb62cbbSBarry Smith     svalues[starts[owner[i]]++] = rows[i];
3941eb62cbbSBarry Smith   }
3951eb62cbbSBarry Smith   ISRestoreIndices(is,&rows);
3961eb62cbbSBarry Smith 
3971eb62cbbSBarry Smith   starts[0] = 0;
39817699dbbSLois Curfman McInnes   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
3991eb62cbbSBarry Smith   count = 0;
40017699dbbSLois Curfman McInnes   for ( i=0; i<size; i++ ) {
4011eb62cbbSBarry Smith     if (procs[i]) {
402416022c9SBarry Smith       MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
4031eb62cbbSBarry Smith     }
4041eb62cbbSBarry Smith   }
4050452661fSBarry Smith   PetscFree(starts);
4061eb62cbbSBarry Smith 
40717699dbbSLois Curfman McInnes   base = owners[rank];
4081eb62cbbSBarry Smith 
4091eb62cbbSBarry Smith   /*  wait on receives */
4100452661fSBarry Smith   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
4111eb62cbbSBarry Smith   source = lens + nrecvs;
4121eb62cbbSBarry Smith   count  = nrecvs; slen = 0;
4131eb62cbbSBarry Smith   while (count) {
414d6dfbf8fSBarry Smith     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
4151eb62cbbSBarry Smith     /* unpack receives into our local space */
4161eb62cbbSBarry Smith     MPI_Get_count(&recv_status,MPI_INT,&n);
417d6dfbf8fSBarry Smith     source[imdex]  = recv_status.MPI_SOURCE;
418d6dfbf8fSBarry Smith     lens[imdex]  = n;
4191eb62cbbSBarry Smith     slen += n;
4201eb62cbbSBarry Smith     count--;
4211eb62cbbSBarry Smith   }
4220452661fSBarry Smith   PetscFree(recv_waits);
4231eb62cbbSBarry Smith 
4241eb62cbbSBarry Smith   /* move the data into the send scatter */
4250452661fSBarry Smith   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
4261eb62cbbSBarry Smith   count = 0;
4271eb62cbbSBarry Smith   for ( i=0; i<nrecvs; i++ ) {
4281eb62cbbSBarry Smith     values = rvalues + i*nmax;
4291eb62cbbSBarry Smith     for ( j=0; j<lens[i]; j++ ) {
4301eb62cbbSBarry Smith       lrows[count++] = values[j] - base;
4311eb62cbbSBarry Smith     }
4321eb62cbbSBarry Smith   }
4330452661fSBarry Smith   PetscFree(rvalues); PetscFree(lens);
4340452661fSBarry Smith   PetscFree(owner); PetscFree(nprocs);
4351eb62cbbSBarry Smith 
4361eb62cbbSBarry Smith   /* actually zap the local rows */
437537820f0SBarry Smith   ierr = ISCreateGeneral(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
438464493b3SBarry Smith   PLogObjectParent(A,istmp);
4390452661fSBarry Smith   PetscFree(lrows);
44078b31e54SBarry Smith   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
44178b31e54SBarry Smith   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
44278b31e54SBarry Smith   ierr = ISDestroy(istmp); CHKERRQ(ierr);
4431eb62cbbSBarry Smith 
4441eb62cbbSBarry Smith   /* wait on sends */
4451eb62cbbSBarry Smith   if (nsends) {
4460452661fSBarry Smith     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
44778b31e54SBarry Smith     CHKPTRQ(send_status);
4481eb62cbbSBarry Smith     MPI_Waitall(nsends,send_waits,send_status);
4490452661fSBarry Smith     PetscFree(send_status);
4501eb62cbbSBarry Smith   }
4510452661fSBarry Smith   PetscFree(send_waits); PetscFree(svalues);
4521eb62cbbSBarry Smith 
4531eb62cbbSBarry Smith   return 0;
4541eb62cbbSBarry Smith }
4551eb62cbbSBarry Smith 
456416022c9SBarry Smith static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
4571eb62cbbSBarry Smith {
458416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
459fbd6ef76SBarry Smith   int        ierr,nt;
460416022c9SBarry Smith 
461a2ce50c7SBarry Smith   ierr = VecGetLocalSize(xx,&nt);  CHKERRQ(ierr);
462fbd6ef76SBarry Smith   if (nt != a->n) {
46347b4a8eaSLois Curfman McInnes     SETERRQ(1,"MatMult_MPIAIJ:Incompatible parition of A and xx");
464fbd6ef76SBarry Smith   }
46564119d58SLois Curfman McInnes   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx); CHKERRQ(ierr);
46638597bd4SSatish Balay   ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr);
46764119d58SLois Curfman McInnes   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx); CHKERRQ(ierr);
46838597bd4SSatish Balay   ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
4691eb62cbbSBarry Smith   return 0;
4701eb62cbbSBarry Smith }
4711eb62cbbSBarry Smith 
472416022c9SBarry Smith static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
473da3a660dSBarry Smith {
474416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
475da3a660dSBarry Smith   int        ierr;
47664119d58SLois Curfman McInnes   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
47738597bd4SSatish Balay   ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
47864119d58SLois Curfman McInnes   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
47938597bd4SSatish Balay   ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
480da3a660dSBarry Smith   return 0;
481da3a660dSBarry Smith }
482da3a660dSBarry Smith 
483416022c9SBarry Smith static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
484da3a660dSBarry Smith {
485416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
486da3a660dSBarry Smith   int        ierr;
487da3a660dSBarry Smith 
488da3a660dSBarry Smith   /* do nondiagonal part */
48938597bd4SSatish Balay   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
490da3a660dSBarry Smith   /* send it on its way */
491537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
492da3a660dSBarry Smith   /* do local part */
49338597bd4SSatish Balay   ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr);
494da3a660dSBarry Smith   /* receive remote parts: note this assumes the values are not actually */
495da3a660dSBarry Smith   /* inserted in yy until the next line, which is true for my implementation*/
496da3a660dSBarry Smith   /* but is not perhaps always true. */
497537820f0SBarry Smith   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
498da3a660dSBarry Smith   return 0;
499da3a660dSBarry Smith }
500da3a660dSBarry Smith 
501416022c9SBarry Smith static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
502da3a660dSBarry Smith {
503416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
504da3a660dSBarry Smith   int        ierr;
505da3a660dSBarry Smith 
506da3a660dSBarry Smith   /* do nondiagonal part */
50738597bd4SSatish Balay   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
508da3a660dSBarry Smith   /* send it on its way */
509537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
510da3a660dSBarry Smith   /* do local part */
51138597bd4SSatish Balay   ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
512da3a660dSBarry Smith   /* receive remote parts: note this assumes the values are not actually */
513da3a660dSBarry Smith   /* inserted in yy until the next line, which is true for my implementation*/
514da3a660dSBarry Smith   /* but is not perhaps always true. */
515416022c9SBarry Smith   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,
51664119d58SLois Curfman McInnes                   (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
517da3a660dSBarry Smith   return 0;
518da3a660dSBarry Smith }
519da3a660dSBarry Smith 
5201eb62cbbSBarry Smith /*
5211eb62cbbSBarry Smith   This only works correctly for square matrices where the subblock A->A is the
5221eb62cbbSBarry Smith    diagonal block
5231eb62cbbSBarry Smith */
524416022c9SBarry Smith static int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
5251eb62cbbSBarry Smith {
526416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
52744cd7ae7SLois Curfman McInnes   if (a->M != a->N)
52844cd7ae7SLois Curfman McInnes     SETERRQ(1,"MatGetDiagonal_MPIAIJ:Supports only square matrix where A->A is diag block");
5295baf8537SBarry Smith   if (a->rstart != a->cstart || a->rend != a->cend) {
5305baf8537SBarry Smith     SETERRQ(1,"MatGetDiagonal_MPIAIJ:row partition must equal col partition");  }
531416022c9SBarry Smith   return MatGetDiagonal(a->A,v);
5321eb62cbbSBarry Smith }
5331eb62cbbSBarry Smith 
534052efed2SBarry Smith static int MatScale_MPIAIJ(Scalar *aa,Mat A)
535052efed2SBarry Smith {
536052efed2SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
537052efed2SBarry Smith   int        ierr;
538052efed2SBarry Smith   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
539052efed2SBarry Smith   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
540052efed2SBarry Smith   return 0;
541052efed2SBarry Smith }
542052efed2SBarry Smith 
54344a69424SLois Curfman McInnes static int MatDestroy_MPIAIJ(PetscObject obj)
5441eb62cbbSBarry Smith {
5451eb62cbbSBarry Smith   Mat        mat = (Mat) obj;
54644a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
5471eb62cbbSBarry Smith   int        ierr;
548a5a9c739SBarry Smith #if defined(PETSC_LOG)
5496e35fa57SLois Curfman McInnes   PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N);
550a5a9c739SBarry Smith #endif
5510452661fSBarry Smith   PetscFree(aij->rowners);
55278b31e54SBarry Smith   ierr = MatDestroy(aij->A); CHKERRQ(ierr);
55378b31e54SBarry Smith   ierr = MatDestroy(aij->B); CHKERRQ(ierr);
5540452661fSBarry Smith   if (aij->colmap) PetscFree(aij->colmap);
5550452661fSBarry Smith   if (aij->garray) PetscFree(aij->garray);
5561eb62cbbSBarry Smith   if (aij->lvec)   VecDestroy(aij->lvec);
557a56f8943SBarry Smith   if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
5587a0afa10SBarry Smith   if (aij->rowvalues) PetscFree(aij->rowvalues);
5590452661fSBarry Smith   PetscFree(aij);
560a5a9c739SBarry Smith   PLogObjectDestroy(mat);
5610452661fSBarry Smith   PetscHeaderDestroy(mat);
5621eb62cbbSBarry Smith   return 0;
5631eb62cbbSBarry Smith }
5647857610eSBarry Smith #include "draw.h"
565b16a3bb1SBarry Smith #include "pinclude/pviewer.h"
566ee50ffe9SBarry Smith 
567416022c9SBarry Smith static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
5681eb62cbbSBarry Smith {
569416022c9SBarry Smith   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
570416022c9SBarry Smith   int         ierr;
571416022c9SBarry Smith 
57217699dbbSLois Curfman McInnes   if (aij->size == 1) {
573416022c9SBarry Smith     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
574416022c9SBarry Smith   }
575a523beb4SLois Curfman McInnes   else SETERRQ(1,"MatView_MPIAIJ_Binary:Only uniprocessor output supported");
576416022c9SBarry Smith   return 0;
577416022c9SBarry Smith }
578416022c9SBarry Smith 
579d7e8b826SBarry Smith static int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
580416022c9SBarry Smith {
58144a69424SLois Curfman McInnes   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
582dbb450caSBarry Smith   Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
583a56f8943SBarry Smith   int         ierr, format,shift = C->indexshift,rank;
584d636dbe3SBarry Smith   FILE        *fd;
58519bcc07fSBarry Smith   ViewerType  vtype;
586416022c9SBarry Smith 
58719bcc07fSBarry Smith   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
58819bcc07fSBarry Smith   if (vtype  == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
58990ace30eSBarry Smith     ierr = ViewerGetFormat(viewer,&format);
59090ace30eSBarry Smith     if (format == ASCII_FORMAT_INFO_DETAILED) {
5914e220ebcSLois Curfman McInnes       MatInfo info;
5924e220ebcSLois Curfman McInnes       int     flg;
593a56f8943SBarry Smith       MPI_Comm_rank(mat->comm,&rank);
59490ace30eSBarry Smith       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
5954e220ebcSLois Curfman McInnes       ierr = MatGetInfo(mat,MAT_LOCAL,&info);
59695e01e2fSLois Curfman McInnes       ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr);
59777c4ece6SBarry Smith       PetscSequentialPhaseBegin(mat->comm,1);
59895e01e2fSLois Curfman McInnes       if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
5994e220ebcSLois Curfman McInnes          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
60095e01e2fSLois Curfman McInnes       else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
6014e220ebcSLois Curfman McInnes          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
6024e220ebcSLois Curfman McInnes       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);
6034e220ebcSLois Curfman McInnes       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);
6044e220ebcSLois Curfman McInnes       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);
6054e220ebcSLois Curfman McInnes       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);
606a56f8943SBarry Smith       fflush(fd);
60777c4ece6SBarry Smith       PetscSequentialPhaseEnd(mat->comm,1);
608a40aa06bSLois Curfman McInnes       ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr);
609416022c9SBarry Smith       return 0;
610416022c9SBarry Smith     }
61190ace30eSBarry Smith     else if (format == ASCII_FORMAT_INFO) {
61208480c60SBarry Smith       return 0;
61308480c60SBarry Smith     }
614416022c9SBarry Smith   }
615416022c9SBarry Smith 
61619bcc07fSBarry Smith   if (vtype == DRAW_VIEWER) {
61719bcc07fSBarry Smith     Draw       draw;
61819bcc07fSBarry Smith     PetscTruth isnull;
61919bcc07fSBarry Smith     ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
62019bcc07fSBarry Smith     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
62119bcc07fSBarry Smith   }
62219bcc07fSBarry Smith 
62319bcc07fSBarry Smith   if (vtype == ASCII_FILE_VIEWER) {
62490ace30eSBarry Smith     ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
62577c4ece6SBarry Smith     PetscSequentialPhaseBegin(mat->comm,1);
626d13ab20cSBarry Smith     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
62717699dbbSLois Curfman McInnes            aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
6281eb62cbbSBarry Smith            aij->cend);
62978b31e54SBarry Smith     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
63078b31e54SBarry Smith     ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
631d13ab20cSBarry Smith     fflush(fd);
63277c4ece6SBarry Smith     PetscSequentialPhaseEnd(mat->comm,1);
633d13ab20cSBarry Smith   }
634416022c9SBarry Smith   else {
635a56f8943SBarry Smith     int size = aij->size;
636a56f8943SBarry Smith     rank = aij->rank;
63717699dbbSLois Curfman McInnes     if (size == 1) {
63878b31e54SBarry Smith       ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
63995373324SBarry Smith     }
64095373324SBarry Smith     else {
64195373324SBarry Smith       /* assemble the entire matrix onto first processor. */
64295373324SBarry Smith       Mat         A;
643ec8511deSBarry Smith       Mat_SeqAIJ *Aloc;
6442eb8c8abSBarry Smith       int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
64595373324SBarry Smith       Scalar      *a;
6462ee70a88SLois Curfman McInnes 
64717699dbbSLois Curfman McInnes       if (!rank) {
648b4fd4287SBarry Smith         ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
649c451ab25SLois Curfman McInnes                CHKERRQ(ierr);
65095373324SBarry Smith       }
65195373324SBarry Smith       else {
652b4fd4287SBarry Smith         ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
653c451ab25SLois Curfman McInnes                CHKERRQ(ierr);
65495373324SBarry Smith       }
655464493b3SBarry Smith       PLogObjectParent(mat,A);
656416022c9SBarry Smith 
65795373324SBarry Smith       /* copy over the A part */
658ec8511deSBarry Smith       Aloc = (Mat_SeqAIJ*) aij->A->data;
6592ee70a88SLois Curfman McInnes       m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
66095373324SBarry Smith       row = aij->rstart;
661dbb450caSBarry Smith       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
66295373324SBarry Smith       for ( i=0; i<m; i++ ) {
663416022c9SBarry Smith         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
66495373324SBarry Smith         row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
66595373324SBarry Smith       }
6662ee70a88SLois Curfman McInnes       aj = Aloc->j;
667dbb450caSBarry Smith       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}
66895373324SBarry Smith 
66995373324SBarry Smith       /* copy over the B part */
670ec8511deSBarry Smith       Aloc = (Mat_SeqAIJ*) aij->B->data;
6712ee70a88SLois Curfman McInnes       m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
67295373324SBarry Smith       row = aij->rstart;
6730452661fSBarry Smith       ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
674dbb450caSBarry Smith       for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
67595373324SBarry Smith       for ( i=0; i<m; i++ ) {
676416022c9SBarry Smith         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
67795373324SBarry Smith         row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
67895373324SBarry Smith       }
6790452661fSBarry Smith       PetscFree(ct);
6806d4a8577SBarry Smith       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
6816d4a8577SBarry Smith       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
68217699dbbSLois Curfman McInnes       if (!rank) {
68378b31e54SBarry Smith         ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
68495373324SBarry Smith       }
68578b31e54SBarry Smith       ierr = MatDestroy(A); CHKERRQ(ierr);
68695373324SBarry Smith     }
68795373324SBarry Smith   }
6881eb62cbbSBarry Smith   return 0;
6891eb62cbbSBarry Smith }
6901eb62cbbSBarry Smith 
691416022c9SBarry Smith static int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
692416022c9SBarry Smith {
693416022c9SBarry Smith   Mat         mat = (Mat) obj;
694416022c9SBarry Smith   int         ierr;
69519bcc07fSBarry Smith   ViewerType  vtype;
696416022c9SBarry Smith 
69719bcc07fSBarry Smith   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
69819bcc07fSBarry Smith   if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
69919bcc07fSBarry Smith       vtype == DRAW_VIEWER       || vtype == MATLAB_VIEWER) {
700d7e8b826SBarry Smith     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
701416022c9SBarry Smith   }
70219bcc07fSBarry Smith   else if (vtype == BINARY_FILE_VIEWER) {
703416022c9SBarry Smith     return MatView_MPIAIJ_Binary(mat,viewer);
704416022c9SBarry Smith   }
705416022c9SBarry Smith   return 0;
706416022c9SBarry Smith }
707416022c9SBarry Smith 
7081eb62cbbSBarry Smith /*
7091eb62cbbSBarry Smith     This has to provide several versions.
7101eb62cbbSBarry Smith 
7111eb62cbbSBarry Smith      2) a) use only local smoothing updating outer values only once.
7121eb62cbbSBarry Smith         b) local smoothing updating outer values each inner iteration
713d6dfbf8fSBarry Smith      3) color updating out values betwen colors.
7141eb62cbbSBarry Smith */
715fc76ce87SLois Curfman McInnes static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
716dbb450caSBarry Smith                            double fshift,int its,Vec xx)
7178a729477SBarry Smith {
71844a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
719d6dfbf8fSBarry Smith   Mat        AA = mat->A, BB = mat->B;
720ec8511deSBarry Smith   Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
721c16cb8f2SBarry Smith   Scalar     *b,*x,*xs,*ls,d,*v,sum;
7226abc6512SBarry Smith   int        ierr,*idx, *diag;
723416022c9SBarry Smith   int        n = mat->n, m = mat->m, i,shift = A->indexshift;
7248a729477SBarry Smith 
725d6dfbf8fSBarry Smith   VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
726dbb450caSBarry Smith   xs = x + shift; /* shift by one for index start of 1 */
727dbb450caSBarry Smith   ls = ls + shift;
728ec8511deSBarry Smith   if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;}
729d6dfbf8fSBarry Smith   diag = A->diag;
730c16cb8f2SBarry Smith   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
731da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
73238597bd4SSatish Balay       return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
733da3a660dSBarry Smith     }
73464119d58SLois Curfman McInnes     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
73578b31e54SBarry Smith     CHKERRQ(ierr);
73664119d58SLois Curfman McInnes     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
73778b31e54SBarry Smith     CHKERRQ(ierr);
738d6dfbf8fSBarry Smith     while (its--) {
739d6dfbf8fSBarry Smith       /* go down through the rows */
740d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
741d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
742dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
743dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
744d6dfbf8fSBarry Smith         sum  = b[i];
745d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
746dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
747d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
748dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
749dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
750d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
75155a1b374SBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
752d6dfbf8fSBarry Smith       }
753d6dfbf8fSBarry Smith       /* come up through the rows */
754d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
755d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
756dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
757dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
758d6dfbf8fSBarry Smith         sum  = b[i];
759d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
760dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
761d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
762dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
763dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
764d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
76555a1b374SBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
766d6dfbf8fSBarry Smith       }
767d6dfbf8fSBarry Smith     }
768d6dfbf8fSBarry Smith   }
769d6dfbf8fSBarry Smith   else if (flag & SOR_LOCAL_FORWARD_SWEEP){
770da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
77138597bd4SSatish Balay       return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
772da3a660dSBarry Smith     }
77364119d58SLois Curfman McInnes     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
77478b31e54SBarry Smith     CHKERRQ(ierr);
77564119d58SLois Curfman McInnes     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
77678b31e54SBarry Smith     CHKERRQ(ierr);
777d6dfbf8fSBarry Smith     while (its--) {
778d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
779d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
780dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
781dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
782d6dfbf8fSBarry Smith         sum  = b[i];
783d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
784dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
785d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
786dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
787dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
788d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
78955a1b374SBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
790d6dfbf8fSBarry Smith       }
791d6dfbf8fSBarry Smith     }
792d6dfbf8fSBarry Smith   }
793d6dfbf8fSBarry Smith   else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
794da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
79538597bd4SSatish Balay       return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
796da3a660dSBarry Smith     }
79764119d58SLois Curfman McInnes     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,
79878b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
79964119d58SLois Curfman McInnes     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,
80078b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
801d6dfbf8fSBarry Smith     while (its--) {
802d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
803d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
804dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
805dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
806d6dfbf8fSBarry Smith         sum  = b[i];
807d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
808dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
809d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
810dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
811dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
812d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
81355a1b374SBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
814d6dfbf8fSBarry Smith       }
815d6dfbf8fSBarry Smith     }
816d6dfbf8fSBarry Smith   }
817c16cb8f2SBarry Smith   else {
818c16cb8f2SBarry Smith     SETERRQ(1,"MatRelax_MPIAIJ:Option not supported");
819c16cb8f2SBarry Smith   }
8208a729477SBarry Smith   return 0;
8218a729477SBarry Smith }
822a66be287SLois Curfman McInnes 
8234e220ebcSLois Curfman McInnes static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
824a66be287SLois Curfman McInnes {
825a66be287SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
826a66be287SLois Curfman McInnes   Mat        A = mat->A, B = mat->B;
8274e220ebcSLois Curfman McInnes   int        ierr;
8284e220ebcSLois Curfman McInnes   double     isend[5], irecv[5];
829a66be287SLois Curfman McInnes 
8304e220ebcSLois Curfman McInnes   info->rows_global    = (double)mat->M;
8314e220ebcSLois Curfman McInnes   info->columns_global = (double)mat->N;
8324e220ebcSLois Curfman McInnes   info->rows_local     = (double)mat->m;
8334e220ebcSLois Curfman McInnes   info->columns_local  = (double)mat->N;
8344e220ebcSLois Curfman McInnes   info->block_size     = 1.0;
8354e220ebcSLois Curfman McInnes   ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
8364e220ebcSLois Curfman McInnes   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
8374e220ebcSLois Curfman McInnes   isend[3] = info->memory;  isend[4] = info->mallocs;
8384e220ebcSLois Curfman McInnes   ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
8394e220ebcSLois Curfman McInnes   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
8404e220ebcSLois Curfman McInnes   isend[3] += info->memory;  isend[4] += info->mallocs;
841a66be287SLois Curfman McInnes   if (flag == MAT_LOCAL) {
8424e220ebcSLois Curfman McInnes     info->nz_used      = isend[0];
8434e220ebcSLois Curfman McInnes     info->nz_allocated = isend[1];
8444e220ebcSLois Curfman McInnes     info->nz_unneeded  = isend[2];
8454e220ebcSLois Curfman McInnes     info->memory       = isend[3];
8464e220ebcSLois Curfman McInnes     info->mallocs      = isend[4];
847a66be287SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_MAX) {
8484e220ebcSLois Curfman McInnes     MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);
8494e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
8504e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
8514e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
8524e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
8534e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
854a66be287SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_SUM) {
8554e220ebcSLois Curfman McInnes     MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);
8564e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
8574e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
8584e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
8594e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
8604e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
861a66be287SLois Curfman McInnes   }
8624e220ebcSLois Curfman McInnes   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
8634e220ebcSLois Curfman McInnes   info->fill_ratio_needed = 0;
8644e220ebcSLois Curfman McInnes   info->factor_mallocs    = 0;
8654e220ebcSLois Curfman McInnes 
866a66be287SLois Curfman McInnes   return 0;
867a66be287SLois Curfman McInnes }
868a66be287SLois Curfman McInnes 
869299609e3SLois Curfman McInnes extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*);
870299609e3SLois Curfman McInnes extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*);
871299609e3SLois Curfman McInnes extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double);
872299609e3SLois Curfman McInnes extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *);
873299609e3SLois Curfman McInnes extern int MatSolve_MPIAIJ(Mat,Vec,Vec);
874299609e3SLois Curfman McInnes extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec);
875299609e3SLois Curfman McInnes extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec);
876299609e3SLois Curfman McInnes extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec);
877299609e3SLois Curfman McInnes 
878416022c9SBarry Smith static int MatSetOption_MPIAIJ(Mat A,MatOption op)
879c74985f6SBarry Smith {
880c0bbcb79SLois Curfman McInnes   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
881c74985f6SBarry Smith 
8826d4a8577SBarry Smith   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
8836d4a8577SBarry Smith       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
8846d4a8577SBarry Smith       op == MAT_COLUMNS_SORTED ||
8856d4a8577SBarry Smith       op == MAT_ROW_ORIENTED) {
886c0bbcb79SLois Curfman McInnes         MatSetOption(a->A,op);
887c0bbcb79SLois Curfman McInnes         MatSetOption(a->B,op);
888c74985f6SBarry Smith   }
8896d4a8577SBarry Smith   else if (op == MAT_ROWS_SORTED ||
8906d4a8577SBarry Smith            op == MAT_SYMMETRIC ||
8916d4a8577SBarry Smith            op == MAT_STRUCTURALLY_SYMMETRIC ||
8926d4a8577SBarry Smith            op == MAT_YES_NEW_DIAGONALS)
89394a424c1SBarry Smith     PLogInfo(A,"Info:MatSetOption_MPIAIJ:Option ignored\n");
8946d4a8577SBarry Smith   else if (op == MAT_COLUMN_ORIENTED) {
8954b0e389bSBarry Smith     a->roworiented = 0;
8964b0e389bSBarry Smith     MatSetOption(a->A,op);
8974b0e389bSBarry Smith     MatSetOption(a->B,op);
8984b0e389bSBarry Smith   }
8996d4a8577SBarry Smith   else if (op == MAT_NO_NEW_DIAGONALS)
9006d4a8577SBarry Smith     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:MAT_NO_NEW_DIAGONALS");}
901c0bbcb79SLois Curfman McInnes   else
9024d39ef84SLois Curfman McInnes     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:unknown option");}
903c74985f6SBarry Smith   return 0;
904c74985f6SBarry Smith }
905c74985f6SBarry Smith 
906d1710a03SLois Curfman McInnes static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
907c74985f6SBarry Smith {
90844a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
909c74985f6SBarry Smith   *m = mat->M; *n = mat->N;
910c74985f6SBarry Smith   return 0;
911c74985f6SBarry Smith }
912c74985f6SBarry Smith 
913d1710a03SLois Curfman McInnes static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
914c74985f6SBarry Smith {
91544a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
916b7c46309SBarry Smith   *m = mat->m; *n = mat->N;
917c74985f6SBarry Smith   return 0;
918c74985f6SBarry Smith }
919c74985f6SBarry Smith 
920d1710a03SLois Curfman McInnes static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
921c74985f6SBarry Smith {
92244a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
923c74985f6SBarry Smith   *m = mat->rstart; *n = mat->rend;
924c74985f6SBarry Smith   return 0;
925c74985f6SBarry Smith }
926c74985f6SBarry Smith 
9276d84be18SBarry Smith extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
9286d84be18SBarry Smith extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
9296d84be18SBarry Smith 
9306d84be18SBarry Smith int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
93139e00950SLois Curfman McInnes {
932154123eaSLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
93370f0671dSBarry Smith   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
934154123eaSLois Curfman McInnes   int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
935154123eaSLois Curfman McInnes   int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
93670f0671dSBarry Smith   int        *cmap, *idx_p;
93739e00950SLois Curfman McInnes 
9387a0afa10SBarry Smith   if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,"MatGetRow_MPIAIJ:Already active");
9397a0afa10SBarry Smith   mat->getrowactive = PETSC_TRUE;
9407a0afa10SBarry Smith 
94170f0671dSBarry Smith   if (!mat->rowvalues && (idx || v)) {
9427a0afa10SBarry Smith     /*
9437a0afa10SBarry Smith         allocate enough space to hold information from the longest row.
9447a0afa10SBarry Smith     */
9457a0afa10SBarry Smith     Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data;
946c16cb8f2SBarry Smith     int     max = 1,m = mat->m,tmp;
947c16cb8f2SBarry Smith     for ( i=0; i<m; i++ ) {
9487a0afa10SBarry Smith       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
9497a0afa10SBarry Smith       if (max < tmp) { max = tmp; }
9507a0afa10SBarry Smith     }
9517a0afa10SBarry Smith     mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar)));
9527a0afa10SBarry Smith     CHKPTRQ(mat->rowvalues);
9537a0afa10SBarry Smith     mat->rowindices = (int *) (mat->rowvalues + max);
9547a0afa10SBarry Smith   }
9557a0afa10SBarry Smith 
956b49de8d1SLois Curfman McInnes   if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIAIJ:Only local rows")
957abc0e9e4SLois Curfman McInnes   lrow = row - rstart;
95839e00950SLois Curfman McInnes 
959154123eaSLois Curfman McInnes   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
960154123eaSLois Curfman McInnes   if (!v)   {pvA = 0; pvB = 0;}
961154123eaSLois Curfman McInnes   if (!idx) {pcA = 0; if (!v) pcB = 0;}
96238597bd4SSatish Balay   ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
96338597bd4SSatish Balay   ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
964154123eaSLois Curfman McInnes   nztot = nzA + nzB;
965154123eaSLois Curfman McInnes 
96670f0671dSBarry Smith   cmap  = mat->garray;
967154123eaSLois Curfman McInnes   if (v  || idx) {
968154123eaSLois Curfman McInnes     if (nztot) {
969154123eaSLois Curfman McInnes       /* Sort by increasing column numbers, assuming A and B already sorted */
97070f0671dSBarry Smith       int imark = -1;
971154123eaSLois Curfman McInnes       if (v) {
97270f0671dSBarry Smith         *v = v_p = mat->rowvalues;
97339e00950SLois Curfman McInnes         for ( i=0; i<nzB; i++ ) {
97470f0671dSBarry Smith           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
975154123eaSLois Curfman McInnes           else break;
976154123eaSLois Curfman McInnes         }
977154123eaSLois Curfman McInnes         imark = i;
97870f0671dSBarry Smith         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
97970f0671dSBarry Smith         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
980154123eaSLois Curfman McInnes       }
981154123eaSLois Curfman McInnes       if (idx) {
98270f0671dSBarry Smith         *idx = idx_p = mat->rowindices;
98370f0671dSBarry Smith         if (imark > -1) {
98470f0671dSBarry Smith           for ( i=0; i<imark; i++ ) {
98570f0671dSBarry Smith             idx_p[i] = cmap[cworkB[i]];
98670f0671dSBarry Smith           }
98770f0671dSBarry Smith         } else {
988154123eaSLois Curfman McInnes           for ( i=0; i<nzB; i++ ) {
98970f0671dSBarry Smith             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
990154123eaSLois Curfman McInnes             else break;
991154123eaSLois Curfman McInnes           }
992154123eaSLois Curfman McInnes           imark = i;
99370f0671dSBarry Smith         }
99470f0671dSBarry Smith         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart + cworkA[i];
99570f0671dSBarry Smith         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]];
99639e00950SLois Curfman McInnes       }
99739e00950SLois Curfman McInnes     }
9981ca473b0SSatish Balay     else {
9991ca473b0SSatish Balay       if (idx) *idx = 0;
10001ca473b0SSatish Balay       if (v)   *v   = 0;
10011ca473b0SSatish Balay     }
1002154123eaSLois Curfman McInnes   }
100339e00950SLois Curfman McInnes   *nz = nztot;
100438597bd4SSatish Balay   ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
100538597bd4SSatish Balay   ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
100639e00950SLois Curfman McInnes   return 0;
100739e00950SLois Curfman McInnes }
100839e00950SLois Curfman McInnes 
10096d84be18SBarry Smith int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
101039e00950SLois Curfman McInnes {
10117a0afa10SBarry Smith   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
10127a0afa10SBarry Smith   if (aij->getrowactive == PETSC_FALSE) {
10137a0afa10SBarry Smith     SETERRQ(1,"MatRestoreRow_MPIAIJ:MatGetRow not called");
10147a0afa10SBarry Smith   }
10157a0afa10SBarry Smith   aij->getrowactive = PETSC_FALSE;
101639e00950SLois Curfman McInnes   return 0;
101739e00950SLois Curfman McInnes }
101839e00950SLois Curfman McInnes 
1019cddf8d76SBarry Smith static int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
1020855ac2c5SLois Curfman McInnes {
1021855ac2c5SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1022ec8511deSBarry Smith   Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
1023416022c9SBarry Smith   int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
1024416022c9SBarry Smith   double     sum = 0.0;
102504ca555eSLois Curfman McInnes   Scalar     *v;
102604ca555eSLois Curfman McInnes 
102717699dbbSLois Curfman McInnes   if (aij->size == 1) {
102814183eadSLois Curfman McInnes     ierr =  MatNorm(aij->A,type,norm); CHKERRQ(ierr);
102937fa93a5SLois Curfman McInnes   } else {
103004ca555eSLois Curfman McInnes     if (type == NORM_FROBENIUS) {
103104ca555eSLois Curfman McInnes       v = amat->a;
103204ca555eSLois Curfman McInnes       for (i=0; i<amat->nz; i++ ) {
103304ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX)
103404ca555eSLois Curfman McInnes         sum += real(conj(*v)*(*v)); v++;
103504ca555eSLois Curfman McInnes #else
103604ca555eSLois Curfman McInnes         sum += (*v)*(*v); v++;
103704ca555eSLois Curfman McInnes #endif
103804ca555eSLois Curfman McInnes       }
103904ca555eSLois Curfman McInnes       v = bmat->a;
104004ca555eSLois Curfman McInnes       for (i=0; i<bmat->nz; i++ ) {
104104ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX)
104204ca555eSLois Curfman McInnes         sum += real(conj(*v)*(*v)); v++;
104304ca555eSLois Curfman McInnes #else
104404ca555eSLois Curfman McInnes         sum += (*v)*(*v); v++;
104504ca555eSLois Curfman McInnes #endif
104604ca555eSLois Curfman McInnes       }
10476d84be18SBarry Smith       MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
104804ca555eSLois Curfman McInnes       *norm = sqrt(*norm);
104904ca555eSLois Curfman McInnes     }
105004ca555eSLois Curfman McInnes     else if (type == NORM_1) { /* max column norm */
105104ca555eSLois Curfman McInnes       double *tmp, *tmp2;
105204ca555eSLois Curfman McInnes       int    *jj, *garray = aij->garray;
10530452661fSBarry Smith       tmp  = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp);
10540452661fSBarry Smith       tmp2 = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp2);
1055cddf8d76SBarry Smith       PetscMemzero(tmp,aij->N*sizeof(double));
105604ca555eSLois Curfman McInnes       *norm = 0.0;
105704ca555eSLois Curfman McInnes       v = amat->a; jj = amat->j;
105804ca555eSLois Curfman McInnes       for ( j=0; j<amat->nz; j++ ) {
1059579c6b6fSBarry Smith         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
106004ca555eSLois Curfman McInnes       }
106104ca555eSLois Curfman McInnes       v = bmat->a; jj = bmat->j;
106204ca555eSLois Curfman McInnes       for ( j=0; j<bmat->nz; j++ ) {
1063579c6b6fSBarry Smith         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
106404ca555eSLois Curfman McInnes       }
10656d84be18SBarry Smith       MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);
106604ca555eSLois Curfman McInnes       for ( j=0; j<aij->N; j++ ) {
106704ca555eSLois Curfman McInnes         if (tmp2[j] > *norm) *norm = tmp2[j];
106804ca555eSLois Curfman McInnes       }
10690452661fSBarry Smith       PetscFree(tmp); PetscFree(tmp2);
107004ca555eSLois Curfman McInnes     }
107104ca555eSLois Curfman McInnes     else if (type == NORM_INFINITY) { /* max row norm */
1072515d9167SLois Curfman McInnes       double ntemp = 0.0;
107304ca555eSLois Curfman McInnes       for ( j=0; j<amat->m; j++ ) {
1074dbb450caSBarry Smith         v = amat->a + amat->i[j] + shift;
107504ca555eSLois Curfman McInnes         sum = 0.0;
107604ca555eSLois Curfman McInnes         for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
1077cddf8d76SBarry Smith           sum += PetscAbsScalar(*v); v++;
107804ca555eSLois Curfman McInnes         }
1079dbb450caSBarry Smith         v = bmat->a + bmat->i[j] + shift;
108004ca555eSLois Curfman McInnes         for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
1081cddf8d76SBarry Smith           sum += PetscAbsScalar(*v); v++;
108204ca555eSLois Curfman McInnes         }
1083515d9167SLois Curfman McInnes         if (sum > ntemp) ntemp = sum;
108404ca555eSLois Curfman McInnes       }
10856d84be18SBarry Smith       MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);
108604ca555eSLois Curfman McInnes     }
108704ca555eSLois Curfman McInnes     else {
1088515d9167SLois Curfman McInnes       SETERRQ(1,"MatNorm_MPIAIJ:No support for two norm");
108904ca555eSLois Curfman McInnes     }
109037fa93a5SLois Curfman McInnes   }
1091855ac2c5SLois Curfman McInnes   return 0;
1092855ac2c5SLois Curfman McInnes }
1093855ac2c5SLois Curfman McInnes 
10940de55854SLois Curfman McInnes static int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1095b7c46309SBarry Smith {
1096b7c46309SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1097dbb450caSBarry Smith   Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
1098416022c9SBarry Smith   int        ierr,shift = Aloc->indexshift;
1099416022c9SBarry Smith   Mat        B;
1100b7c46309SBarry Smith   int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
1101b7c46309SBarry Smith   Scalar     *array;
1102b7c46309SBarry Smith 
11033638b69dSLois Curfman McInnes   if (matout == PETSC_NULL && M != N)
11043638b69dSLois Curfman McInnes     SETERRQ(1,"MatTranspose_MPIAIJ:Square matrix only for in-place");
1105b4fd4287SBarry Smith   ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,
1106b4fd4287SBarry Smith          PETSC_NULL,&B); CHKERRQ(ierr);
1107b7c46309SBarry Smith 
1108b7c46309SBarry Smith   /* copy over the A part */
1109ec8511deSBarry Smith   Aloc = (Mat_SeqAIJ*) a->A->data;
1110b7c46309SBarry Smith   m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1111b7c46309SBarry Smith   row = a->rstart;
1112dbb450caSBarry Smith   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
1113b7c46309SBarry Smith   for ( i=0; i<m; i++ ) {
1114416022c9SBarry Smith     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1115b7c46309SBarry Smith     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1116b7c46309SBarry Smith   }
1117b7c46309SBarry Smith   aj = Aloc->j;
11184af08d9eSBarry Smith   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}
1119b7c46309SBarry Smith 
1120b7c46309SBarry Smith   /* copy over the B part */
1121ec8511deSBarry Smith   Aloc = (Mat_SeqAIJ*) a->B->data;
1122b7c46309SBarry Smith   m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1123b7c46309SBarry Smith   row = a->rstart;
11240452661fSBarry Smith   ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
1125dbb450caSBarry Smith   for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
1126b7c46309SBarry Smith   for ( i=0; i<m; i++ ) {
1127416022c9SBarry Smith     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1128b7c46309SBarry Smith     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1129b7c46309SBarry Smith   }
11300452661fSBarry Smith   PetscFree(ct);
11316d4a8577SBarry Smith   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
11326d4a8577SBarry Smith   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
11333638b69dSLois Curfman McInnes   if (matout != PETSC_NULL) {
11340de55854SLois Curfman McInnes     *matout = B;
11350de55854SLois Curfman McInnes   } else {
11360de55854SLois Curfman McInnes     /* This isn't really an in-place transpose .... but free data structures from a */
11370452661fSBarry Smith     PetscFree(a->rowners);
11380de55854SLois Curfman McInnes     ierr = MatDestroy(a->A); CHKERRQ(ierr);
11390de55854SLois Curfman McInnes     ierr = MatDestroy(a->B); CHKERRQ(ierr);
11400452661fSBarry Smith     if (a->colmap) PetscFree(a->colmap);
11410452661fSBarry Smith     if (a->garray) PetscFree(a->garray);
11420de55854SLois Curfman McInnes     if (a->lvec) VecDestroy(a->lvec);
1143a56f8943SBarry Smith     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
11440452661fSBarry Smith     PetscFree(a);
1145416022c9SBarry Smith     PetscMemcpy(A,B,sizeof(struct _Mat));
11460452661fSBarry Smith     PetscHeaderDestroy(B);
11470de55854SLois Curfman McInnes   }
1148b7c46309SBarry Smith   return 0;
1149b7c46309SBarry Smith }
1150b7c46309SBarry Smith 
1151a008b906SSatish Balay int MatDiagonalScale_MPIAIJ(Mat A,Vec ll,Vec rr)
1152a008b906SSatish Balay {
1153a008b906SSatish Balay   Mat a = ((Mat_MPIAIJ *) A->data)->A;
1154a008b906SSatish Balay   Mat b = ((Mat_MPIAIJ *) A->data)->B;
1155a008b906SSatish Balay   int ierr,s1,s2,s3;
1156a008b906SSatish Balay 
1157a008b906SSatish Balay   if (ll)  {
1158a008b906SSatish Balay     ierr = VecGetLocalSize(ll,&s1); CHKERRQ(ierr);
1159a008b906SSatish Balay     ierr = MatGetLocalSize(A,&s2,&s3); CHKERRQ(ierr);
11600e95ebc0SSatish Balay     if (s1!=s2) SETERRQ(1,"MatDiagonalScale_MPIAIJ:non-conforming local sizes");
1161a008b906SSatish Balay     ierr = MatDiagonalScale(a,ll,0); CHKERRQ(ierr);
1162a008b906SSatish Balay     ierr = MatDiagonalScale(b,ll,0); CHKERRQ(ierr);
1163a008b906SSatish Balay   }
11640e95ebc0SSatish Balay   if (rr) SETERRQ(1,"MatDiagonalScale_MPIAIJ:not supported for right vector");
1165a008b906SSatish Balay   return 0;
1166a008b906SSatish Balay }
1167a008b906SSatish Balay 
1168a008b906SSatish Balay 
1169682d7d0cSBarry Smith extern int MatPrintHelp_SeqAIJ(Mat);
1170682d7d0cSBarry Smith static int MatPrintHelp_MPIAIJ(Mat A)
1171682d7d0cSBarry Smith {
1172682d7d0cSBarry Smith   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1173682d7d0cSBarry Smith 
1174682d7d0cSBarry Smith   if (!a->rank) return MatPrintHelp_SeqAIJ(a->A);
1175682d7d0cSBarry Smith   else return 0;
1176682d7d0cSBarry Smith }
1177682d7d0cSBarry Smith 
11785a838052SSatish Balay static int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
11795a838052SSatish Balay {
11805a838052SSatish Balay   *bs = 1;
11815a838052SSatish Balay   return 0;
11825a838052SSatish Balay }
11835a838052SSatish Balay 
1184fc76ce87SLois Curfman McInnes extern int MatConvert_MPIAIJ(Mat,MatType,Mat *);
11853d1612f7SBarry Smith static int MatConvertSameType_MPIAIJ(Mat,Mat *,int);
11862f86bd48SSatish Balay extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int);
1187598137ffSSatish Balay int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **);
11888a729477SBarry Smith /* -------------------------------------------------------------------*/
11892ee70a88SLois Curfman McInnes static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
119039e00950SLois Curfman McInnes        MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
119144a69424SLois Curfman McInnes        MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
119244a69424SLois Curfman McInnes        MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
1193*36ce4990SBarry Smith        0,0,
1194*36ce4990SBarry Smith        0,0,
1195*36ce4990SBarry Smith        0,0,
119644a69424SLois Curfman McInnes        MatRelax_MPIAIJ,
1197b7c46309SBarry Smith        MatTranspose_MPIAIJ,
1198a66be287SLois Curfman McInnes        MatGetInfo_MPIAIJ,0,
1199a008b906SSatish Balay        MatGetDiagonal_MPIAIJ,MatDiagonalScale_MPIAIJ,MatNorm_MPIAIJ,
1200ee50ffe9SBarry Smith        MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
12011eb62cbbSBarry Smith        0,
1202299609e3SLois Curfman McInnes        MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
1203*36ce4990SBarry Smith        0,0,0,0,
1204d1710a03SLois Curfman McInnes        MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
1205*36ce4990SBarry Smith        0,0,
12063e85bedfSBarry Smith        0,0,MatConvert_MPIAIJ,MatConvertSameType_MPIAIJ,0,0,
1207b49de8d1SLois Curfman McInnes        0,0,0,
1208598137ffSSatish Balay        MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0,
1209052efed2SBarry Smith        MatPrintHelp_MPIAIJ,
12103b2fbd54SBarry Smith        MatScale_MPIAIJ,0,0,0,
1211*36ce4990SBarry Smith        MatGetBlockSize_MPIAIJ};
1212*36ce4990SBarry Smith 
12138a729477SBarry Smith 
12141987afe7SBarry Smith /*@C
1215ff756334SLois Curfman McInnes    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
12163a511b96SLois Curfman McInnes    (the default parallel PETSc format).  For good matrix assembly performance
12173a511b96SLois Curfman McInnes    the user should preallocate the matrix storage by setting the parameters
12183a511b96SLois Curfman McInnes    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
12193a511b96SLois Curfman McInnes    performance can be increased by more than a factor of 50.
12208a729477SBarry Smith 
12218a729477SBarry Smith    Input Parameters:
12221eb62cbbSBarry Smith .  comm - MPI communicator
12237d3e4905SLois Curfman McInnes .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
122492e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
122592e8d321SLois Curfman McInnes            y vector for the matrix-vector product y = Ax.
122692e8d321SLois Curfman McInnes .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
122792e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
122892e8d321SLois Curfman McInnes            x vector for the matrix-vector product y = Ax.
12297d3e4905SLois Curfman McInnes .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
123092e8d321SLois Curfman McInnes .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
1231ab693e5aSLois Curfman McInnes .  d_nz - number of nonzeros per row in diagonal portion of local submatrix
1232ff756334SLois Curfman McInnes            (same for all local rows)
12332bd5e0b2SLois Curfman McInnes .  d_nzz - array containing the number of nonzeros in the various rows of the
123492e8d321SLois Curfman McInnes            diagonal portion of the local submatrix (possibly different for each row)
12352bd5e0b2SLois Curfman McInnes            or PETSC_NULL. You must leave room for the diagonal entry even if
12362bd5e0b2SLois Curfman McInnes            it is zero.
12372bd5e0b2SLois Curfman McInnes .  o_nz - number of nonzeros per row in the off-diagonal portion of local
1238ab693e5aSLois Curfman McInnes            submatrix (same for all local rows).
12392bd5e0b2SLois Curfman McInnes .  o_nzz - array containing the number of nonzeros in the various rows of the
12402bd5e0b2SLois Curfman McInnes            off-diagonal portion of the local submatrix (possibly different for
12412bd5e0b2SLois Curfman McInnes            each row) or PETSC_NULL.
12428a729477SBarry Smith 
1243ff756334SLois Curfman McInnes    Output Parameter:
124444cd7ae7SLois Curfman McInnes .  A - the matrix
12458a729477SBarry Smith 
1246ff756334SLois Curfman McInnes    Notes:
1247ff756334SLois Curfman McInnes    The AIJ format (also called the Yale sparse matrix format or
1248ff756334SLois Curfman McInnes    compressed row storage), is fully compatible with standard Fortran 77
12490002213bSLois Curfman McInnes    storage.  That is, the stored row and column indices can begin at
12500002213bSLois Curfman McInnes    either one (as in Fortran) or zero.  See the users manual for details.
1251ff756334SLois Curfman McInnes 
1252ff756334SLois Curfman McInnes    The user MUST specify either the local or global matrix dimensions
1253ff756334SLois Curfman McInnes    (possibly both).
1254ff756334SLois Curfman McInnes 
12555511cfe3SLois Curfman McInnes    By default, this format uses inodes (identical nodes) when possible.
12565511cfe3SLois Curfman McInnes    We search for consecutive rows with the same nonzero structure, thereby
12575511cfe3SLois Curfman McInnes    reusing matrix information to achieve increased efficiency.
12585511cfe3SLois Curfman McInnes 
12595511cfe3SLois Curfman McInnes    Options Database Keys:
12605511cfe3SLois Curfman McInnes $    -mat_aij_no_inode  - Do not use inodes
12616e62573dSLois Curfman McInnes $    -mat_aij_inode_limit <limit> - Set inode limit.
12626e62573dSLois Curfman McInnes $        (max limit=5)
12636e62573dSLois Curfman McInnes $    -mat_aij_oneindex - Internally use indexing starting at 1
12646e62573dSLois Curfman McInnes $        rather than 0.  Note: When calling MatSetValues(),
12656e62573dSLois Curfman McInnes $        the user still MUST index entries starting at 0!
12665511cfe3SLois Curfman McInnes 
1267e0245417SLois Curfman McInnes    Storage Information:
1268e0245417SLois Curfman McInnes    For a square global matrix we define each processor's diagonal portion
1269e0245417SLois Curfman McInnes    to be its local rows and the corresponding columns (a square submatrix);
1270e0245417SLois Curfman McInnes    each processor's off-diagonal portion encompasses the remainder of the
1271e0245417SLois Curfman McInnes    local matrix (a rectangular submatrix).
1272e0245417SLois Curfman McInnes 
1273e0245417SLois Curfman McInnes    The user can specify preallocated storage for the diagonal part of
12745ace5be8SLois Curfman McInnes    the local submatrix with either d_nz or d_nnz (not both).  Set
12755ace5be8SLois Curfman McInnes    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
12765ace5be8SLois Curfman McInnes    memory allocation.  Likewise, specify preallocated storage for the
12775ace5be8SLois Curfman McInnes    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1278ff756334SLois Curfman McInnes 
12795511cfe3SLois Curfman McInnes    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
12805511cfe3SLois Curfman McInnes    the figure below we depict these three local rows and all columns (0-11).
12812191d07cSBarry Smith 
1282b810aeb4SBarry Smith $          0 1 2 3 4 5 6 7 8 9 10 11
1283b810aeb4SBarry Smith $         -------------------
12845511cfe3SLois Curfman McInnes $  row 3  |  o o o d d d o o o o o o
12855511cfe3SLois Curfman McInnes $  row 4  |  o o o d d d o o o o o o
12865511cfe3SLois Curfman McInnes $  row 5  |  o o o d d d o o o o o o
12875511cfe3SLois Curfman McInnes $         -------------------
1288b810aeb4SBarry Smith $
1289b810aeb4SBarry Smith 
12905511cfe3SLois Curfman McInnes    Thus, any entries in the d locations are stored in the d (diagonal)
12915511cfe3SLois Curfman McInnes    submatrix, and any entries in the o locations are stored in the
12925511cfe3SLois Curfman McInnes    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
12935511cfe3SLois Curfman McInnes    stored simply in the MATSEQAIJ format for compressed row storage.
12945511cfe3SLois Curfman McInnes 
12955511cfe3SLois Curfman McInnes    Now d_nz should indicate the number of nonzeros per row in the d matrix,
12965511cfe3SLois Curfman McInnes    and o_nz should indicate the number of nonzeros per row in the o matrix.
12975511cfe3SLois Curfman McInnes    In general, for PDE problems in which most nonzeros are near the diagonal,
12983d323bbdSBarry Smith    one expects d_nz >> o_nz. For large problems you MUST preallocate memory
129992e8d321SLois Curfman McInnes    or you will get TERRIBLE performance; see the users' manual chapter on
13003d323bbdSBarry Smith    matrices and the file $(PETSC_DIR)/Performance.
13013a511b96SLois Curfman McInnes 
1302dbd7a890SLois Curfman McInnes .keywords: matrix, aij, compressed row, sparse, parallel
1303ff756334SLois Curfman McInnes 
1304fafbff53SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
13058a729477SBarry Smith @*/
13061eb62cbbSBarry Smith int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
130744cd7ae7SLois Curfman McInnes                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
13088a729477SBarry Smith {
130944cd7ae7SLois Curfman McInnes   Mat          B;
131044cd7ae7SLois Curfman McInnes   Mat_MPIAIJ   *b;
1311*36ce4990SBarry Smith   int          ierr, i,sum[2],work[2],size;
1312416022c9SBarry Smith 
131344cd7ae7SLois Curfman McInnes   *A = 0;
131444cd7ae7SLois Curfman McInnes   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIAIJ,comm);
131544cd7ae7SLois Curfman McInnes   PLogObjectCreate(B);
131644cd7ae7SLois Curfman McInnes   B->data       = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b);
131744cd7ae7SLois Curfman McInnes   PetscMemzero(b,sizeof(Mat_MPIAIJ));
131844cd7ae7SLois Curfman McInnes   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
1319*36ce4990SBarry Smith   MPI_Comm_size(comm,&size);
1320*36ce4990SBarry Smith   if (size == 1) {
1321*36ce4990SBarry Smith     B->ops.getrowij          = MatGetRowIJ_MPIAIJ;
1322*36ce4990SBarry Smith     B->ops.restorerowij      = MatRestoreRowIJ_MPIAIJ;
1323*36ce4990SBarry Smith     B->ops.lufactorsymbolic  = MatLUFactorSymbolic_MPIAIJ;
1324*36ce4990SBarry Smith     B->ops.lufactornumeric   = MatLUFactorNumeric_MPIAIJ;
1325*36ce4990SBarry Smith     B->ops.lufactor          = MatLUFactor_MPIAIJ;
1326*36ce4990SBarry Smith     B->ops.solve             = MatSolve_MPIAIJ;
1327*36ce4990SBarry Smith     B->ops.solveadd          = MatSolveAdd_MPIAIJ;
1328*36ce4990SBarry Smith     B->ops.solvetrans        = MatSolveTrans_MPIAIJ;
1329*36ce4990SBarry Smith     B->ops.solvetransadd     = MatSolveTransAdd_MPIAIJ;
1330*36ce4990SBarry Smith     B->ops.ilufactorsymbolic = MatILUFactorSymbolic_MPIAIJ;
1331*36ce4990SBarry Smith   }
133244cd7ae7SLois Curfman McInnes   B->destroy    = MatDestroy_MPIAIJ;
133344cd7ae7SLois Curfman McInnes   B->view       = MatView_MPIAIJ;
133444cd7ae7SLois Curfman McInnes   B->factor     = 0;
133544cd7ae7SLois Curfman McInnes   B->assembled  = PETSC_FALSE;
1336d6dfbf8fSBarry Smith 
133744cd7ae7SLois Curfman McInnes   b->insertmode = NOT_SET_VALUES;
133844cd7ae7SLois Curfman McInnes   MPI_Comm_rank(comm,&b->rank);
133944cd7ae7SLois Curfman McInnes   MPI_Comm_size(comm,&b->size);
13401eb62cbbSBarry Smith 
1341b4fd4287SBarry Smith   if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
13422e0155e0SLois Curfman McInnes     SETERRQ(1,"MatCreateMPIAIJ:Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
13431987afe7SBarry Smith 
1344dbd7a890SLois Curfman McInnes   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
13451eb62cbbSBarry Smith     work[0] = m; work[1] = n;
1346d65a2f8fSBarry Smith     MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
1347dbd7a890SLois Curfman McInnes     if (M == PETSC_DECIDE) M = sum[0];
1348dbd7a890SLois Curfman McInnes     if (N == PETSC_DECIDE) N = sum[1];
13491eb62cbbSBarry Smith   }
135044cd7ae7SLois Curfman McInnes   if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);}
135144cd7ae7SLois Curfman McInnes   if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);}
135244cd7ae7SLois Curfman McInnes   b->m = m; B->m = m;
135344cd7ae7SLois Curfman McInnes   b->n = n; B->n = n;
135444cd7ae7SLois Curfman McInnes   b->N = N; B->N = N;
135544cd7ae7SLois Curfman McInnes   b->M = M; B->M = M;
13561eb62cbbSBarry Smith 
13571eb62cbbSBarry Smith   /* build local table of row and column ownerships */
135844cd7ae7SLois Curfman McInnes   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
135944cd7ae7SLois Curfman McInnes   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
1360603f58a4SSatish Balay   b->cowners = b->rowners + b->size + 2;
136144cd7ae7SLois Curfman McInnes   MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);
136244cd7ae7SLois Curfman McInnes   b->rowners[0] = 0;
136344cd7ae7SLois Curfman McInnes   for ( i=2; i<=b->size; i++ ) {
136444cd7ae7SLois Curfman McInnes     b->rowners[i] += b->rowners[i-1];
13658a729477SBarry Smith   }
136644cd7ae7SLois Curfman McInnes   b->rstart = b->rowners[b->rank];
136744cd7ae7SLois Curfman McInnes   b->rend   = b->rowners[b->rank+1];
136844cd7ae7SLois Curfman McInnes   MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);
136944cd7ae7SLois Curfman McInnes   b->cowners[0] = 0;
137044cd7ae7SLois Curfman McInnes   for ( i=2; i<=b->size; i++ ) {
137144cd7ae7SLois Curfman McInnes     b->cowners[i] += b->cowners[i-1];
13728a729477SBarry Smith   }
137344cd7ae7SLois Curfman McInnes   b->cstart = b->cowners[b->rank];
137444cd7ae7SLois Curfman McInnes   b->cend   = b->cowners[b->rank+1];
13758a729477SBarry Smith 
13765ace5be8SLois Curfman McInnes   if (d_nz == PETSC_DEFAULT) d_nz = 5;
137744cd7ae7SLois Curfman McInnes   ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
137844cd7ae7SLois Curfman McInnes   PLogObjectParent(B,b->A);
13797b8455f0SLois Curfman McInnes   if (o_nz == PETSC_DEFAULT) o_nz = 0;
138044cd7ae7SLois Curfman McInnes   ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
138144cd7ae7SLois Curfman McInnes   PLogObjectParent(B,b->B);
13828a729477SBarry Smith 
13831eb62cbbSBarry Smith   /* build cache for off array entries formed */
138444cd7ae7SLois Curfman McInnes   ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
138544cd7ae7SLois Curfman McInnes   b->colmap      = 0;
138644cd7ae7SLois Curfman McInnes   b->garray      = 0;
138744cd7ae7SLois Curfman McInnes   b->roworiented = 1;
13888a729477SBarry Smith 
13891eb62cbbSBarry Smith   /* stuff used for matrix vector multiply */
139044cd7ae7SLois Curfman McInnes   b->lvec      = 0;
139144cd7ae7SLois Curfman McInnes   b->Mvctx     = 0;
13928a729477SBarry Smith 
13937a0afa10SBarry Smith   /* stuff for MatGetRow() */
139444cd7ae7SLois Curfman McInnes   b->rowindices   = 0;
139544cd7ae7SLois Curfman McInnes   b->rowvalues    = 0;
139644cd7ae7SLois Curfman McInnes   b->getrowactive = PETSC_FALSE;
13977a0afa10SBarry Smith 
139844cd7ae7SLois Curfman McInnes   *A = B;
1399d6dfbf8fSBarry Smith   return 0;
1400d6dfbf8fSBarry Smith }
1401c74985f6SBarry Smith 
14023d1612f7SBarry Smith static int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues)
1403d6dfbf8fSBarry Smith {
1404d6dfbf8fSBarry Smith   Mat        mat;
1405416022c9SBarry Smith   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
1406a1b97e82SLois Curfman McInnes   int        ierr, len=0, flg;
1407d6dfbf8fSBarry Smith 
1408416022c9SBarry Smith   *newmat       = 0;
14090452661fSBarry Smith   PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm);
1410a5a9c739SBarry Smith   PLogObjectCreate(mat);
14110452661fSBarry Smith   mat->data       = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
1412416022c9SBarry Smith   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
141344a69424SLois Curfman McInnes   mat->destroy    = MatDestroy_MPIAIJ;
141444a69424SLois Curfman McInnes   mat->view       = MatView_MPIAIJ;
1415d6dfbf8fSBarry Smith   mat->factor     = matin->factor;
1416c456f294SBarry Smith   mat->assembled  = PETSC_TRUE;
1417d6dfbf8fSBarry Smith 
141844cd7ae7SLois Curfman McInnes   a->m = mat->m   = oldmat->m;
141944cd7ae7SLois Curfman McInnes   a->n = mat->n   = oldmat->n;
142044cd7ae7SLois Curfman McInnes   a->M = mat->M   = oldmat->M;
142144cd7ae7SLois Curfman McInnes   a->N = mat->N   = oldmat->N;
1422d6dfbf8fSBarry Smith 
1423416022c9SBarry Smith   a->rstart       = oldmat->rstart;
1424416022c9SBarry Smith   a->rend         = oldmat->rend;
1425416022c9SBarry Smith   a->cstart       = oldmat->cstart;
1426416022c9SBarry Smith   a->cend         = oldmat->cend;
142717699dbbSLois Curfman McInnes   a->size         = oldmat->size;
142817699dbbSLois Curfman McInnes   a->rank         = oldmat->rank;
142964119d58SLois Curfman McInnes   a->insertmode   = NOT_SET_VALUES;
1430bcd2baecSBarry Smith   a->rowvalues    = 0;
1431bcd2baecSBarry Smith   a->getrowactive = PETSC_FALSE;
1432d6dfbf8fSBarry Smith 
1433603f58a4SSatish Balay   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
1434603f58a4SSatish Balay   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
1435603f58a4SSatish Balay   a->cowners = a->rowners + a->size + 2;
1436603f58a4SSatish Balay   PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
1437416022c9SBarry Smith   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
14382ee70a88SLois Curfman McInnes   if (oldmat->colmap) {
14390452661fSBarry Smith     a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
1440416022c9SBarry Smith     PLogObjectMemory(mat,(a->N)*sizeof(int));
1441416022c9SBarry Smith     PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
1442416022c9SBarry Smith   } else a->colmap = 0;
14433f41c07dSBarry Smith   if (oldmat->garray) {
14443f41c07dSBarry Smith     len = ((Mat_SeqAIJ *) (oldmat->B->data))->n;
14453f41c07dSBarry Smith     a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray);
1446464493b3SBarry Smith     PLogObjectMemory(mat,len*sizeof(int));
14473f41c07dSBarry Smith     if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
1448416022c9SBarry Smith   } else a->garray = 0;
1449d6dfbf8fSBarry Smith 
1450416022c9SBarry Smith   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
1451416022c9SBarry Smith   PLogObjectParent(mat,a->lvec);
1452a56f8943SBarry Smith   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
1453416022c9SBarry Smith   PLogObjectParent(mat,a->Mvctx);
1454416022c9SBarry Smith   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
1455416022c9SBarry Smith   PLogObjectParent(mat,a->A);
1456416022c9SBarry Smith   ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
1457416022c9SBarry Smith   PLogObjectParent(mat,a->B);
14585dd7a6c7SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
145925cdf11fSBarry Smith   if (flg) {
1460682d7d0cSBarry Smith     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
1461682d7d0cSBarry Smith   }
14628a729477SBarry Smith   *newmat = mat;
14638a729477SBarry Smith   return 0;
14648a729477SBarry Smith }
1465416022c9SBarry Smith 
146677c4ece6SBarry Smith #include "sys.h"
1467416022c9SBarry Smith 
146819bcc07fSBarry Smith int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
1469416022c9SBarry Smith {
1470d65a2f8fSBarry Smith   Mat          A;
1471416022c9SBarry Smith   int          i, nz, ierr, j,rstart, rend, fd;
1472d65a2f8fSBarry Smith   Scalar       *vals,*svals;
147319bcc07fSBarry Smith   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1474416022c9SBarry Smith   MPI_Status   status;
147517699dbbSLois Curfman McInnes   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1476d65a2f8fSBarry Smith   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
147719bcc07fSBarry Smith   int          tag = ((PetscObject)viewer)->tag;
1478416022c9SBarry Smith 
147917699dbbSLois Curfman McInnes   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
148017699dbbSLois Curfman McInnes   if (!rank) {
148190ace30eSBarry Smith     ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
148277c4ece6SBarry Smith     ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr);
1483c456f294SBarry Smith     if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIAIJ:not matrix object");
1484416022c9SBarry Smith   }
1485416022c9SBarry Smith 
1486416022c9SBarry Smith   MPI_Bcast(header+1,3,MPI_INT,0,comm);
1487416022c9SBarry Smith   M = header[1]; N = header[2];
1488416022c9SBarry Smith   /* determine ownership of all rows */
148917699dbbSLois Curfman McInnes   m = M/size + ((M % size) > rank);
14900452661fSBarry Smith   rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
1491416022c9SBarry Smith   MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1492416022c9SBarry Smith   rowners[0] = 0;
149317699dbbSLois Curfman McInnes   for ( i=2; i<=size; i++ ) {
1494416022c9SBarry Smith     rowners[i] += rowners[i-1];
1495416022c9SBarry Smith   }
149617699dbbSLois Curfman McInnes   rstart = rowners[rank];
149717699dbbSLois Curfman McInnes   rend   = rowners[rank+1];
1498416022c9SBarry Smith 
1499416022c9SBarry Smith   /* distribute row lengths to all processors */
15000452661fSBarry Smith   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
1501416022c9SBarry Smith   offlens = ourlens + (rend-rstart);
150217699dbbSLois Curfman McInnes   if (!rank) {
15030452661fSBarry Smith     rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
150477c4ece6SBarry Smith     ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
15050452661fSBarry Smith     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
150617699dbbSLois Curfman McInnes     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
1507d65a2f8fSBarry Smith     MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
15080452661fSBarry Smith     PetscFree(sndcounts);
1509416022c9SBarry Smith   }
1510416022c9SBarry Smith   else {
1511416022c9SBarry Smith     MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
1512416022c9SBarry Smith   }
1513416022c9SBarry Smith 
151417699dbbSLois Curfman McInnes   if (!rank) {
1515416022c9SBarry Smith     /* calculate the number of nonzeros on each processor */
15160452661fSBarry Smith     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
1517cddf8d76SBarry Smith     PetscMemzero(procsnz,size*sizeof(int));
151817699dbbSLois Curfman McInnes     for ( i=0; i<size; i++ ) {
1519416022c9SBarry Smith       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
1520416022c9SBarry Smith         procsnz[i] += rowlengths[j];
1521416022c9SBarry Smith       }
1522416022c9SBarry Smith     }
15230452661fSBarry Smith     PetscFree(rowlengths);
1524416022c9SBarry Smith 
1525416022c9SBarry Smith     /* determine max buffer needed and allocate it */
1526416022c9SBarry Smith     maxnz = 0;
152717699dbbSLois Curfman McInnes     for ( i=0; i<size; i++ ) {
15280452661fSBarry Smith       maxnz = PetscMax(maxnz,procsnz[i]);
1529416022c9SBarry Smith     }
15300452661fSBarry Smith     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
1531416022c9SBarry Smith 
1532416022c9SBarry Smith     /* read in my part of the matrix column indices  */
1533416022c9SBarry Smith     nz = procsnz[0];
15340452661fSBarry Smith     mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
153577c4ece6SBarry Smith     ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr);
1536d65a2f8fSBarry Smith 
1537d65a2f8fSBarry Smith     /* read in every one elses and ship off */
153817699dbbSLois Curfman McInnes     for ( i=1; i<size; i++ ) {
1539d65a2f8fSBarry Smith       nz = procsnz[i];
154077c4ece6SBarry Smith       ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
1541d65a2f8fSBarry Smith       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
1542d65a2f8fSBarry Smith     }
15430452661fSBarry Smith     PetscFree(cols);
1544416022c9SBarry Smith   }
1545416022c9SBarry Smith   else {
1546416022c9SBarry Smith     /* determine buffer space needed for message */
1547416022c9SBarry Smith     nz = 0;
1548416022c9SBarry Smith     for ( i=0; i<m; i++ ) {
1549416022c9SBarry Smith       nz += ourlens[i];
1550416022c9SBarry Smith     }
15510452661fSBarry Smith     mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1552416022c9SBarry Smith 
1553416022c9SBarry Smith     /* receive message of column indices*/
1554d65a2f8fSBarry Smith     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
1555416022c9SBarry Smith     MPI_Get_count(&status,MPI_INT,&maxnz);
1556c456f294SBarry Smith     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file");
1557416022c9SBarry Smith   }
1558416022c9SBarry Smith 
1559416022c9SBarry Smith   /* loop over local rows, determining number of off diagonal entries */
1560cddf8d76SBarry Smith   PetscMemzero(offlens,m*sizeof(int));
1561416022c9SBarry Smith   jj = 0;
1562416022c9SBarry Smith   for ( i=0; i<m; i++ ) {
1563416022c9SBarry Smith     for ( j=0; j<ourlens[i]; j++ ) {
1564d65a2f8fSBarry Smith       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1565416022c9SBarry Smith       jj++;
1566416022c9SBarry Smith     }
1567416022c9SBarry Smith   }
1568d65a2f8fSBarry Smith 
1569d65a2f8fSBarry Smith   /* create our matrix */
1570416022c9SBarry Smith   for ( i=0; i<m; i++ ) {
1571416022c9SBarry Smith     ourlens[i] -= offlens[i];
1572416022c9SBarry Smith   }
1573d65a2f8fSBarry Smith   ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1574d65a2f8fSBarry Smith   A = *newmat;
15756d4a8577SBarry Smith   MatSetOption(A,MAT_COLUMNS_SORTED);
1576d65a2f8fSBarry Smith   for ( i=0; i<m; i++ ) {
1577d65a2f8fSBarry Smith     ourlens[i] += offlens[i];
1578d65a2f8fSBarry Smith   }
1579416022c9SBarry Smith 
158017699dbbSLois Curfman McInnes   if (!rank) {
15810452661fSBarry Smith     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);
1582416022c9SBarry Smith 
1583416022c9SBarry Smith     /* read in my part of the matrix numerical values  */
1584416022c9SBarry Smith     nz = procsnz[0];
158577c4ece6SBarry Smith     ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
1586d65a2f8fSBarry Smith 
1587d65a2f8fSBarry Smith     /* insert into matrix */
1588d65a2f8fSBarry Smith     jj      = rstart;
1589d65a2f8fSBarry Smith     smycols = mycols;
1590d65a2f8fSBarry Smith     svals   = vals;
1591d65a2f8fSBarry Smith     for ( i=0; i<m; i++ ) {
1592d65a2f8fSBarry Smith       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1593d65a2f8fSBarry Smith       smycols += ourlens[i];
1594d65a2f8fSBarry Smith       svals   += ourlens[i];
1595d65a2f8fSBarry Smith       jj++;
1596416022c9SBarry Smith     }
1597416022c9SBarry Smith 
1598d65a2f8fSBarry Smith     /* read in other processors and ship out */
159917699dbbSLois Curfman McInnes     for ( i=1; i<size; i++ ) {
1600416022c9SBarry Smith       nz = procsnz[i];
160177c4ece6SBarry Smith       ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
1602d65a2f8fSBarry Smith       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
1603416022c9SBarry Smith     }
16040452661fSBarry Smith     PetscFree(procsnz);
1605416022c9SBarry Smith   }
1606d65a2f8fSBarry Smith   else {
1607d65a2f8fSBarry Smith     /* receive numeric values */
16080452661fSBarry Smith     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);
1609416022c9SBarry Smith 
1610d65a2f8fSBarry Smith     /* receive message of values*/
1611d65a2f8fSBarry Smith     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
1612d65a2f8fSBarry Smith     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1613c456f294SBarry Smith     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file");
1614d65a2f8fSBarry Smith 
1615d65a2f8fSBarry Smith     /* insert into matrix */
1616d65a2f8fSBarry Smith     jj      = rstart;
1617d65a2f8fSBarry Smith     smycols = mycols;
1618d65a2f8fSBarry Smith     svals   = vals;
1619d65a2f8fSBarry Smith     for ( i=0; i<m; i++ ) {
1620d65a2f8fSBarry Smith       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1621d65a2f8fSBarry Smith       smycols += ourlens[i];
1622d65a2f8fSBarry Smith       svals   += ourlens[i];
1623d65a2f8fSBarry Smith       jj++;
1624d65a2f8fSBarry Smith     }
1625d65a2f8fSBarry Smith   }
16260452661fSBarry Smith   PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);
1627d65a2f8fSBarry Smith 
16286d4a8577SBarry Smith   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
16296d4a8577SBarry Smith   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1630416022c9SBarry Smith   return 0;
1631416022c9SBarry Smith }
1632