xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 55908ccc8670b0883627edac5ce93caa055764f2)
1cb512458SBarry Smith #ifndef lint
2*55908cccSLois Curfman McInnes static char vcid[] = "$Id: mpiaij.c,v 1.179 1996/11/30 21:40:36 curfman Exp curfman $";
3cb512458SBarry Smith #endif
48a729477SBarry Smith 
570f55243SBarry Smith #include "src/mat/impls/aij/mpi/mpiaij.h"
6f5eb4b81SSatish Balay #include "src/vec/vecimpl.h"
7d9942c19SSatish Balay #include "src/inline/spops.h"
88a729477SBarry Smith 
99e25ed09SBarry Smith /* local utility routine that creates a mapping from the global column
109e25ed09SBarry Smith number to the local number in the off-diagonal part of the local
119e25ed09SBarry Smith storage of the matrix.  This is done in a non scable way since the
129e25ed09SBarry Smith length of colmap equals the global matrix length.
139e25ed09SBarry Smith */
140a198c4cSBarry Smith int CreateColmap_MPIAIJ_Private(Mat mat)
159e25ed09SBarry Smith {
1644a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
17ec8511deSBarry Smith   Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
18905e6a2fSBarry Smith   int        n = B->n,i;
19dbb450caSBarry Smith 
200452661fSBarry Smith   aij->colmap = (int *) PetscMalloc(aij->N*sizeof(int));CHKPTRQ(aij->colmap);
21464493b3SBarry Smith   PLogObjectMemory(mat,aij->N*sizeof(int));
22cddf8d76SBarry Smith   PetscMemzero(aij->colmap,aij->N*sizeof(int));
23905e6a2fSBarry Smith   for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
249e25ed09SBarry Smith   return 0;
259e25ed09SBarry Smith }
269e25ed09SBarry Smith 
272493cbb0SBarry Smith extern int DisAssemble_MPIAIJ(Mat);
282493cbb0SBarry Smith 
293b2fbd54SBarry Smith static int MatGetRowIJ_MPIAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
303b2fbd54SBarry Smith                            PetscTruth *done)
31299609e3SLois Curfman McInnes {
32299609e3SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
33299609e3SLois Curfman McInnes   int        ierr;
3417699dbbSLois Curfman McInnes   if (aij->size == 1) {
353b2fbd54SBarry Smith     ierr = MatGetRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
363b2fbd54SBarry Smith   } else SETERRQ(1,"MatGetRowIJ_MPIAIJ:not supported in parallel");
373b2fbd54SBarry Smith   return 0;
383b2fbd54SBarry Smith }
393b2fbd54SBarry Smith 
403b2fbd54SBarry Smith static int MatRestoreRowIJ_MPIAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
413b2fbd54SBarry Smith                                PetscTruth *done)
423b2fbd54SBarry Smith {
433b2fbd54SBarry Smith   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
443b2fbd54SBarry Smith   int        ierr;
453b2fbd54SBarry Smith   if (aij->size == 1) {
463b2fbd54SBarry Smith     ierr = MatRestoreRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
473b2fbd54SBarry Smith   } else SETERRQ(1,"MatRestoreRowIJ_MPIAIJ:not supported in parallel");
48299609e3SLois Curfman McInnes   return 0;
49299609e3SLois Curfman McInnes }
50299609e3SLois Curfman McInnes 
510a198c4cSBarry Smith extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
520a198c4cSBarry Smith 
534b0e389bSBarry Smith static int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
548a729477SBarry Smith {
5544a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
564b0e389bSBarry Smith   Scalar     value;
571eb62cbbSBarry Smith   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
581eb62cbbSBarry Smith   int        cstart = aij->cstart, cend = aij->cend,row,col;
59905e6a2fSBarry Smith   int        roworiented = aij->roworiented;
608a729477SBarry Smith 
610a198c4cSBarry Smith #if defined(PETSC_BOPT_g)
6264119d58SLois Curfman McInnes   if (aij->insertmode != NOT_SET_VALUES && aij->insertmode != addv) {
6348d91487SBarry Smith     SETERRQ(1,"MatSetValues_MPIAIJ:Cannot mix inserts and adds");
648a729477SBarry Smith   }
650a198c4cSBarry Smith #endif
661eb62cbbSBarry Smith   aij->insertmode = addv;
678a729477SBarry Smith   for ( i=0; i<m; i++ ) {
680a198c4cSBarry Smith #if defined(PETSC_BOPT_g)
694b0e389bSBarry Smith     if (im[i] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative row");
704b0e389bSBarry Smith     if (im[i] >= aij->M) SETERRQ(1,"MatSetValues_MPIAIJ:Row too large");
710a198c4cSBarry Smith #endif
724b0e389bSBarry Smith     if (im[i] >= rstart && im[i] < rend) {
734b0e389bSBarry Smith       row = im[i] - rstart;
741eb62cbbSBarry Smith       for ( j=0; j<n; j++ ) {
754b0e389bSBarry Smith         if (in[j] >= cstart && in[j] < cend){
764b0e389bSBarry Smith           col = in[j] - cstart;
774b0e389bSBarry Smith           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
780a198c4cSBarry Smith           ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
791eb62cbbSBarry Smith         }
800a198c4cSBarry Smith #if defined(PETSC_BOPT_g)
810a198c4cSBarry Smith         else if (in[j] < 0) {SETERRQ(1,"MatSetValues_MPIAIJ:Negative column");}
820a198c4cSBarry Smith         else if (in[j] >= aij->N) {SETERRQ(1,"MatSetValues_MPIAIJ:Col too large");}
830a198c4cSBarry Smith #endif
841eb62cbbSBarry Smith         else {
85227d817aSBarry Smith           if (mat->was_assembled) {
86905e6a2fSBarry Smith             if (!aij->colmap) {
87905e6a2fSBarry Smith               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
88905e6a2fSBarry Smith             }
89905e6a2fSBarry Smith             col = aij->colmap[in[j]] - 1;
90ec8511deSBarry Smith             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
912493cbb0SBarry Smith               ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
924b0e389bSBarry Smith               col =  in[j];
93d6dfbf8fSBarry Smith             }
949e25ed09SBarry Smith           }
954b0e389bSBarry Smith           else col = in[j];
964b0e389bSBarry Smith           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
970a198c4cSBarry Smith           ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
981eb62cbbSBarry Smith         }
991eb62cbbSBarry Smith       }
1001eb62cbbSBarry Smith     }
1011eb62cbbSBarry Smith     else {
10290f02eecSBarry Smith       if (roworiented && !aij->donotstash) {
1034b0e389bSBarry Smith         ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
1044b0e389bSBarry Smith       }
1054b0e389bSBarry Smith       else {
10690f02eecSBarry Smith         if (!aij->donotstash) {
1074b0e389bSBarry Smith           row = im[i];
1084b0e389bSBarry Smith           for ( j=0; j<n; j++ ) {
1094b0e389bSBarry Smith             ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
1104b0e389bSBarry Smith           }
1114b0e389bSBarry Smith         }
1121eb62cbbSBarry Smith       }
1138a729477SBarry Smith     }
11490f02eecSBarry Smith   }
1158a729477SBarry Smith   return 0;
1168a729477SBarry Smith }
1178a729477SBarry Smith 
118b49de8d1SLois Curfman McInnes static int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
119b49de8d1SLois Curfman McInnes {
120b49de8d1SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
121b49de8d1SLois Curfman McInnes   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
122b49de8d1SLois Curfman McInnes   int        cstart = aij->cstart, cend = aij->cend,row,col;
123b49de8d1SLois Curfman McInnes 
124b49de8d1SLois Curfman McInnes   for ( i=0; i<m; i++ ) {
125b49de8d1SLois Curfman McInnes     if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative row");
126b49de8d1SLois Curfman McInnes     if (idxm[i] >= aij->M) SETERRQ(1,"MatGetValues_MPIAIJ:Row too large");
127b49de8d1SLois Curfman McInnes     if (idxm[i] >= rstart && idxm[i] < rend) {
128b49de8d1SLois Curfman McInnes       row = idxm[i] - rstart;
129b49de8d1SLois Curfman McInnes       for ( j=0; j<n; j++ ) {
130b49de8d1SLois Curfman McInnes         if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative column");
131b49de8d1SLois Curfman McInnes         if (idxn[j] >= aij->N) SETERRQ(1,"MatGetValues_MPIAIJ:Col too large");
132b49de8d1SLois Curfman McInnes         if (idxn[j] >= cstart && idxn[j] < cend){
133b49de8d1SLois Curfman McInnes           col = idxn[j] - cstart;
134b49de8d1SLois Curfman McInnes           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
135b49de8d1SLois Curfman McInnes         }
136b49de8d1SLois Curfman McInnes         else {
137905e6a2fSBarry Smith           if (!aij->colmap) {
138905e6a2fSBarry Smith             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
139905e6a2fSBarry Smith           }
140905e6a2fSBarry Smith           col = aij->colmap[idxn[j]] - 1;
141e60e1c95SSatish Balay           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
142d9d09a02SSatish Balay           else {
143b49de8d1SLois Curfman McInnes             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
144b49de8d1SLois Curfman McInnes           }
145b49de8d1SLois Curfman McInnes         }
146b49de8d1SLois Curfman McInnes       }
147d9d09a02SSatish Balay     }
148b49de8d1SLois Curfman McInnes     else {
149b49de8d1SLois Curfman McInnes       SETERRQ(1,"MatGetValues_MPIAIJ:Only local values currently supported");
150b49de8d1SLois Curfman McInnes     }
151b49de8d1SLois Curfman McInnes   }
152b49de8d1SLois Curfman McInnes   return 0;
153b49de8d1SLois Curfman McInnes }
154b49de8d1SLois Curfman McInnes 
155fc76ce87SLois Curfman McInnes static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
1568a729477SBarry Smith {
15744a69424SLois Curfman McInnes   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
158d6dfbf8fSBarry Smith   MPI_Comm    comm = mat->comm;
15917699dbbSLois Curfman McInnes   int         size = aij->size, *owners = aij->rowners;
16017699dbbSLois Curfman McInnes   int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
1611eb62cbbSBarry Smith   MPI_Request *send_waits,*recv_waits;
1626abc6512SBarry Smith   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
1631eb62cbbSBarry Smith   InsertMode  addv;
1641eb62cbbSBarry Smith   Scalar      *rvalues,*svalues;
1651eb62cbbSBarry Smith 
1661eb62cbbSBarry Smith   /* make sure all processors are either in INSERTMODE or ADDMODE */
167d65a2f8fSBarry Smith   MPI_Allreduce(&aij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
168dbb450caSBarry Smith   if (addv == (ADD_VALUES|INSERT_VALUES)) {
169bbb6d6a8SBarry Smith     SETERRQ(1,"MatAssemblyBegin_MPIAIJ:Some processors inserted others added");
1701eb62cbbSBarry Smith   }
1711eb62cbbSBarry Smith   aij->insertmode = addv; /* in case this processor had no cache */
1721eb62cbbSBarry Smith 
1731eb62cbbSBarry Smith   /*  first count number of contributors to each processor */
1740452661fSBarry Smith   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
175cddf8d76SBarry Smith   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
1760452661fSBarry Smith   owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
1771eb62cbbSBarry Smith   for ( i=0; i<aij->stash.n; i++ ) {
1781eb62cbbSBarry Smith     idx = aij->stash.idx[i];
17917699dbbSLois Curfman McInnes     for ( j=0; j<size; j++ ) {
1801eb62cbbSBarry Smith       if (idx >= owners[j] && idx < owners[j+1]) {
1811eb62cbbSBarry Smith         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
1828a729477SBarry Smith       }
1838a729477SBarry Smith     }
1848a729477SBarry Smith   }
18517699dbbSLois Curfman McInnes   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
1861eb62cbbSBarry Smith 
1871eb62cbbSBarry Smith   /* inform other processors of number of messages and max length*/
1880452661fSBarry Smith   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
18917699dbbSLois Curfman McInnes   MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);
19017699dbbSLois Curfman McInnes   nreceives = work[rank];
19117699dbbSLois Curfman McInnes   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
19217699dbbSLois Curfman McInnes   nmax = work[rank];
1930452661fSBarry Smith   PetscFree(work);
1941eb62cbbSBarry Smith 
1951eb62cbbSBarry Smith   /* post receives:
1961eb62cbbSBarry Smith        1) each message will consist of ordered pairs
1971eb62cbbSBarry Smith      (global index,value) we store the global index as a double
198d6dfbf8fSBarry Smith      to simplify the message passing.
1991eb62cbbSBarry Smith        2) since we don't know how long each individual message is we
2001eb62cbbSBarry Smith      allocate the largest needed buffer for each receive. Potentially
2011eb62cbbSBarry Smith      this is a lot of wasted space.
2021eb62cbbSBarry Smith 
2031eb62cbbSBarry Smith 
2041eb62cbbSBarry Smith        This could be done better.
2051eb62cbbSBarry Smith   */
2060452661fSBarry Smith   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
20778b31e54SBarry Smith   CHKPTRQ(rvalues);
2080452661fSBarry Smith   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
20978b31e54SBarry Smith   CHKPTRQ(recv_waits);
2101eb62cbbSBarry Smith   for ( i=0; i<nreceives; i++ ) {
211416022c9SBarry Smith     MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
2121eb62cbbSBarry Smith               comm,recv_waits+i);
2131eb62cbbSBarry Smith   }
2141eb62cbbSBarry Smith 
2151eb62cbbSBarry Smith   /* do sends:
2161eb62cbbSBarry Smith       1) starts[i] gives the starting index in svalues for stuff going to
2171eb62cbbSBarry Smith          the ith processor
2181eb62cbbSBarry Smith   */
2190452661fSBarry Smith   svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
2200452661fSBarry Smith   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
22178b31e54SBarry Smith   CHKPTRQ(send_waits);
2220452661fSBarry Smith   starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
2231eb62cbbSBarry Smith   starts[0] = 0;
22417699dbbSLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
2251eb62cbbSBarry Smith   for ( i=0; i<aij->stash.n; i++ ) {
2261eb62cbbSBarry Smith     svalues[3*starts[owner[i]]]       = (Scalar)  aij->stash.idx[i];
2271eb62cbbSBarry Smith     svalues[3*starts[owner[i]]+1]     = (Scalar)  aij->stash.idy[i];
2281eb62cbbSBarry Smith     svalues[3*(starts[owner[i]]++)+2] =  aij->stash.array[i];
2291eb62cbbSBarry Smith   }
2300452661fSBarry Smith   PetscFree(owner);
2311eb62cbbSBarry Smith   starts[0] = 0;
23217699dbbSLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
2331eb62cbbSBarry Smith   count = 0;
23417699dbbSLois Curfman McInnes   for ( i=0; i<size; i++ ) {
2351eb62cbbSBarry Smith     if (procs[i]) {
236416022c9SBarry Smith       MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
2371eb62cbbSBarry Smith                 comm,send_waits+count++);
2381eb62cbbSBarry Smith     }
2391eb62cbbSBarry Smith   }
2400452661fSBarry Smith   PetscFree(starts); PetscFree(nprocs);
2411eb62cbbSBarry Smith 
2421eb62cbbSBarry Smith   /* Free cache space */
243*55908cccSLois Curfman McInnes   PLogInfo(mat,"MatAssemblyBegin_MPIAIJ:Number of off-processor values %d\n",aij->stash.n);
24478b31e54SBarry Smith   ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
2451eb62cbbSBarry Smith 
2461eb62cbbSBarry Smith   aij->svalues    = svalues;    aij->rvalues    = rvalues;
2471eb62cbbSBarry Smith   aij->nsends     = nsends;     aij->nrecvs     = nreceives;
2481eb62cbbSBarry Smith   aij->send_waits = send_waits; aij->recv_waits = recv_waits;
2491eb62cbbSBarry Smith   aij->rmax       = nmax;
2501eb62cbbSBarry Smith 
2511eb62cbbSBarry Smith   return 0;
2521eb62cbbSBarry Smith }
25344a69424SLois Curfman McInnes extern int MatSetUpMultiply_MPIAIJ(Mat);
2541eb62cbbSBarry Smith 
255fc76ce87SLois Curfman McInnes static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
2561eb62cbbSBarry Smith {
25744a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
2581eb62cbbSBarry Smith   MPI_Status  *send_status,recv_status;
259416022c9SBarry Smith   int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
260905e6a2fSBarry Smith   int         row,col,other_disassembled;
2611eb62cbbSBarry Smith   Scalar      *values,val;
2621eb62cbbSBarry Smith   InsertMode  addv = aij->insertmode;
2631eb62cbbSBarry Smith 
2641eb62cbbSBarry Smith   /*  wait on receives */
2651eb62cbbSBarry Smith   while (count) {
266d6dfbf8fSBarry Smith     MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
2671eb62cbbSBarry Smith     /* unpack receives into our local space */
268d6dfbf8fSBarry Smith     values = aij->rvalues + 3*imdex*aij->rmax;
269c60448a5SBarry Smith     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
2701eb62cbbSBarry Smith     n = n/3;
2711eb62cbbSBarry Smith     for ( i=0; i<n; i++ ) {
272227d817aSBarry Smith       row = (int) PetscReal(values[3*i]) - aij->rstart;
273227d817aSBarry Smith       col = (int) PetscReal(values[3*i+1]);
2741eb62cbbSBarry Smith       val = values[3*i+2];
2751eb62cbbSBarry Smith       if (col >= aij->cstart && col < aij->cend) {
2761eb62cbbSBarry Smith         col -= aij->cstart;
2771eb62cbbSBarry Smith         MatSetValues(aij->A,1,&row,1,&col,&val,addv);
2781eb62cbbSBarry Smith       }
2791eb62cbbSBarry Smith       else {
28055a1b374SBarry Smith         if (mat->was_assembled || mat->assembled) {
281905e6a2fSBarry Smith           if (!aij->colmap) {
282905e6a2fSBarry Smith             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
283905e6a2fSBarry Smith           }
284905e6a2fSBarry Smith           col = aij->colmap[col] - 1;
285ec8511deSBarry Smith           if (col < 0  && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
2862493cbb0SBarry Smith             ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
287227d817aSBarry Smith             col = (int) PetscReal(values[3*i+1]);
288d6dfbf8fSBarry Smith           }
2899e25ed09SBarry Smith         }
2901eb62cbbSBarry Smith         MatSetValues(aij->B,1,&row,1,&col,&val,addv);
2911eb62cbbSBarry Smith       }
2921eb62cbbSBarry Smith     }
2931eb62cbbSBarry Smith     count--;
2941eb62cbbSBarry Smith   }
2950452661fSBarry Smith   PetscFree(aij->recv_waits); PetscFree(aij->rvalues);
2961eb62cbbSBarry Smith 
2971eb62cbbSBarry Smith   /* wait on sends */
2981eb62cbbSBarry Smith   if (aij->nsends) {
2990a198c4cSBarry Smith     send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
3001eb62cbbSBarry Smith     MPI_Waitall(aij->nsends,aij->send_waits,send_status);
3010452661fSBarry Smith     PetscFree(send_status);
3021eb62cbbSBarry Smith   }
3030452661fSBarry Smith   PetscFree(aij->send_waits); PetscFree(aij->svalues);
3041eb62cbbSBarry Smith 
30564119d58SLois Curfman McInnes   aij->insertmode = NOT_SET_VALUES;
30678b31e54SBarry Smith   ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
30778b31e54SBarry Smith   ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);
3081eb62cbbSBarry Smith 
3092493cbb0SBarry Smith   /* determine if any processor has disassembled, if so we must
3102493cbb0SBarry Smith      also disassemble ourselfs, in order that we may reassemble. */
311227d817aSBarry Smith   MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
312227d817aSBarry Smith   if (mat->was_assembled && !other_disassembled) {
3132493cbb0SBarry Smith     ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
3142493cbb0SBarry Smith   }
3152493cbb0SBarry Smith 
3166d4a8577SBarry Smith   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
31778b31e54SBarry Smith     ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
3185e42470aSBarry Smith   }
31978b31e54SBarry Smith   ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
32078b31e54SBarry Smith   ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);
3215e42470aSBarry Smith 
3227a0afa10SBarry Smith   if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;}
3238a729477SBarry Smith   return 0;
3248a729477SBarry Smith }
3258a729477SBarry Smith 
3262ee70a88SLois Curfman McInnes static int MatZeroEntries_MPIAIJ(Mat A)
3271eb62cbbSBarry Smith {
32844a69424SLois Curfman McInnes   Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
329dbd7a890SLois Curfman McInnes   int        ierr;
33078b31e54SBarry Smith   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
33178b31e54SBarry Smith   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
3321eb62cbbSBarry Smith   return 0;
3331eb62cbbSBarry Smith }
3341eb62cbbSBarry Smith 
3351eb62cbbSBarry Smith /* the code does not do the diagonal entries correctly unless the
3361eb62cbbSBarry Smith    matrix is square and the column and row owerships are identical.
3371eb62cbbSBarry Smith    This is a BUG. The only way to fix it seems to be to access
3381eb62cbbSBarry Smith    aij->A and aij->B directly and not through the MatZeroRows()
3391eb62cbbSBarry Smith    routine.
3401eb62cbbSBarry Smith */
34144a69424SLois Curfman McInnes static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
3421eb62cbbSBarry Smith {
34344a69424SLois Curfman McInnes   Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
34417699dbbSLois Curfman McInnes   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
3456abc6512SBarry Smith   int            *procs,*nprocs,j,found,idx,nsends,*work;
34617699dbbSLois Curfman McInnes   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
3475392566eSBarry Smith   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
348d6dfbf8fSBarry Smith   int            *lens,imdex,*lrows,*values;
349d6dfbf8fSBarry Smith   MPI_Comm       comm = A->comm;
3501eb62cbbSBarry Smith   MPI_Request    *send_waits,*recv_waits;
3511eb62cbbSBarry Smith   MPI_Status     recv_status,*send_status;
3521eb62cbbSBarry Smith   IS             istmp;
3531eb62cbbSBarry Smith 
35477c4ece6SBarry Smith   ierr = ISGetSize(is,&N); CHKERRQ(ierr);
35578b31e54SBarry Smith   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
3561eb62cbbSBarry Smith 
3571eb62cbbSBarry Smith   /*  first count number of contributors to each processor */
3580452661fSBarry Smith   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
359cddf8d76SBarry Smith   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
3600452661fSBarry Smith   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
3611eb62cbbSBarry Smith   for ( i=0; i<N; i++ ) {
3621eb62cbbSBarry Smith     idx = rows[i];
3631eb62cbbSBarry Smith     found = 0;
36417699dbbSLois Curfman McInnes     for ( j=0; j<size; j++ ) {
3651eb62cbbSBarry Smith       if (idx >= owners[j] && idx < owners[j+1]) {
3661eb62cbbSBarry Smith         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
3671eb62cbbSBarry Smith       }
3681eb62cbbSBarry Smith     }
369bbb6d6a8SBarry Smith     if (!found) SETERRQ(1,"MatZeroRows_MPIAIJ:Index out of range");
3701eb62cbbSBarry Smith   }
37117699dbbSLois Curfman McInnes   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
3721eb62cbbSBarry Smith 
3731eb62cbbSBarry Smith   /* inform other processors of number of messages and max length*/
3740452661fSBarry Smith   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
37517699dbbSLois Curfman McInnes   MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
37617699dbbSLois Curfman McInnes   nrecvs = work[rank];
37717699dbbSLois Curfman McInnes   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
37817699dbbSLois Curfman McInnes   nmax = work[rank];
3790452661fSBarry Smith   PetscFree(work);
3801eb62cbbSBarry Smith 
3811eb62cbbSBarry Smith   /* post receives:   */
3820452661fSBarry Smith   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
38378b31e54SBarry Smith   CHKPTRQ(rvalues);
3840452661fSBarry Smith   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
38578b31e54SBarry Smith   CHKPTRQ(recv_waits);
3861eb62cbbSBarry Smith   for ( i=0; i<nrecvs; i++ ) {
387416022c9SBarry Smith     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
3881eb62cbbSBarry Smith   }
3891eb62cbbSBarry Smith 
3901eb62cbbSBarry Smith   /* do sends:
3911eb62cbbSBarry Smith       1) starts[i] gives the starting index in svalues for stuff going to
3921eb62cbbSBarry Smith          the ith processor
3931eb62cbbSBarry Smith   */
3940452661fSBarry Smith   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
3950452661fSBarry Smith   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
39678b31e54SBarry Smith   CHKPTRQ(send_waits);
3970452661fSBarry Smith   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
3981eb62cbbSBarry Smith   starts[0] = 0;
39917699dbbSLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
4001eb62cbbSBarry Smith   for ( i=0; i<N; i++ ) {
4011eb62cbbSBarry Smith     svalues[starts[owner[i]]++] = rows[i];
4021eb62cbbSBarry Smith   }
4031eb62cbbSBarry Smith   ISRestoreIndices(is,&rows);
4041eb62cbbSBarry Smith 
4051eb62cbbSBarry Smith   starts[0] = 0;
40617699dbbSLois Curfman McInnes   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
4071eb62cbbSBarry Smith   count = 0;
40817699dbbSLois Curfman McInnes   for ( i=0; i<size; i++ ) {
4091eb62cbbSBarry Smith     if (procs[i]) {
410416022c9SBarry Smith       MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
4111eb62cbbSBarry Smith     }
4121eb62cbbSBarry Smith   }
4130452661fSBarry Smith   PetscFree(starts);
4141eb62cbbSBarry Smith 
41517699dbbSLois Curfman McInnes   base = owners[rank];
4161eb62cbbSBarry Smith 
4171eb62cbbSBarry Smith   /*  wait on receives */
4180452661fSBarry Smith   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
4191eb62cbbSBarry Smith   source = lens + nrecvs;
4201eb62cbbSBarry Smith   count  = nrecvs; slen = 0;
4211eb62cbbSBarry Smith   while (count) {
422d6dfbf8fSBarry Smith     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
4231eb62cbbSBarry Smith     /* unpack receives into our local space */
4241eb62cbbSBarry Smith     MPI_Get_count(&recv_status,MPI_INT,&n);
425d6dfbf8fSBarry Smith     source[imdex]  = recv_status.MPI_SOURCE;
426d6dfbf8fSBarry Smith     lens[imdex]  = n;
4271eb62cbbSBarry Smith     slen += n;
4281eb62cbbSBarry Smith     count--;
4291eb62cbbSBarry Smith   }
4300452661fSBarry Smith   PetscFree(recv_waits);
4311eb62cbbSBarry Smith 
4321eb62cbbSBarry Smith   /* move the data into the send scatter */
4330452661fSBarry Smith   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
4341eb62cbbSBarry Smith   count = 0;
4351eb62cbbSBarry Smith   for ( i=0; i<nrecvs; i++ ) {
4361eb62cbbSBarry Smith     values = rvalues + i*nmax;
4371eb62cbbSBarry Smith     for ( j=0; j<lens[i]; j++ ) {
4381eb62cbbSBarry Smith       lrows[count++] = values[j] - base;
4391eb62cbbSBarry Smith     }
4401eb62cbbSBarry Smith   }
4410452661fSBarry Smith   PetscFree(rvalues); PetscFree(lens);
4420452661fSBarry Smith   PetscFree(owner); PetscFree(nprocs);
4431eb62cbbSBarry Smith 
4441eb62cbbSBarry Smith   /* actually zap the local rows */
445537820f0SBarry Smith   ierr = ISCreateGeneral(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
446464493b3SBarry Smith   PLogObjectParent(A,istmp);
4470452661fSBarry Smith   PetscFree(lrows);
44878b31e54SBarry Smith   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
44978b31e54SBarry Smith   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
45078b31e54SBarry Smith   ierr = ISDestroy(istmp); CHKERRQ(ierr);
4511eb62cbbSBarry Smith 
4521eb62cbbSBarry Smith   /* wait on sends */
4531eb62cbbSBarry Smith   if (nsends) {
4540452661fSBarry Smith     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
45578b31e54SBarry Smith     CHKPTRQ(send_status);
4561eb62cbbSBarry Smith     MPI_Waitall(nsends,send_waits,send_status);
4570452661fSBarry Smith     PetscFree(send_status);
4581eb62cbbSBarry Smith   }
4590452661fSBarry Smith   PetscFree(send_waits); PetscFree(svalues);
4601eb62cbbSBarry Smith 
4611eb62cbbSBarry Smith   return 0;
4621eb62cbbSBarry Smith }
4631eb62cbbSBarry Smith 
464416022c9SBarry Smith static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
4651eb62cbbSBarry Smith {
466416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
467fbd6ef76SBarry Smith   int        ierr,nt;
468416022c9SBarry Smith 
469a2ce50c7SBarry Smith   ierr = VecGetLocalSize(xx,&nt);  CHKERRQ(ierr);
470fbd6ef76SBarry Smith   if (nt != a->n) {
47147b4a8eaSLois Curfman McInnes     SETERRQ(1,"MatMult_MPIAIJ:Incompatible parition of A and xx");
472fbd6ef76SBarry Smith   }
47343a90d84SBarry Smith   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
47438597bd4SSatish Balay   ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr);
47543a90d84SBarry Smith   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
47638597bd4SSatish Balay   ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
4771eb62cbbSBarry Smith   return 0;
4781eb62cbbSBarry Smith }
4791eb62cbbSBarry Smith 
480416022c9SBarry Smith static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
481da3a660dSBarry Smith {
482416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
483da3a660dSBarry Smith   int        ierr;
48443a90d84SBarry Smith   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
48538597bd4SSatish Balay   ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
48643a90d84SBarry Smith   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
48738597bd4SSatish Balay   ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
488da3a660dSBarry Smith   return 0;
489da3a660dSBarry Smith }
490da3a660dSBarry Smith 
491416022c9SBarry Smith static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
492da3a660dSBarry Smith {
493416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
494da3a660dSBarry Smith   int        ierr;
495da3a660dSBarry Smith 
496da3a660dSBarry Smith   /* do nondiagonal part */
49738597bd4SSatish Balay   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
498da3a660dSBarry Smith   /* send it on its way */
499537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
500da3a660dSBarry Smith   /* do local part */
50138597bd4SSatish Balay   ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr);
502da3a660dSBarry Smith   /* receive remote parts: note this assumes the values are not actually */
503da3a660dSBarry Smith   /* inserted in yy until the next line, which is true for my implementation*/
504da3a660dSBarry Smith   /* but is not perhaps always true. */
505537820f0SBarry Smith   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
506da3a660dSBarry Smith   return 0;
507da3a660dSBarry Smith }
508da3a660dSBarry Smith 
509416022c9SBarry Smith static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
510da3a660dSBarry Smith {
511416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
512da3a660dSBarry Smith   int        ierr;
513da3a660dSBarry Smith 
514da3a660dSBarry Smith   /* do nondiagonal part */
51538597bd4SSatish Balay   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
516da3a660dSBarry Smith   /* send it on its way */
517537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
518da3a660dSBarry Smith   /* do local part */
51938597bd4SSatish Balay   ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
520da3a660dSBarry Smith   /* receive remote parts: note this assumes the values are not actually */
521da3a660dSBarry Smith   /* inserted in yy until the next line, which is true for my implementation*/
522da3a660dSBarry Smith   /* but is not perhaps always true. */
5230a198c4cSBarry Smith   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
524da3a660dSBarry Smith   return 0;
525da3a660dSBarry Smith }
526da3a660dSBarry Smith 
5271eb62cbbSBarry Smith /*
5281eb62cbbSBarry Smith   This only works correctly for square matrices where the subblock A->A is the
5291eb62cbbSBarry Smith    diagonal block
5301eb62cbbSBarry Smith */
531416022c9SBarry Smith static int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
5321eb62cbbSBarry Smith {
533416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
53444cd7ae7SLois Curfman McInnes   if (a->M != a->N)
53544cd7ae7SLois Curfman McInnes     SETERRQ(1,"MatGetDiagonal_MPIAIJ:Supports only square matrix where A->A is diag block");
5365baf8537SBarry Smith   if (a->rstart != a->cstart || a->rend != a->cend) {
5375baf8537SBarry Smith     SETERRQ(1,"MatGetDiagonal_MPIAIJ:row partition must equal col partition");  }
538416022c9SBarry Smith   return MatGetDiagonal(a->A,v);
5391eb62cbbSBarry Smith }
5401eb62cbbSBarry Smith 
541052efed2SBarry Smith static int MatScale_MPIAIJ(Scalar *aa,Mat A)
542052efed2SBarry Smith {
543052efed2SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
544052efed2SBarry Smith   int        ierr;
545052efed2SBarry Smith   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
546052efed2SBarry Smith   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
547052efed2SBarry Smith   return 0;
548052efed2SBarry Smith }
549052efed2SBarry Smith 
55044a69424SLois Curfman McInnes static int MatDestroy_MPIAIJ(PetscObject obj)
5511eb62cbbSBarry Smith {
5521eb62cbbSBarry Smith   Mat        mat = (Mat) obj;
55344a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
5541eb62cbbSBarry Smith   int        ierr;
555a5a9c739SBarry Smith #if defined(PETSC_LOG)
5566e35fa57SLois Curfman McInnes   PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N);
557a5a9c739SBarry Smith #endif
5580452661fSBarry Smith   PetscFree(aij->rowners);
55978b31e54SBarry Smith   ierr = MatDestroy(aij->A); CHKERRQ(ierr);
56078b31e54SBarry Smith   ierr = MatDestroy(aij->B); CHKERRQ(ierr);
5610452661fSBarry Smith   if (aij->colmap) PetscFree(aij->colmap);
5620452661fSBarry Smith   if (aij->garray) PetscFree(aij->garray);
5631eb62cbbSBarry Smith   if (aij->lvec)   VecDestroy(aij->lvec);
564a56f8943SBarry Smith   if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
5657a0afa10SBarry Smith   if (aij->rowvalues) PetscFree(aij->rowvalues);
5660452661fSBarry Smith   PetscFree(aij);
56790f02eecSBarry Smith   if (mat->mapping) {
56890f02eecSBarry Smith     ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr);
56990f02eecSBarry Smith   }
570a5a9c739SBarry Smith   PLogObjectDestroy(mat);
5710452661fSBarry Smith   PetscHeaderDestroy(mat);
5721eb62cbbSBarry Smith   return 0;
5731eb62cbbSBarry Smith }
5747857610eSBarry Smith #include "draw.h"
575b16a3bb1SBarry Smith #include "pinclude/pviewer.h"
576ee50ffe9SBarry Smith 
577416022c9SBarry Smith static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
5781eb62cbbSBarry Smith {
579416022c9SBarry Smith   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
580416022c9SBarry Smith   int         ierr;
581416022c9SBarry Smith 
58217699dbbSLois Curfman McInnes   if (aij->size == 1) {
583416022c9SBarry Smith     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
584416022c9SBarry Smith   }
585a523beb4SLois Curfman McInnes   else SETERRQ(1,"MatView_MPIAIJ_Binary:Only uniprocessor output supported");
586416022c9SBarry Smith   return 0;
587416022c9SBarry Smith }
588416022c9SBarry Smith 
589d7e8b826SBarry Smith static int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
590416022c9SBarry Smith {
59144a69424SLois Curfman McInnes   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
592dbb450caSBarry Smith   Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
593a56f8943SBarry Smith   int         ierr, format,shift = C->indexshift,rank;
594d636dbe3SBarry Smith   FILE        *fd;
59519bcc07fSBarry Smith   ViewerType  vtype;
596416022c9SBarry Smith 
59719bcc07fSBarry Smith   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
59819bcc07fSBarry Smith   if (vtype  == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
59990ace30eSBarry Smith     ierr = ViewerGetFormat(viewer,&format);
6000a198c4cSBarry Smith     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
6014e220ebcSLois Curfman McInnes       MatInfo info;
6024e220ebcSLois Curfman McInnes       int     flg;
603a56f8943SBarry Smith       MPI_Comm_rank(mat->comm,&rank);
60490ace30eSBarry Smith       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
6054e220ebcSLois Curfman McInnes       ierr = MatGetInfo(mat,MAT_LOCAL,&info);
60695e01e2fSLois Curfman McInnes       ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr);
60777c4ece6SBarry Smith       PetscSequentialPhaseBegin(mat->comm,1);
60895e01e2fSLois Curfman McInnes       if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
6094e220ebcSLois Curfman McInnes          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
61095e01e2fSLois Curfman McInnes       else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
6114e220ebcSLois Curfman McInnes          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
6124e220ebcSLois Curfman McInnes       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);
6134e220ebcSLois Curfman McInnes       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);
6144e220ebcSLois Curfman McInnes       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);
6154e220ebcSLois Curfman McInnes       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);
616a56f8943SBarry Smith       fflush(fd);
61777c4ece6SBarry Smith       PetscSequentialPhaseEnd(mat->comm,1);
618a40aa06bSLois Curfman McInnes       ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr);
619416022c9SBarry Smith       return 0;
620416022c9SBarry Smith     }
6210a198c4cSBarry Smith     else if (format == VIEWER_FORMAT_ASCII_INFO) {
62208480c60SBarry Smith       return 0;
62308480c60SBarry Smith     }
624416022c9SBarry Smith   }
625416022c9SBarry Smith 
62619bcc07fSBarry Smith   if (vtype == DRAW_VIEWER) {
62719bcc07fSBarry Smith     Draw       draw;
62819bcc07fSBarry Smith     PetscTruth isnull;
62919bcc07fSBarry Smith     ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
63019bcc07fSBarry Smith     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
63119bcc07fSBarry Smith   }
63219bcc07fSBarry Smith 
63319bcc07fSBarry Smith   if (vtype == ASCII_FILE_VIEWER) {
63490ace30eSBarry Smith     ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
63577c4ece6SBarry Smith     PetscSequentialPhaseBegin(mat->comm,1);
636d13ab20cSBarry Smith     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
63717699dbbSLois Curfman McInnes            aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
6381eb62cbbSBarry Smith            aij->cend);
63978b31e54SBarry Smith     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
64078b31e54SBarry Smith     ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
641d13ab20cSBarry Smith     fflush(fd);
64277c4ece6SBarry Smith     PetscSequentialPhaseEnd(mat->comm,1);
643d13ab20cSBarry Smith   }
644416022c9SBarry Smith   else {
645a56f8943SBarry Smith     int size = aij->size;
646a56f8943SBarry Smith     rank = aij->rank;
64717699dbbSLois Curfman McInnes     if (size == 1) {
64878b31e54SBarry Smith       ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
64995373324SBarry Smith     }
65095373324SBarry Smith     else {
65195373324SBarry Smith       /* assemble the entire matrix onto first processor. */
65295373324SBarry Smith       Mat         A;
653ec8511deSBarry Smith       Mat_SeqAIJ *Aloc;
6542eb8c8abSBarry Smith       int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
65595373324SBarry Smith       Scalar      *a;
6562ee70a88SLois Curfman McInnes 
65717699dbbSLois Curfman McInnes       if (!rank) {
658b4fd4287SBarry Smith         ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
659c451ab25SLois Curfman McInnes                CHKERRQ(ierr);
66095373324SBarry Smith       }
66195373324SBarry Smith       else {
662b4fd4287SBarry Smith         ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
663c451ab25SLois Curfman McInnes                CHKERRQ(ierr);
66495373324SBarry Smith       }
665464493b3SBarry Smith       PLogObjectParent(mat,A);
666416022c9SBarry Smith 
66795373324SBarry Smith       /* copy over the A part */
668ec8511deSBarry Smith       Aloc = (Mat_SeqAIJ*) aij->A->data;
6692ee70a88SLois Curfman McInnes       m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
67095373324SBarry Smith       row = aij->rstart;
671dbb450caSBarry Smith       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
67295373324SBarry Smith       for ( i=0; i<m; i++ ) {
673416022c9SBarry Smith         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
67495373324SBarry Smith         row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
67595373324SBarry Smith       }
6762ee70a88SLois Curfman McInnes       aj = Aloc->j;
677dbb450caSBarry Smith       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}
67895373324SBarry Smith 
67995373324SBarry Smith       /* copy over the B part */
680ec8511deSBarry Smith       Aloc = (Mat_SeqAIJ*) aij->B->data;
6812ee70a88SLois Curfman McInnes       m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
68295373324SBarry Smith       row = aij->rstart;
6830452661fSBarry Smith       ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
684dbb450caSBarry Smith       for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
68595373324SBarry Smith       for ( i=0; i<m; i++ ) {
686416022c9SBarry Smith         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
68795373324SBarry Smith         row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
68895373324SBarry Smith       }
6890452661fSBarry Smith       PetscFree(ct);
6906d4a8577SBarry Smith       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
6916d4a8577SBarry Smith       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
69217699dbbSLois Curfman McInnes       if (!rank) {
69378b31e54SBarry Smith         ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
69495373324SBarry Smith       }
69578b31e54SBarry Smith       ierr = MatDestroy(A); CHKERRQ(ierr);
69695373324SBarry Smith     }
69795373324SBarry Smith   }
6981eb62cbbSBarry Smith   return 0;
6991eb62cbbSBarry Smith }
7001eb62cbbSBarry Smith 
701416022c9SBarry Smith static int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
702416022c9SBarry Smith {
703416022c9SBarry Smith   Mat         mat = (Mat) obj;
704416022c9SBarry Smith   int         ierr;
70519bcc07fSBarry Smith   ViewerType  vtype;
706416022c9SBarry Smith 
70719bcc07fSBarry Smith   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
70819bcc07fSBarry Smith   if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
70919bcc07fSBarry Smith       vtype == DRAW_VIEWER       || vtype == MATLAB_VIEWER) {
710d7e8b826SBarry Smith     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
711416022c9SBarry Smith   }
71219bcc07fSBarry Smith   else if (vtype == BINARY_FILE_VIEWER) {
713416022c9SBarry Smith     return MatView_MPIAIJ_Binary(mat,viewer);
714416022c9SBarry Smith   }
715416022c9SBarry Smith   return 0;
716416022c9SBarry Smith }
717416022c9SBarry Smith 
7181eb62cbbSBarry Smith /*
7191eb62cbbSBarry Smith     This has to provide several versions.
7201eb62cbbSBarry Smith 
7211eb62cbbSBarry Smith      2) a) use only local smoothing updating outer values only once.
7221eb62cbbSBarry Smith         b) local smoothing updating outer values each inner iteration
723d6dfbf8fSBarry Smith      3) color updating out values betwen colors.
7241eb62cbbSBarry Smith */
725fc76ce87SLois Curfman McInnes static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
726dbb450caSBarry Smith                            double fshift,int its,Vec xx)
7278a729477SBarry Smith {
72844a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
729d6dfbf8fSBarry Smith   Mat        AA = mat->A, BB = mat->B;
730ec8511deSBarry Smith   Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
731c16cb8f2SBarry Smith   Scalar     *b,*x,*xs,*ls,d,*v,sum;
7326abc6512SBarry Smith   int        ierr,*idx, *diag;
733416022c9SBarry Smith   int        n = mat->n, m = mat->m, i,shift = A->indexshift;
7348a729477SBarry Smith 
735d6dfbf8fSBarry Smith   VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
736dbb450caSBarry Smith   xs = x + shift; /* shift by one for index start of 1 */
737dbb450caSBarry Smith   ls = ls + shift;
738ec8511deSBarry Smith   if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;}
739d6dfbf8fSBarry Smith   diag = A->diag;
740c16cb8f2SBarry Smith   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
741da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
74238597bd4SSatish Balay       return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
743da3a660dSBarry Smith     }
74443a90d84SBarry Smith     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
74578b31e54SBarry Smith     CHKERRQ(ierr);
74643a90d84SBarry Smith     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
74778b31e54SBarry Smith     CHKERRQ(ierr);
748d6dfbf8fSBarry Smith     while (its--) {
749d6dfbf8fSBarry Smith       /* go down through the rows */
750d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
751d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
752dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
753dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
754d6dfbf8fSBarry Smith         sum  = b[i];
755d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
756dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
757d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
758dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
759dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
760d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
76155a1b374SBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
762d6dfbf8fSBarry Smith       }
763d6dfbf8fSBarry Smith       /* come up through the rows */
764d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
765d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
766dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
767dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
768d6dfbf8fSBarry Smith         sum  = b[i];
769d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
770dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
771d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
772dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
773dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
774d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
77555a1b374SBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
776d6dfbf8fSBarry Smith       }
777d6dfbf8fSBarry Smith     }
778d6dfbf8fSBarry Smith   }
779d6dfbf8fSBarry Smith   else if (flag & SOR_LOCAL_FORWARD_SWEEP){
780da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
78138597bd4SSatish Balay       return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
782da3a660dSBarry Smith     }
78343a90d84SBarry Smith     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
78478b31e54SBarry Smith     CHKERRQ(ierr);
78543a90d84SBarry Smith     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
78678b31e54SBarry Smith     CHKERRQ(ierr);
787d6dfbf8fSBarry Smith     while (its--) {
788d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
789d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
790dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
791dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
792d6dfbf8fSBarry Smith         sum  = b[i];
793d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
794dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
795d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
796dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
797dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
798d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
79955a1b374SBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
800d6dfbf8fSBarry Smith       }
801d6dfbf8fSBarry Smith     }
802d6dfbf8fSBarry Smith   }
803d6dfbf8fSBarry Smith   else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
804da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
80538597bd4SSatish Balay       return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
806da3a660dSBarry Smith     }
80743a90d84SBarry Smith     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,
80878b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
80943a90d84SBarry Smith     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,
81078b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
811d6dfbf8fSBarry Smith     while (its--) {
812d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
813d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
814dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
815dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
816d6dfbf8fSBarry Smith         sum  = b[i];
817d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
818dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
819d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
820dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
821dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
822d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
82355a1b374SBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
824d6dfbf8fSBarry Smith       }
825d6dfbf8fSBarry Smith     }
826d6dfbf8fSBarry Smith   }
827c16cb8f2SBarry Smith   else {
828c16cb8f2SBarry Smith     SETERRQ(1,"MatRelax_MPIAIJ:Option not supported");
829c16cb8f2SBarry Smith   }
8308a729477SBarry Smith   return 0;
8318a729477SBarry Smith }
832a66be287SLois Curfman McInnes 
8334e220ebcSLois Curfman McInnes static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
834a66be287SLois Curfman McInnes {
835a66be287SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
836a66be287SLois Curfman McInnes   Mat        A = mat->A, B = mat->B;
8374e220ebcSLois Curfman McInnes   int        ierr;
8384e220ebcSLois Curfman McInnes   double     isend[5], irecv[5];
839a66be287SLois Curfman McInnes 
8404e220ebcSLois Curfman McInnes   info->rows_global    = (double)mat->M;
8414e220ebcSLois Curfman McInnes   info->columns_global = (double)mat->N;
8424e220ebcSLois Curfman McInnes   info->rows_local     = (double)mat->m;
8434e220ebcSLois Curfman McInnes   info->columns_local  = (double)mat->N;
8444e220ebcSLois Curfman McInnes   info->block_size     = 1.0;
8454e220ebcSLois Curfman McInnes   ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
8464e220ebcSLois Curfman McInnes   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
8474e220ebcSLois Curfman McInnes   isend[3] = info->memory;  isend[4] = info->mallocs;
8484e220ebcSLois Curfman McInnes   ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
8494e220ebcSLois Curfman McInnes   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
8504e220ebcSLois Curfman McInnes   isend[3] += info->memory;  isend[4] += info->mallocs;
851a66be287SLois Curfman McInnes   if (flag == MAT_LOCAL) {
8524e220ebcSLois Curfman McInnes     info->nz_used      = isend[0];
8534e220ebcSLois Curfman McInnes     info->nz_allocated = isend[1];
8544e220ebcSLois Curfman McInnes     info->nz_unneeded  = isend[2];
8554e220ebcSLois Curfman McInnes     info->memory       = isend[3];
8564e220ebcSLois Curfman McInnes     info->mallocs      = isend[4];
857a66be287SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_MAX) {
8584e220ebcSLois Curfman McInnes     MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);
8594e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
8604e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
8614e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
8624e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
8634e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
864a66be287SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_SUM) {
8654e220ebcSLois Curfman McInnes     MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);
8664e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
8674e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
8684e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
8694e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
8704e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
871a66be287SLois Curfman McInnes   }
8724e220ebcSLois Curfman McInnes   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
8734e220ebcSLois Curfman McInnes   info->fill_ratio_needed = 0;
8744e220ebcSLois Curfman McInnes   info->factor_mallocs    = 0;
8754e220ebcSLois Curfman McInnes 
876a66be287SLois Curfman McInnes   return 0;
877a66be287SLois Curfman McInnes }
878a66be287SLois Curfman McInnes 
879299609e3SLois Curfman McInnes extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*);
880299609e3SLois Curfman McInnes extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*);
881299609e3SLois Curfman McInnes extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double);
882299609e3SLois Curfman McInnes extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *);
883299609e3SLois Curfman McInnes extern int MatSolve_MPIAIJ(Mat,Vec,Vec);
884299609e3SLois Curfman McInnes extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec);
885299609e3SLois Curfman McInnes extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec);
886299609e3SLois Curfman McInnes extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec);
887299609e3SLois Curfman McInnes 
888416022c9SBarry Smith static int MatSetOption_MPIAIJ(Mat A,MatOption op)
889c74985f6SBarry Smith {
890c0bbcb79SLois Curfman McInnes   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
891c74985f6SBarry Smith 
8926d4a8577SBarry Smith   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
8936d4a8577SBarry Smith       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
8946da5968aSLois Curfman McInnes       op == MAT_COLUMNS_UNSORTED ||
895b1fbbac0SLois Curfman McInnes       op == MAT_COLUMNS_SORTED) {
896b1fbbac0SLois Curfman McInnes         MatSetOption(a->A,op);
897b1fbbac0SLois Curfman McInnes         MatSetOption(a->B,op);
898b1fbbac0SLois Curfman McInnes   } else if (op == MAT_ROW_ORIENTED) {
899aeafbbfcSLois Curfman McInnes     a->roworiented = 1;
900c0bbcb79SLois Curfman McInnes     MatSetOption(a->A,op);
901c0bbcb79SLois Curfman McInnes     MatSetOption(a->B,op);
902b1fbbac0SLois Curfman McInnes   } else if (op == MAT_ROWS_SORTED ||
9036da5968aSLois Curfman McInnes              op == MAT_ROWS_UNSORTED ||
9046d4a8577SBarry Smith              op == MAT_SYMMETRIC ||
9056d4a8577SBarry Smith              op == MAT_STRUCTURALLY_SYMMETRIC ||
9066d4a8577SBarry Smith              op == MAT_YES_NEW_DIAGONALS)
90794a424c1SBarry Smith     PLogInfo(A,"Info:MatSetOption_MPIAIJ:Option ignored\n");
9086d4a8577SBarry Smith   else if (op == MAT_COLUMN_ORIENTED) {
9094b0e389bSBarry Smith     a->roworiented = 0;
9104b0e389bSBarry Smith     MatSetOption(a->A,op);
9114b0e389bSBarry Smith     MatSetOption(a->B,op);
91290f02eecSBarry Smith   } else if (op == MAT_IGNORE_OFF_PROCESSOR_ENTRIES) {
91390f02eecSBarry Smith     a->donotstash = 1;
91490f02eecSBarry Smith   } else if (op == MAT_NO_NEW_DIAGONALS)
9156d4a8577SBarry Smith     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:MAT_NO_NEW_DIAGONALS");}
916c0bbcb79SLois Curfman McInnes   else
9174d39ef84SLois Curfman McInnes     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:unknown option");}
918c74985f6SBarry Smith   return 0;
919c74985f6SBarry Smith }
920c74985f6SBarry Smith 
921d1710a03SLois Curfman McInnes static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
922c74985f6SBarry Smith {
92344a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
924c74985f6SBarry Smith   *m = mat->M; *n = mat->N;
925c74985f6SBarry Smith   return 0;
926c74985f6SBarry Smith }
927c74985f6SBarry Smith 
928d1710a03SLois Curfman McInnes static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
929c74985f6SBarry Smith {
93044a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
931b7c46309SBarry Smith   *m = mat->m; *n = mat->N;
932c74985f6SBarry Smith   return 0;
933c74985f6SBarry Smith }
934c74985f6SBarry Smith 
935d1710a03SLois Curfman McInnes static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
936c74985f6SBarry Smith {
93744a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
938c74985f6SBarry Smith   *m = mat->rstart; *n = mat->rend;
939c74985f6SBarry Smith   return 0;
940c74985f6SBarry Smith }
941c74985f6SBarry Smith 
9426d84be18SBarry Smith extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
9436d84be18SBarry Smith extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
9446d84be18SBarry Smith 
9456d84be18SBarry Smith int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
94639e00950SLois Curfman McInnes {
947154123eaSLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
94870f0671dSBarry Smith   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
949154123eaSLois Curfman McInnes   int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
950154123eaSLois Curfman McInnes   int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
95170f0671dSBarry Smith   int        *cmap, *idx_p;
95239e00950SLois Curfman McInnes 
9537a0afa10SBarry Smith   if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,"MatGetRow_MPIAIJ:Already active");
9547a0afa10SBarry Smith   mat->getrowactive = PETSC_TRUE;
9557a0afa10SBarry Smith 
95670f0671dSBarry Smith   if (!mat->rowvalues && (idx || v)) {
9577a0afa10SBarry Smith     /*
9587a0afa10SBarry Smith         allocate enough space to hold information from the longest row.
9597a0afa10SBarry Smith     */
9607a0afa10SBarry Smith     Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data;
961c16cb8f2SBarry Smith     int     max = 1,m = mat->m,tmp;
962c16cb8f2SBarry Smith     for ( i=0; i<m; i++ ) {
9637a0afa10SBarry Smith       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
9647a0afa10SBarry Smith       if (max < tmp) { max = tmp; }
9657a0afa10SBarry Smith     }
9667a0afa10SBarry Smith     mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar)));
9677a0afa10SBarry Smith     CHKPTRQ(mat->rowvalues);
9687a0afa10SBarry Smith     mat->rowindices = (int *) (mat->rowvalues + max);
9697a0afa10SBarry Smith   }
9707a0afa10SBarry Smith 
971b49de8d1SLois Curfman McInnes   if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIAIJ:Only local rows")
972abc0e9e4SLois Curfman McInnes   lrow = row - rstart;
97339e00950SLois Curfman McInnes 
974154123eaSLois Curfman McInnes   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
975154123eaSLois Curfman McInnes   if (!v)   {pvA = 0; pvB = 0;}
976154123eaSLois Curfman McInnes   if (!idx) {pcA = 0; if (!v) pcB = 0;}
97738597bd4SSatish Balay   ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
97838597bd4SSatish Balay   ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
979154123eaSLois Curfman McInnes   nztot = nzA + nzB;
980154123eaSLois Curfman McInnes 
98170f0671dSBarry Smith   cmap  = mat->garray;
982154123eaSLois Curfman McInnes   if (v  || idx) {
983154123eaSLois Curfman McInnes     if (nztot) {
984154123eaSLois Curfman McInnes       /* Sort by increasing column numbers, assuming A and B already sorted */
98570f0671dSBarry Smith       int imark = -1;
986154123eaSLois Curfman McInnes       if (v) {
98770f0671dSBarry Smith         *v = v_p = mat->rowvalues;
98839e00950SLois Curfman McInnes         for ( i=0; i<nzB; i++ ) {
98970f0671dSBarry Smith           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
990154123eaSLois Curfman McInnes           else break;
991154123eaSLois Curfman McInnes         }
992154123eaSLois Curfman McInnes         imark = i;
99370f0671dSBarry Smith         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
99470f0671dSBarry Smith         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
995154123eaSLois Curfman McInnes       }
996154123eaSLois Curfman McInnes       if (idx) {
99770f0671dSBarry Smith         *idx = idx_p = mat->rowindices;
99870f0671dSBarry Smith         if (imark > -1) {
99970f0671dSBarry Smith           for ( i=0; i<imark; i++ ) {
100070f0671dSBarry Smith             idx_p[i] = cmap[cworkB[i]];
100170f0671dSBarry Smith           }
100270f0671dSBarry Smith         } else {
1003154123eaSLois Curfman McInnes           for ( i=0; i<nzB; i++ ) {
100470f0671dSBarry Smith             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1005154123eaSLois Curfman McInnes             else break;
1006154123eaSLois Curfman McInnes           }
1007154123eaSLois Curfman McInnes           imark = i;
100870f0671dSBarry Smith         }
100970f0671dSBarry Smith         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart + cworkA[i];
101070f0671dSBarry Smith         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]];
101139e00950SLois Curfman McInnes       }
101239e00950SLois Curfman McInnes     }
10131ca473b0SSatish Balay     else {
10141ca473b0SSatish Balay       if (idx) *idx = 0;
10151ca473b0SSatish Balay       if (v)   *v   = 0;
10161ca473b0SSatish Balay     }
1017154123eaSLois Curfman McInnes   }
101839e00950SLois Curfman McInnes   *nz = nztot;
101938597bd4SSatish Balay   ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
102038597bd4SSatish Balay   ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
102139e00950SLois Curfman McInnes   return 0;
102239e00950SLois Curfman McInnes }
102339e00950SLois Curfman McInnes 
10246d84be18SBarry Smith int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
102539e00950SLois Curfman McInnes {
10267a0afa10SBarry Smith   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
10277a0afa10SBarry Smith   if (aij->getrowactive == PETSC_FALSE) {
10287a0afa10SBarry Smith     SETERRQ(1,"MatRestoreRow_MPIAIJ:MatGetRow not called");
10297a0afa10SBarry Smith   }
10307a0afa10SBarry Smith   aij->getrowactive = PETSC_FALSE;
103139e00950SLois Curfman McInnes   return 0;
103239e00950SLois Curfman McInnes }
103339e00950SLois Curfman McInnes 
1034cddf8d76SBarry Smith static int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
1035855ac2c5SLois Curfman McInnes {
1036855ac2c5SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1037ec8511deSBarry Smith   Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
1038416022c9SBarry Smith   int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
1039416022c9SBarry Smith   double     sum = 0.0;
104004ca555eSLois Curfman McInnes   Scalar     *v;
104104ca555eSLois Curfman McInnes 
104217699dbbSLois Curfman McInnes   if (aij->size == 1) {
104314183eadSLois Curfman McInnes     ierr =  MatNorm(aij->A,type,norm); CHKERRQ(ierr);
104437fa93a5SLois Curfman McInnes   } else {
104504ca555eSLois Curfman McInnes     if (type == NORM_FROBENIUS) {
104604ca555eSLois Curfman McInnes       v = amat->a;
104704ca555eSLois Curfman McInnes       for (i=0; i<amat->nz; i++ ) {
104804ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX)
104904ca555eSLois Curfman McInnes         sum += real(conj(*v)*(*v)); v++;
105004ca555eSLois Curfman McInnes #else
105104ca555eSLois Curfman McInnes         sum += (*v)*(*v); v++;
105204ca555eSLois Curfman McInnes #endif
105304ca555eSLois Curfman McInnes       }
105404ca555eSLois Curfman McInnes       v = bmat->a;
105504ca555eSLois Curfman McInnes       for (i=0; i<bmat->nz; i++ ) {
105604ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX)
105704ca555eSLois Curfman McInnes         sum += real(conj(*v)*(*v)); v++;
105804ca555eSLois Curfman McInnes #else
105904ca555eSLois Curfman McInnes         sum += (*v)*(*v); v++;
106004ca555eSLois Curfman McInnes #endif
106104ca555eSLois Curfman McInnes       }
10626d84be18SBarry Smith       MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
106304ca555eSLois Curfman McInnes       *norm = sqrt(*norm);
106404ca555eSLois Curfman McInnes     }
106504ca555eSLois Curfman McInnes     else if (type == NORM_1) { /* max column norm */
106604ca555eSLois Curfman McInnes       double *tmp, *tmp2;
106704ca555eSLois Curfman McInnes       int    *jj, *garray = aij->garray;
10680452661fSBarry Smith       tmp  = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp);
10690452661fSBarry Smith       tmp2 = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp2);
1070cddf8d76SBarry Smith       PetscMemzero(tmp,aij->N*sizeof(double));
107104ca555eSLois Curfman McInnes       *norm = 0.0;
107204ca555eSLois Curfman McInnes       v = amat->a; jj = amat->j;
107304ca555eSLois Curfman McInnes       for ( j=0; j<amat->nz; j++ ) {
1074579c6b6fSBarry Smith         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
107504ca555eSLois Curfman McInnes       }
107604ca555eSLois Curfman McInnes       v = bmat->a; jj = bmat->j;
107704ca555eSLois Curfman McInnes       for ( j=0; j<bmat->nz; j++ ) {
1078579c6b6fSBarry Smith         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
107904ca555eSLois Curfman McInnes       }
10806d84be18SBarry Smith       MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);
108104ca555eSLois Curfman McInnes       for ( j=0; j<aij->N; j++ ) {
108204ca555eSLois Curfman McInnes         if (tmp2[j] > *norm) *norm = tmp2[j];
108304ca555eSLois Curfman McInnes       }
10840452661fSBarry Smith       PetscFree(tmp); PetscFree(tmp2);
108504ca555eSLois Curfman McInnes     }
108604ca555eSLois Curfman McInnes     else if (type == NORM_INFINITY) { /* max row norm */
1087515d9167SLois Curfman McInnes       double ntemp = 0.0;
108804ca555eSLois Curfman McInnes       for ( j=0; j<amat->m; j++ ) {
1089dbb450caSBarry Smith         v = amat->a + amat->i[j] + shift;
109004ca555eSLois Curfman McInnes         sum = 0.0;
109104ca555eSLois Curfman McInnes         for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
1092cddf8d76SBarry Smith           sum += PetscAbsScalar(*v); v++;
109304ca555eSLois Curfman McInnes         }
1094dbb450caSBarry Smith         v = bmat->a + bmat->i[j] + shift;
109504ca555eSLois Curfman McInnes         for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
1096cddf8d76SBarry Smith           sum += PetscAbsScalar(*v); v++;
109704ca555eSLois Curfman McInnes         }
1098515d9167SLois Curfman McInnes         if (sum > ntemp) ntemp = sum;
109904ca555eSLois Curfman McInnes       }
11006d84be18SBarry Smith       MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);
110104ca555eSLois Curfman McInnes     }
110204ca555eSLois Curfman McInnes     else {
1103515d9167SLois Curfman McInnes       SETERRQ(1,"MatNorm_MPIAIJ:No support for two norm");
110404ca555eSLois Curfman McInnes     }
110537fa93a5SLois Curfman McInnes   }
1106855ac2c5SLois Curfman McInnes   return 0;
1107855ac2c5SLois Curfman McInnes }
1108855ac2c5SLois Curfman McInnes 
11090de55854SLois Curfman McInnes static int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1110b7c46309SBarry Smith {
1111b7c46309SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1112dbb450caSBarry Smith   Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
1113416022c9SBarry Smith   int        ierr,shift = Aloc->indexshift;
1114416022c9SBarry Smith   Mat        B;
1115b7c46309SBarry Smith   int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
1116b7c46309SBarry Smith   Scalar     *array;
1117b7c46309SBarry Smith 
11183638b69dSLois Curfman McInnes   if (matout == PETSC_NULL && M != N)
11193638b69dSLois Curfman McInnes     SETERRQ(1,"MatTranspose_MPIAIJ:Square matrix only for in-place");
1120b4fd4287SBarry Smith   ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,
1121b4fd4287SBarry Smith          PETSC_NULL,&B); CHKERRQ(ierr);
1122b7c46309SBarry Smith 
1123b7c46309SBarry Smith   /* copy over the A part */
1124ec8511deSBarry Smith   Aloc = (Mat_SeqAIJ*) a->A->data;
1125b7c46309SBarry Smith   m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1126b7c46309SBarry Smith   row = a->rstart;
1127dbb450caSBarry Smith   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
1128b7c46309SBarry Smith   for ( i=0; i<m; i++ ) {
1129416022c9SBarry Smith     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1130b7c46309SBarry Smith     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1131b7c46309SBarry Smith   }
1132b7c46309SBarry Smith   aj = Aloc->j;
11334af08d9eSBarry Smith   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}
1134b7c46309SBarry Smith 
1135b7c46309SBarry Smith   /* copy over the B part */
1136ec8511deSBarry Smith   Aloc = (Mat_SeqAIJ*) a->B->data;
1137b7c46309SBarry Smith   m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1138b7c46309SBarry Smith   row = a->rstart;
11390452661fSBarry Smith   ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
1140dbb450caSBarry Smith   for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
1141b7c46309SBarry Smith   for ( i=0; i<m; i++ ) {
1142416022c9SBarry Smith     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1143b7c46309SBarry Smith     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1144b7c46309SBarry Smith   }
11450452661fSBarry Smith   PetscFree(ct);
11466d4a8577SBarry Smith   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
11476d4a8577SBarry Smith   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
11483638b69dSLois Curfman McInnes   if (matout != PETSC_NULL) {
11490de55854SLois Curfman McInnes     *matout = B;
11500de55854SLois Curfman McInnes   } else {
11510de55854SLois Curfman McInnes     /* This isn't really an in-place transpose .... but free data structures from a */
11520452661fSBarry Smith     PetscFree(a->rowners);
11530de55854SLois Curfman McInnes     ierr = MatDestroy(a->A); CHKERRQ(ierr);
11540de55854SLois Curfman McInnes     ierr = MatDestroy(a->B); CHKERRQ(ierr);
11550452661fSBarry Smith     if (a->colmap) PetscFree(a->colmap);
11560452661fSBarry Smith     if (a->garray) PetscFree(a->garray);
11570de55854SLois Curfman McInnes     if (a->lvec) VecDestroy(a->lvec);
1158a56f8943SBarry Smith     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
11590452661fSBarry Smith     PetscFree(a);
1160416022c9SBarry Smith     PetscMemcpy(A,B,sizeof(struct _Mat));
11610452661fSBarry Smith     PetscHeaderDestroy(B);
11620de55854SLois Curfman McInnes   }
1163b7c46309SBarry Smith   return 0;
1164b7c46309SBarry Smith }
1165b7c46309SBarry Smith 
11664b967eb1SSatish Balay int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1167a008b906SSatish Balay {
11684b967eb1SSatish Balay   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
11694b967eb1SSatish Balay   Mat a = aij->A, b = aij->B;
1170a008b906SSatish Balay   int ierr,s1,s2,s3;
1171a008b906SSatish Balay 
11724b967eb1SSatish Balay   ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr);
11734b967eb1SSatish Balay   if (rr) {
11744b967eb1SSatish Balay     s3 = aij->n;
11754b967eb1SSatish Balay     VecGetLocalSize_Fast(rr,s1);
11764b967eb1SSatish Balay     if (s1!=s3) SETERRQ(1,"MatDiagonalScale: right vector non-conforming local size");
11774b967eb1SSatish Balay     /* Overlap communication with computation. */
117843a90d84SBarry Smith     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
1179a008b906SSatish Balay   }
11804b967eb1SSatish Balay   if (ll) {
11814b967eb1SSatish Balay     VecGetLocalSize_Fast(ll,s1);
11824b967eb1SSatish Balay     if (s1!=s2) SETERRQ(1,"MatDiagonalScale: left vector non-conforming local size");
1183c351683dSSatish Balay     ierr = (*b->ops.diagonalscale)(b,ll,0); CHKERRQ(ierr);
11844b967eb1SSatish Balay   }
11854b967eb1SSatish Balay   /* scale  the diagonal block */
1186c351683dSSatish Balay   ierr = (*a->ops.diagonalscale)(a,ll,rr); CHKERRQ(ierr);
11874b967eb1SSatish Balay 
11884b967eb1SSatish Balay   if (rr) {
11894b967eb1SSatish Balay     /* Do a scatter end and then right scale the off-diagonal block */
119043a90d84SBarry Smith     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
1191c351683dSSatish Balay     ierr = (*b->ops.diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr);
11924b967eb1SSatish Balay   }
11934b967eb1SSatish Balay 
1194a008b906SSatish Balay   return 0;
1195a008b906SSatish Balay }
1196a008b906SSatish Balay 
1197a008b906SSatish Balay 
1198682d7d0cSBarry Smith extern int MatPrintHelp_SeqAIJ(Mat);
1199682d7d0cSBarry Smith static int MatPrintHelp_MPIAIJ(Mat A)
1200682d7d0cSBarry Smith {
1201682d7d0cSBarry Smith   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1202682d7d0cSBarry Smith 
1203682d7d0cSBarry Smith   if (!a->rank) return MatPrintHelp_SeqAIJ(a->A);
1204682d7d0cSBarry Smith   else return 0;
1205682d7d0cSBarry Smith }
1206682d7d0cSBarry Smith 
12075a838052SSatish Balay static int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
12085a838052SSatish Balay {
12095a838052SSatish Balay   *bs = 1;
12105a838052SSatish Balay   return 0;
12115a838052SSatish Balay }
12125a838052SSatish Balay 
1213fc76ce87SLois Curfman McInnes extern int MatConvert_MPIAIJ(Mat,MatType,Mat *);
12143d1612f7SBarry Smith static int MatConvertSameType_MPIAIJ(Mat,Mat *,int);
12152f86bd48SSatish Balay extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int);
12160a198c4cSBarry Smith extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
12170a198c4cSBarry Smith extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **);
12188a729477SBarry Smith /* -------------------------------------------------------------------*/
12192ee70a88SLois Curfman McInnes static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
122039e00950SLois Curfman McInnes        MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
122144a69424SLois Curfman McInnes        MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
122244a69424SLois Curfman McInnes        MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
122336ce4990SBarry Smith        0,0,
122436ce4990SBarry Smith        0,0,
122536ce4990SBarry Smith        0,0,
122644a69424SLois Curfman McInnes        MatRelax_MPIAIJ,
1227b7c46309SBarry Smith        MatTranspose_MPIAIJ,
1228a66be287SLois Curfman McInnes        MatGetInfo_MPIAIJ,0,
1229a008b906SSatish Balay        MatGetDiagonal_MPIAIJ,MatDiagonalScale_MPIAIJ,MatNorm_MPIAIJ,
1230ee50ffe9SBarry Smith        MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
12311eb62cbbSBarry Smith        0,
1232299609e3SLois Curfman McInnes        MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
123336ce4990SBarry Smith        0,0,0,0,
1234d1710a03SLois Curfman McInnes        MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
123536ce4990SBarry Smith        0,0,
12363e85bedfSBarry Smith        0,0,MatConvert_MPIAIJ,MatConvertSameType_MPIAIJ,0,0,
1237b49de8d1SLois Curfman McInnes        0,0,0,
1238598137ffSSatish Balay        MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0,
1239052efed2SBarry Smith        MatPrintHelp_MPIAIJ,
12403b2fbd54SBarry Smith        MatScale_MPIAIJ,0,0,0,
12410a198c4cSBarry Smith        MatGetBlockSize_MPIAIJ,0,0,0,0,
12420a198c4cSBarry Smith        MatFDColoringCreate_MPIAIJ};
124336ce4990SBarry Smith 
12448a729477SBarry Smith 
12451987afe7SBarry Smith /*@C
1246ff756334SLois Curfman McInnes    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
12473a511b96SLois Curfman McInnes    (the default parallel PETSc format).  For good matrix assembly performance
12483a511b96SLois Curfman McInnes    the user should preallocate the matrix storage by setting the parameters
12493a511b96SLois Curfman McInnes    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
12503a511b96SLois Curfman McInnes    performance can be increased by more than a factor of 50.
12518a729477SBarry Smith 
12528a729477SBarry Smith    Input Parameters:
12531eb62cbbSBarry Smith .  comm - MPI communicator
12547d3e4905SLois Curfman McInnes .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
125592e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
125692e8d321SLois Curfman McInnes            y vector for the matrix-vector product y = Ax.
125792e8d321SLois Curfman McInnes .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
125892e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
125992e8d321SLois Curfman McInnes            x vector for the matrix-vector product y = Ax.
12607d3e4905SLois Curfman McInnes .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
126192e8d321SLois Curfman McInnes .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
1262ab693e5aSLois Curfman McInnes .  d_nz - number of nonzeros per row in diagonal portion of local submatrix
1263ff756334SLois Curfman McInnes            (same for all local rows)
12642bd5e0b2SLois Curfman McInnes .  d_nzz - array containing the number of nonzeros in the various rows of the
126592e8d321SLois Curfman McInnes            diagonal portion of the local submatrix (possibly different for each row)
12662bd5e0b2SLois Curfman McInnes            or PETSC_NULL. You must leave room for the diagonal entry even if
12672bd5e0b2SLois Curfman McInnes            it is zero.
12682bd5e0b2SLois Curfman McInnes .  o_nz - number of nonzeros per row in the off-diagonal portion of local
1269ab693e5aSLois Curfman McInnes            submatrix (same for all local rows).
12702bd5e0b2SLois Curfman McInnes .  o_nzz - array containing the number of nonzeros in the various rows of the
12712bd5e0b2SLois Curfman McInnes            off-diagonal portion of the local submatrix (possibly different for
12722bd5e0b2SLois Curfman McInnes            each row) or PETSC_NULL.
12738a729477SBarry Smith 
1274ff756334SLois Curfman McInnes    Output Parameter:
127544cd7ae7SLois Curfman McInnes .  A - the matrix
12768a729477SBarry Smith 
1277ff756334SLois Curfman McInnes    Notes:
1278ff756334SLois Curfman McInnes    The AIJ format (also called the Yale sparse matrix format or
1279ff756334SLois Curfman McInnes    compressed row storage), is fully compatible with standard Fortran 77
12800002213bSLois Curfman McInnes    storage.  That is, the stored row and column indices can begin at
12810002213bSLois Curfman McInnes    either one (as in Fortran) or zero.  See the users manual for details.
1282ff756334SLois Curfman McInnes 
1283ff756334SLois Curfman McInnes    The user MUST specify either the local or global matrix dimensions
1284ff756334SLois Curfman McInnes    (possibly both).
1285ff756334SLois Curfman McInnes 
12865511cfe3SLois Curfman McInnes    By default, this format uses inodes (identical nodes) when possible.
12875511cfe3SLois Curfman McInnes    We search for consecutive rows with the same nonzero structure, thereby
12885511cfe3SLois Curfman McInnes    reusing matrix information to achieve increased efficiency.
12895511cfe3SLois Curfman McInnes 
12905511cfe3SLois Curfman McInnes    Options Database Keys:
12915511cfe3SLois Curfman McInnes $    -mat_aij_no_inode  - Do not use inodes
12926e62573dSLois Curfman McInnes $    -mat_aij_inode_limit <limit> - Set inode limit.
12936e62573dSLois Curfman McInnes $        (max limit=5)
12946e62573dSLois Curfman McInnes $    -mat_aij_oneindex - Internally use indexing starting at 1
12956e62573dSLois Curfman McInnes $        rather than 0.  Note: When calling MatSetValues(),
12966e62573dSLois Curfman McInnes $        the user still MUST index entries starting at 0!
12975511cfe3SLois Curfman McInnes 
1298e0245417SLois Curfman McInnes    Storage Information:
1299e0245417SLois Curfman McInnes    For a square global matrix we define each processor's diagonal portion
1300e0245417SLois Curfman McInnes    to be its local rows and the corresponding columns (a square submatrix);
1301e0245417SLois Curfman McInnes    each processor's off-diagonal portion encompasses the remainder of the
1302e0245417SLois Curfman McInnes    local matrix (a rectangular submatrix).
1303e0245417SLois Curfman McInnes 
1304e0245417SLois Curfman McInnes    The user can specify preallocated storage for the diagonal part of
13055ace5be8SLois Curfman McInnes    the local submatrix with either d_nz or d_nnz (not both).  Set
13065ace5be8SLois Curfman McInnes    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
13075ace5be8SLois Curfman McInnes    memory allocation.  Likewise, specify preallocated storage for the
13085ace5be8SLois Curfman McInnes    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1309ff756334SLois Curfman McInnes 
13105511cfe3SLois Curfman McInnes    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
13115511cfe3SLois Curfman McInnes    the figure below we depict these three local rows and all columns (0-11).
13122191d07cSBarry Smith 
1313b810aeb4SBarry Smith $          0 1 2 3 4 5 6 7 8 9 10 11
1314b810aeb4SBarry Smith $         -------------------
13155511cfe3SLois Curfman McInnes $  row 3  |  o o o d d d o o o o o o
13165511cfe3SLois Curfman McInnes $  row 4  |  o o o d d d o o o o o o
13175511cfe3SLois Curfman McInnes $  row 5  |  o o o d d d o o o o o o
13185511cfe3SLois Curfman McInnes $         -------------------
1319b810aeb4SBarry Smith $
1320b810aeb4SBarry Smith 
13215511cfe3SLois Curfman McInnes    Thus, any entries in the d locations are stored in the d (diagonal)
13225511cfe3SLois Curfman McInnes    submatrix, and any entries in the o locations are stored in the
13235511cfe3SLois Curfman McInnes    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
13245511cfe3SLois Curfman McInnes    stored simply in the MATSEQAIJ format for compressed row storage.
13255511cfe3SLois Curfman McInnes 
13265511cfe3SLois Curfman McInnes    Now d_nz should indicate the number of nonzeros per row in the d matrix,
13275511cfe3SLois Curfman McInnes    and o_nz should indicate the number of nonzeros per row in the o matrix.
13285511cfe3SLois Curfman McInnes    In general, for PDE problems in which most nonzeros are near the diagonal,
13293d323bbdSBarry Smith    one expects d_nz >> o_nz. For large problems you MUST preallocate memory
133092e8d321SLois Curfman McInnes    or you will get TERRIBLE performance; see the users' manual chapter on
13316da5968aSLois Curfman McInnes    matrices.
13323a511b96SLois Curfman McInnes 
1333dbd7a890SLois Curfman McInnes .keywords: matrix, aij, compressed row, sparse, parallel
1334ff756334SLois Curfman McInnes 
1335fafbff53SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
13368a729477SBarry Smith @*/
13371eb62cbbSBarry Smith int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
133844cd7ae7SLois Curfman McInnes                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
13398a729477SBarry Smith {
134044cd7ae7SLois Curfman McInnes   Mat          B;
134144cd7ae7SLois Curfman McInnes   Mat_MPIAIJ   *b;
134236ce4990SBarry Smith   int          ierr, i,sum[2],work[2],size;
1343416022c9SBarry Smith 
134444cd7ae7SLois Curfman McInnes   *A = 0;
134544cd7ae7SLois Curfman McInnes   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIAIJ,comm);
134644cd7ae7SLois Curfman McInnes   PLogObjectCreate(B);
134744cd7ae7SLois Curfman McInnes   B->data       = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b);
134844cd7ae7SLois Curfman McInnes   PetscMemzero(b,sizeof(Mat_MPIAIJ));
134944cd7ae7SLois Curfman McInnes   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
135036ce4990SBarry Smith   MPI_Comm_size(comm,&size);
135136ce4990SBarry Smith   if (size == 1) {
135236ce4990SBarry Smith     B->ops.getrowij          = MatGetRowIJ_MPIAIJ;
135336ce4990SBarry Smith     B->ops.restorerowij      = MatRestoreRowIJ_MPIAIJ;
135436ce4990SBarry Smith     B->ops.lufactorsymbolic  = MatLUFactorSymbolic_MPIAIJ;
135536ce4990SBarry Smith     B->ops.lufactornumeric   = MatLUFactorNumeric_MPIAIJ;
135636ce4990SBarry Smith     B->ops.lufactor          = MatLUFactor_MPIAIJ;
135736ce4990SBarry Smith     B->ops.solve             = MatSolve_MPIAIJ;
135836ce4990SBarry Smith     B->ops.solveadd          = MatSolveAdd_MPIAIJ;
135936ce4990SBarry Smith     B->ops.solvetrans        = MatSolveTrans_MPIAIJ;
136036ce4990SBarry Smith     B->ops.solvetransadd     = MatSolveTransAdd_MPIAIJ;
136136ce4990SBarry Smith     B->ops.ilufactorsymbolic = MatILUFactorSymbolic_MPIAIJ;
136236ce4990SBarry Smith   }
136344cd7ae7SLois Curfman McInnes   B->destroy    = MatDestroy_MPIAIJ;
136444cd7ae7SLois Curfman McInnes   B->view       = MatView_MPIAIJ;
136544cd7ae7SLois Curfman McInnes   B->factor     = 0;
136644cd7ae7SLois Curfman McInnes   B->assembled  = PETSC_FALSE;
136790f02eecSBarry Smith   B->mapping    = 0;
1368d6dfbf8fSBarry Smith 
136944cd7ae7SLois Curfman McInnes   b->insertmode = NOT_SET_VALUES;
137044cd7ae7SLois Curfman McInnes   MPI_Comm_rank(comm,&b->rank);
137144cd7ae7SLois Curfman McInnes   MPI_Comm_size(comm,&b->size);
13721eb62cbbSBarry Smith 
1373b4fd4287SBarry Smith   if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
13742e0155e0SLois Curfman McInnes     SETERRQ(1,"MatCreateMPIAIJ:Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
13751987afe7SBarry Smith 
1376dbd7a890SLois Curfman McInnes   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
13771eb62cbbSBarry Smith     work[0] = m; work[1] = n;
1378d65a2f8fSBarry Smith     MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
1379dbd7a890SLois Curfman McInnes     if (M == PETSC_DECIDE) M = sum[0];
1380dbd7a890SLois Curfman McInnes     if (N == PETSC_DECIDE) N = sum[1];
13811eb62cbbSBarry Smith   }
138244cd7ae7SLois Curfman McInnes   if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);}
138344cd7ae7SLois Curfman McInnes   if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);}
138444cd7ae7SLois Curfman McInnes   b->m = m; B->m = m;
138544cd7ae7SLois Curfman McInnes   b->n = n; B->n = n;
138644cd7ae7SLois Curfman McInnes   b->N = N; B->N = N;
138744cd7ae7SLois Curfman McInnes   b->M = M; B->M = M;
13881eb62cbbSBarry Smith 
13891eb62cbbSBarry Smith   /* build local table of row and column ownerships */
139044cd7ae7SLois Curfman McInnes   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
139144cd7ae7SLois Curfman McInnes   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
1392603f58a4SSatish Balay   b->cowners = b->rowners + b->size + 2;
139344cd7ae7SLois Curfman McInnes   MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);
139444cd7ae7SLois Curfman McInnes   b->rowners[0] = 0;
139544cd7ae7SLois Curfman McInnes   for ( i=2; i<=b->size; i++ ) {
139644cd7ae7SLois Curfman McInnes     b->rowners[i] += b->rowners[i-1];
13978a729477SBarry Smith   }
139844cd7ae7SLois Curfman McInnes   b->rstart = b->rowners[b->rank];
139944cd7ae7SLois Curfman McInnes   b->rend   = b->rowners[b->rank+1];
140044cd7ae7SLois Curfman McInnes   MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);
140144cd7ae7SLois Curfman McInnes   b->cowners[0] = 0;
140244cd7ae7SLois Curfman McInnes   for ( i=2; i<=b->size; i++ ) {
140344cd7ae7SLois Curfman McInnes     b->cowners[i] += b->cowners[i-1];
14048a729477SBarry Smith   }
140544cd7ae7SLois Curfman McInnes   b->cstart = b->cowners[b->rank];
140644cd7ae7SLois Curfman McInnes   b->cend   = b->cowners[b->rank+1];
14078a729477SBarry Smith 
14085ace5be8SLois Curfman McInnes   if (d_nz == PETSC_DEFAULT) d_nz = 5;
140944cd7ae7SLois Curfman McInnes   ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
141044cd7ae7SLois Curfman McInnes   PLogObjectParent(B,b->A);
14117b8455f0SLois Curfman McInnes   if (o_nz == PETSC_DEFAULT) o_nz = 0;
141244cd7ae7SLois Curfman McInnes   ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
141344cd7ae7SLois Curfman McInnes   PLogObjectParent(B,b->B);
14148a729477SBarry Smith 
14151eb62cbbSBarry Smith   /* build cache for off array entries formed */
141644cd7ae7SLois Curfman McInnes   ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
141790f02eecSBarry Smith   b->donotstash  = 0;
141844cd7ae7SLois Curfman McInnes   b->colmap      = 0;
141944cd7ae7SLois Curfman McInnes   b->garray      = 0;
142044cd7ae7SLois Curfman McInnes   b->roworiented = 1;
14218a729477SBarry Smith 
14221eb62cbbSBarry Smith   /* stuff used for matrix vector multiply */
142344cd7ae7SLois Curfman McInnes   b->lvec      = 0;
142444cd7ae7SLois Curfman McInnes   b->Mvctx     = 0;
14258a729477SBarry Smith 
14267a0afa10SBarry Smith   /* stuff for MatGetRow() */
142744cd7ae7SLois Curfman McInnes   b->rowindices   = 0;
142844cd7ae7SLois Curfman McInnes   b->rowvalues    = 0;
142944cd7ae7SLois Curfman McInnes   b->getrowactive = PETSC_FALSE;
14307a0afa10SBarry Smith 
143144cd7ae7SLois Curfman McInnes   *A = B;
1432d6dfbf8fSBarry Smith   return 0;
1433d6dfbf8fSBarry Smith }
1434c74985f6SBarry Smith 
14353d1612f7SBarry Smith static int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues)
1436d6dfbf8fSBarry Smith {
1437d6dfbf8fSBarry Smith   Mat        mat;
1438416022c9SBarry Smith   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
1439a1b97e82SLois Curfman McInnes   int        ierr, len=0, flg;
1440d6dfbf8fSBarry Smith 
1441416022c9SBarry Smith   *newmat       = 0;
14420452661fSBarry Smith   PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm);
1443a5a9c739SBarry Smith   PLogObjectCreate(mat);
14440452661fSBarry Smith   mat->data       = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
1445416022c9SBarry Smith   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
144644a69424SLois Curfman McInnes   mat->destroy    = MatDestroy_MPIAIJ;
144744a69424SLois Curfman McInnes   mat->view       = MatView_MPIAIJ;
1448d6dfbf8fSBarry Smith   mat->factor     = matin->factor;
1449c456f294SBarry Smith   mat->assembled  = PETSC_TRUE;
1450d6dfbf8fSBarry Smith 
145144cd7ae7SLois Curfman McInnes   a->m = mat->m   = oldmat->m;
145244cd7ae7SLois Curfman McInnes   a->n = mat->n   = oldmat->n;
145344cd7ae7SLois Curfman McInnes   a->M = mat->M   = oldmat->M;
145444cd7ae7SLois Curfman McInnes   a->N = mat->N   = oldmat->N;
1455d6dfbf8fSBarry Smith 
1456416022c9SBarry Smith   a->rstart       = oldmat->rstart;
1457416022c9SBarry Smith   a->rend         = oldmat->rend;
1458416022c9SBarry Smith   a->cstart       = oldmat->cstart;
1459416022c9SBarry Smith   a->cend         = oldmat->cend;
146017699dbbSLois Curfman McInnes   a->size         = oldmat->size;
146117699dbbSLois Curfman McInnes   a->rank         = oldmat->rank;
146264119d58SLois Curfman McInnes   a->insertmode   = NOT_SET_VALUES;
1463bcd2baecSBarry Smith   a->rowvalues    = 0;
1464bcd2baecSBarry Smith   a->getrowactive = PETSC_FALSE;
1465d6dfbf8fSBarry Smith 
1466603f58a4SSatish Balay   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
1467603f58a4SSatish Balay   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
1468603f58a4SSatish Balay   a->cowners = a->rowners + a->size + 2;
1469603f58a4SSatish Balay   PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
1470416022c9SBarry Smith   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
14712ee70a88SLois Curfman McInnes   if (oldmat->colmap) {
14720452661fSBarry Smith     a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
1473416022c9SBarry Smith     PLogObjectMemory(mat,(a->N)*sizeof(int));
1474416022c9SBarry Smith     PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
1475416022c9SBarry Smith   } else a->colmap = 0;
14763f41c07dSBarry Smith   if (oldmat->garray) {
14773f41c07dSBarry Smith     len = ((Mat_SeqAIJ *) (oldmat->B->data))->n;
14783f41c07dSBarry Smith     a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray);
1479464493b3SBarry Smith     PLogObjectMemory(mat,len*sizeof(int));
14803f41c07dSBarry Smith     if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
1481416022c9SBarry Smith   } else a->garray = 0;
1482d6dfbf8fSBarry Smith 
1483416022c9SBarry Smith   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
1484416022c9SBarry Smith   PLogObjectParent(mat,a->lvec);
1485a56f8943SBarry Smith   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
1486416022c9SBarry Smith   PLogObjectParent(mat,a->Mvctx);
1487416022c9SBarry Smith   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
1488416022c9SBarry Smith   PLogObjectParent(mat,a->A);
1489416022c9SBarry Smith   ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
1490416022c9SBarry Smith   PLogObjectParent(mat,a->B);
14915dd7a6c7SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
149225cdf11fSBarry Smith   if (flg) {
1493682d7d0cSBarry Smith     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
1494682d7d0cSBarry Smith   }
14958a729477SBarry Smith   *newmat = mat;
14968a729477SBarry Smith   return 0;
14978a729477SBarry Smith }
1498416022c9SBarry Smith 
149977c4ece6SBarry Smith #include "sys.h"
1500416022c9SBarry Smith 
150119bcc07fSBarry Smith int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
1502416022c9SBarry Smith {
1503d65a2f8fSBarry Smith   Mat          A;
1504416022c9SBarry Smith   int          i, nz, ierr, j,rstart, rend, fd;
1505d65a2f8fSBarry Smith   Scalar       *vals,*svals;
150619bcc07fSBarry Smith   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1507416022c9SBarry Smith   MPI_Status   status;
150817699dbbSLois Curfman McInnes   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1509d65a2f8fSBarry Smith   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
151019bcc07fSBarry Smith   int          tag = ((PetscObject)viewer)->tag;
1511416022c9SBarry Smith 
151217699dbbSLois Curfman McInnes   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
151317699dbbSLois Curfman McInnes   if (!rank) {
151490ace30eSBarry Smith     ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
151577c4ece6SBarry Smith     ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr);
1516c456f294SBarry Smith     if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIAIJ:not matrix object");
1517416022c9SBarry Smith   }
1518416022c9SBarry Smith 
1519416022c9SBarry Smith   MPI_Bcast(header+1,3,MPI_INT,0,comm);
1520416022c9SBarry Smith   M = header[1]; N = header[2];
1521416022c9SBarry Smith   /* determine ownership of all rows */
152217699dbbSLois Curfman McInnes   m = M/size + ((M % size) > rank);
15230452661fSBarry Smith   rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
1524416022c9SBarry Smith   MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1525416022c9SBarry Smith   rowners[0] = 0;
152617699dbbSLois Curfman McInnes   for ( i=2; i<=size; i++ ) {
1527416022c9SBarry Smith     rowners[i] += rowners[i-1];
1528416022c9SBarry Smith   }
152917699dbbSLois Curfman McInnes   rstart = rowners[rank];
153017699dbbSLois Curfman McInnes   rend   = rowners[rank+1];
1531416022c9SBarry Smith 
1532416022c9SBarry Smith   /* distribute row lengths to all processors */
15330452661fSBarry Smith   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
1534416022c9SBarry Smith   offlens = ourlens + (rend-rstart);
153517699dbbSLois Curfman McInnes   if (!rank) {
15360452661fSBarry Smith     rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
153777c4ece6SBarry Smith     ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
15380452661fSBarry Smith     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
153917699dbbSLois Curfman McInnes     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
1540d65a2f8fSBarry Smith     MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
15410452661fSBarry Smith     PetscFree(sndcounts);
1542416022c9SBarry Smith   }
1543416022c9SBarry Smith   else {
1544416022c9SBarry Smith     MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
1545416022c9SBarry Smith   }
1546416022c9SBarry Smith 
154717699dbbSLois Curfman McInnes   if (!rank) {
1548416022c9SBarry Smith     /* calculate the number of nonzeros on each processor */
15490452661fSBarry Smith     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
1550cddf8d76SBarry Smith     PetscMemzero(procsnz,size*sizeof(int));
155117699dbbSLois Curfman McInnes     for ( i=0; i<size; i++ ) {
1552416022c9SBarry Smith       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
1553416022c9SBarry Smith         procsnz[i] += rowlengths[j];
1554416022c9SBarry Smith       }
1555416022c9SBarry Smith     }
15560452661fSBarry Smith     PetscFree(rowlengths);
1557416022c9SBarry Smith 
1558416022c9SBarry Smith     /* determine max buffer needed and allocate it */
1559416022c9SBarry Smith     maxnz = 0;
156017699dbbSLois Curfman McInnes     for ( i=0; i<size; i++ ) {
15610452661fSBarry Smith       maxnz = PetscMax(maxnz,procsnz[i]);
1562416022c9SBarry Smith     }
15630452661fSBarry Smith     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
1564416022c9SBarry Smith 
1565416022c9SBarry Smith     /* read in my part of the matrix column indices  */
1566416022c9SBarry Smith     nz = procsnz[0];
15670452661fSBarry Smith     mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
156877c4ece6SBarry Smith     ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr);
1569d65a2f8fSBarry Smith 
1570d65a2f8fSBarry Smith     /* read in every one elses and ship off */
157117699dbbSLois Curfman McInnes     for ( i=1; i<size; i++ ) {
1572d65a2f8fSBarry Smith       nz = procsnz[i];
157377c4ece6SBarry Smith       ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
1574d65a2f8fSBarry Smith       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
1575d65a2f8fSBarry Smith     }
15760452661fSBarry Smith     PetscFree(cols);
1577416022c9SBarry Smith   }
1578416022c9SBarry Smith   else {
1579416022c9SBarry Smith     /* determine buffer space needed for message */
1580416022c9SBarry Smith     nz = 0;
1581416022c9SBarry Smith     for ( i=0; i<m; i++ ) {
1582416022c9SBarry Smith       nz += ourlens[i];
1583416022c9SBarry Smith     }
15840452661fSBarry Smith     mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1585416022c9SBarry Smith 
1586416022c9SBarry Smith     /* receive message of column indices*/
1587d65a2f8fSBarry Smith     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
1588416022c9SBarry Smith     MPI_Get_count(&status,MPI_INT,&maxnz);
1589c456f294SBarry Smith     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file");
1590416022c9SBarry Smith   }
1591416022c9SBarry Smith 
1592416022c9SBarry Smith   /* loop over local rows, determining number of off diagonal entries */
1593cddf8d76SBarry Smith   PetscMemzero(offlens,m*sizeof(int));
1594416022c9SBarry Smith   jj = 0;
1595416022c9SBarry Smith   for ( i=0; i<m; i++ ) {
1596416022c9SBarry Smith     for ( j=0; j<ourlens[i]; j++ ) {
1597d65a2f8fSBarry Smith       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1598416022c9SBarry Smith       jj++;
1599416022c9SBarry Smith     }
1600416022c9SBarry Smith   }
1601d65a2f8fSBarry Smith 
1602d65a2f8fSBarry Smith   /* create our matrix */
1603416022c9SBarry Smith   for ( i=0; i<m; i++ ) {
1604416022c9SBarry Smith     ourlens[i] -= offlens[i];
1605416022c9SBarry Smith   }
1606d65a2f8fSBarry Smith   ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1607d65a2f8fSBarry Smith   A = *newmat;
16086d4a8577SBarry Smith   MatSetOption(A,MAT_COLUMNS_SORTED);
1609d65a2f8fSBarry Smith   for ( i=0; i<m; i++ ) {
1610d65a2f8fSBarry Smith     ourlens[i] += offlens[i];
1611d65a2f8fSBarry Smith   }
1612416022c9SBarry Smith 
161317699dbbSLois Curfman McInnes   if (!rank) {
16140452661fSBarry Smith     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);
1615416022c9SBarry Smith 
1616416022c9SBarry Smith     /* read in my part of the matrix numerical values  */
1617416022c9SBarry Smith     nz = procsnz[0];
161877c4ece6SBarry Smith     ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
1619d65a2f8fSBarry Smith 
1620d65a2f8fSBarry Smith     /* insert into matrix */
1621d65a2f8fSBarry Smith     jj      = rstart;
1622d65a2f8fSBarry Smith     smycols = mycols;
1623d65a2f8fSBarry Smith     svals   = vals;
1624d65a2f8fSBarry Smith     for ( i=0; i<m; i++ ) {
1625d65a2f8fSBarry Smith       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1626d65a2f8fSBarry Smith       smycols += ourlens[i];
1627d65a2f8fSBarry Smith       svals   += ourlens[i];
1628d65a2f8fSBarry Smith       jj++;
1629416022c9SBarry Smith     }
1630416022c9SBarry Smith 
1631d65a2f8fSBarry Smith     /* read in other processors and ship out */
163217699dbbSLois Curfman McInnes     for ( i=1; i<size; i++ ) {
1633416022c9SBarry Smith       nz = procsnz[i];
163477c4ece6SBarry Smith       ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
1635d65a2f8fSBarry Smith       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
1636416022c9SBarry Smith     }
16370452661fSBarry Smith     PetscFree(procsnz);
1638416022c9SBarry Smith   }
1639d65a2f8fSBarry Smith   else {
1640d65a2f8fSBarry Smith     /* receive numeric values */
16410452661fSBarry Smith     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);
1642416022c9SBarry Smith 
1643d65a2f8fSBarry Smith     /* receive message of values*/
1644d65a2f8fSBarry Smith     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
1645d65a2f8fSBarry Smith     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1646c456f294SBarry Smith     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file");
1647d65a2f8fSBarry Smith 
1648d65a2f8fSBarry Smith     /* insert into matrix */
1649d65a2f8fSBarry Smith     jj      = rstart;
1650d65a2f8fSBarry Smith     smycols = mycols;
1651d65a2f8fSBarry Smith     svals   = vals;
1652d65a2f8fSBarry Smith     for ( i=0; i<m; i++ ) {
1653d65a2f8fSBarry Smith       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1654d65a2f8fSBarry Smith       smycols += ourlens[i];
1655d65a2f8fSBarry Smith       svals   += ourlens[i];
1656d65a2f8fSBarry Smith       jj++;
1657d65a2f8fSBarry Smith     }
1658d65a2f8fSBarry Smith   }
16590452661fSBarry Smith   PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);
1660d65a2f8fSBarry Smith 
16616d4a8577SBarry Smith   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
16626d4a8577SBarry Smith   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1663416022c9SBarry Smith   return 0;
1664416022c9SBarry Smith }
1665