xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 6d84be18fbb99ba69be7b8bdde5411a66955b7ea)
1cb512458SBarry Smith #ifndef lint
2*6d84be18SBarry Smith static char vcid[] = "$Id: mpiaij.c,v 1.129 1996/02/28 00:12:20 bsmith Exp bsmith $";
3cb512458SBarry Smith #endif
48a729477SBarry Smith 
51eb62cbbSBarry Smith #include "mpiaij.h"
68a729477SBarry Smith #include "vec/vecimpl.h"
7d6dfbf8fSBarry Smith #include "inline/spops.h"
88a729477SBarry Smith 
99e25ed09SBarry Smith /* local utility routine that creates a mapping from the global column
109e25ed09SBarry Smith number to the local number in the off-diagonal part of the local
119e25ed09SBarry Smith storage of the matrix.  This is done in a non scable way since the
129e25ed09SBarry Smith length of colmap equals the global matrix length.
139e25ed09SBarry Smith */
14b7c46309SBarry Smith static int CreateColmap_Private(Mat mat)
159e25ed09SBarry Smith {
1644a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
17ec8511deSBarry Smith   Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
18416022c9SBarry Smith   int        n = B->n,i,shift = B->indexshift;
19dbb450caSBarry Smith 
200452661fSBarry Smith   aij->colmap = (int *) PetscMalloc(aij->N*sizeof(int));CHKPTRQ(aij->colmap);
21464493b3SBarry Smith   PLogObjectMemory(mat,aij->N*sizeof(int));
22cddf8d76SBarry Smith   PetscMemzero(aij->colmap,aij->N*sizeof(int));
23dbb450caSBarry Smith   for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i-shift;
249e25ed09SBarry Smith   return 0;
259e25ed09SBarry Smith }
269e25ed09SBarry Smith 
272493cbb0SBarry Smith extern int DisAssemble_MPIAIJ(Mat);
282493cbb0SBarry Smith 
2951c98ccdSLois Curfman McInnes static int MatGetReordering_MPIAIJ(Mat mat,MatOrdering type,IS *rperm,IS *cperm)
30299609e3SLois Curfman McInnes {
31299609e3SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
32299609e3SLois Curfman McInnes   int        ierr;
3317699dbbSLois Curfman McInnes   if (aij->size == 1) {
3451c98ccdSLois Curfman McInnes     ierr = MatGetReordering(aij->A,type,rperm,cperm); CHKERRQ(ierr);
3548d91487SBarry Smith   } else SETERRQ(1,"MatGetReordering_MPIAIJ:not supported in parallel");
36299609e3SLois Curfman McInnes   return 0;
37299609e3SLois Curfman McInnes }
38299609e3SLois Curfman McInnes 
394b0e389bSBarry Smith static int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
408a729477SBarry Smith {
4144a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
42dbb450caSBarry Smith   Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data;
434b0e389bSBarry Smith   Scalar     value;
441eb62cbbSBarry Smith   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
451eb62cbbSBarry Smith   int        cstart = aij->cstart, cend = aij->cend,row,col;
464b0e389bSBarry Smith   int        shift = C->indexshift,roworiented = aij->roworiented;
478a729477SBarry Smith 
4864119d58SLois Curfman McInnes   if (aij->insertmode != NOT_SET_VALUES && aij->insertmode != addv) {
4948d91487SBarry Smith     SETERRQ(1,"MatSetValues_MPIAIJ:Cannot mix inserts and adds");
508a729477SBarry Smith   }
511eb62cbbSBarry Smith   aij->insertmode = addv;
528a729477SBarry Smith   for ( i=0; i<m; i++ ) {
534b0e389bSBarry Smith     if (im[i] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative row");
544b0e389bSBarry Smith     if (im[i] >= aij->M) SETERRQ(1,"MatSetValues_MPIAIJ:Row too large");
554b0e389bSBarry Smith     if (im[i] >= rstart && im[i] < rend) {
564b0e389bSBarry Smith       row = im[i] - rstart;
571eb62cbbSBarry Smith       for ( j=0; j<n; j++ ) {
584b0e389bSBarry Smith         if (in[j] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative column");
594b0e389bSBarry Smith         if (in[j] >= aij->N) SETERRQ(1,"MatSetValues_MPIAIJ:Col too large");
604b0e389bSBarry Smith         if (in[j] >= cstart && in[j] < cend){
614b0e389bSBarry Smith           col = in[j] - cstart;
624b0e389bSBarry Smith           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
634b0e389bSBarry Smith           ierr = MatSetValues(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
641eb62cbbSBarry Smith         }
651eb62cbbSBarry Smith         else {
66227d817aSBarry Smith           if (mat->was_assembled) {
67b7c46309SBarry Smith             if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
684b0e389bSBarry Smith             col = aij->colmap[in[j]] + shift;
69ec8511deSBarry Smith             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
702493cbb0SBarry Smith               ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
714b0e389bSBarry Smith               col =  in[j];
72d6dfbf8fSBarry Smith             }
739e25ed09SBarry Smith           }
744b0e389bSBarry Smith           else col = in[j];
754b0e389bSBarry Smith           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
764b0e389bSBarry Smith           ierr = MatSetValues(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
771eb62cbbSBarry Smith         }
781eb62cbbSBarry Smith       }
791eb62cbbSBarry Smith     }
801eb62cbbSBarry Smith     else {
814b0e389bSBarry Smith       if (roworiented) {
824b0e389bSBarry Smith         ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
834b0e389bSBarry Smith       }
844b0e389bSBarry Smith       else {
854b0e389bSBarry Smith         row = im[i];
864b0e389bSBarry Smith         for ( j=0; j<n; j++ ) {
874b0e389bSBarry Smith           ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
884b0e389bSBarry Smith         }
894b0e389bSBarry Smith       }
901eb62cbbSBarry Smith     }
918a729477SBarry Smith   }
928a729477SBarry Smith   return 0;
938a729477SBarry Smith }
948a729477SBarry Smith 
95b49de8d1SLois Curfman McInnes static int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
96b49de8d1SLois Curfman McInnes {
97b49de8d1SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
98b49de8d1SLois Curfman McInnes   Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data;
99b49de8d1SLois Curfman McInnes   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
100b49de8d1SLois Curfman McInnes   int        cstart = aij->cstart, cend = aij->cend,row,col;
101b49de8d1SLois Curfman McInnes   int        shift = C->indexshift;
102b49de8d1SLois Curfman McInnes 
103b49de8d1SLois Curfman McInnes   for ( i=0; i<m; i++ ) {
104b49de8d1SLois Curfman McInnes     if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative row");
105b49de8d1SLois Curfman McInnes     if (idxm[i] >= aij->M) SETERRQ(1,"MatGetValues_MPIAIJ:Row too large");
106b49de8d1SLois Curfman McInnes     if (idxm[i] >= rstart && idxm[i] < rend) {
107b49de8d1SLois Curfman McInnes       row = idxm[i] - rstart;
108b49de8d1SLois Curfman McInnes       for ( j=0; j<n; j++ ) {
109b49de8d1SLois Curfman McInnes         if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative column");
110b49de8d1SLois Curfman McInnes         if (idxn[j] >= aij->N) SETERRQ(1,"MatGetValues_MPIAIJ:Col too large");
111b49de8d1SLois Curfman McInnes         if (idxn[j] >= cstart && idxn[j] < cend){
112b49de8d1SLois Curfman McInnes           col = idxn[j] - cstart;
113b49de8d1SLois Curfman McInnes           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
114b49de8d1SLois Curfman McInnes         }
115b49de8d1SLois Curfman McInnes         else {
116b49de8d1SLois Curfman McInnes           col = aij->colmap[idxn[j]] + shift;
117b49de8d1SLois Curfman McInnes           ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
118b49de8d1SLois Curfman McInnes         }
119b49de8d1SLois Curfman McInnes       }
120b49de8d1SLois Curfman McInnes     }
121b49de8d1SLois Curfman McInnes     else {
122b49de8d1SLois Curfman McInnes       SETERRQ(1,"MatGetValues_MPIAIJ:Only local values currently supported");
123b49de8d1SLois Curfman McInnes     }
124b49de8d1SLois Curfman McInnes   }
125b49de8d1SLois Curfman McInnes   return 0;
126b49de8d1SLois Curfman McInnes }
127b49de8d1SLois Curfman McInnes 
128fc76ce87SLois Curfman McInnes static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
1298a729477SBarry Smith {
13044a69424SLois Curfman McInnes   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
131d6dfbf8fSBarry Smith   MPI_Comm    comm = mat->comm;
13217699dbbSLois Curfman McInnes   int         size = aij->size, *owners = aij->rowners;
13317699dbbSLois Curfman McInnes   int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
1341eb62cbbSBarry Smith   MPI_Request *send_waits,*recv_waits;
1356abc6512SBarry Smith   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
1361eb62cbbSBarry Smith   InsertMode  addv;
1371eb62cbbSBarry Smith   Scalar      *rvalues,*svalues;
1381eb62cbbSBarry Smith 
1391eb62cbbSBarry Smith   /* make sure all processors are either in INSERTMODE or ADDMODE */
140d65a2f8fSBarry Smith   MPI_Allreduce(&aij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
141dbb450caSBarry Smith   if (addv == (ADD_VALUES|INSERT_VALUES)) {
142bbb6d6a8SBarry Smith     SETERRQ(1,"MatAssemblyBegin_MPIAIJ:Some processors inserted others added");
1431eb62cbbSBarry Smith   }
1441eb62cbbSBarry Smith   aij->insertmode = addv; /* in case this processor had no cache */
1451eb62cbbSBarry Smith 
1461eb62cbbSBarry Smith   /*  first count number of contributors to each processor */
1470452661fSBarry Smith   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
148cddf8d76SBarry Smith   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
1490452661fSBarry Smith   owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
1501eb62cbbSBarry Smith   for ( i=0; i<aij->stash.n; i++ ) {
1511eb62cbbSBarry Smith     idx = aij->stash.idx[i];
15217699dbbSLois Curfman McInnes     for ( j=0; j<size; j++ ) {
1531eb62cbbSBarry Smith       if (idx >= owners[j] && idx < owners[j+1]) {
1541eb62cbbSBarry Smith         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
1558a729477SBarry Smith       }
1568a729477SBarry Smith     }
1578a729477SBarry Smith   }
15817699dbbSLois Curfman McInnes   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
1591eb62cbbSBarry Smith 
1601eb62cbbSBarry Smith   /* inform other processors of number of messages and max length*/
1610452661fSBarry Smith   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
16217699dbbSLois Curfman McInnes   MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);
16317699dbbSLois Curfman McInnes   nreceives = work[rank];
16417699dbbSLois Curfman McInnes   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
16517699dbbSLois Curfman McInnes   nmax = work[rank];
1660452661fSBarry Smith   PetscFree(work);
1671eb62cbbSBarry Smith 
1681eb62cbbSBarry Smith   /* post receives:
1691eb62cbbSBarry Smith        1) each message will consist of ordered pairs
1701eb62cbbSBarry Smith      (global index,value) we store the global index as a double
171d6dfbf8fSBarry Smith      to simplify the message passing.
1721eb62cbbSBarry Smith        2) since we don't know how long each individual message is we
1731eb62cbbSBarry Smith      allocate the largest needed buffer for each receive. Potentially
1741eb62cbbSBarry Smith      this is a lot of wasted space.
1751eb62cbbSBarry Smith 
1761eb62cbbSBarry Smith 
1771eb62cbbSBarry Smith        This could be done better.
1781eb62cbbSBarry Smith   */
1790452661fSBarry Smith   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
18078b31e54SBarry Smith   CHKPTRQ(rvalues);
1810452661fSBarry Smith   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
18278b31e54SBarry Smith   CHKPTRQ(recv_waits);
1831eb62cbbSBarry Smith   for ( i=0; i<nreceives; i++ ) {
184416022c9SBarry Smith     MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
1851eb62cbbSBarry Smith               comm,recv_waits+i);
1861eb62cbbSBarry Smith   }
1871eb62cbbSBarry Smith 
1881eb62cbbSBarry Smith   /* do sends:
1891eb62cbbSBarry Smith       1) starts[i] gives the starting index in svalues for stuff going to
1901eb62cbbSBarry Smith          the ith processor
1911eb62cbbSBarry Smith   */
1920452661fSBarry Smith   svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
1930452661fSBarry Smith   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
19478b31e54SBarry Smith   CHKPTRQ(send_waits);
1950452661fSBarry Smith   starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
1961eb62cbbSBarry Smith   starts[0] = 0;
19717699dbbSLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
1981eb62cbbSBarry Smith   for ( i=0; i<aij->stash.n; i++ ) {
1991eb62cbbSBarry Smith     svalues[3*starts[owner[i]]]       = (Scalar)  aij->stash.idx[i];
2001eb62cbbSBarry Smith     svalues[3*starts[owner[i]]+1]     = (Scalar)  aij->stash.idy[i];
2011eb62cbbSBarry Smith     svalues[3*(starts[owner[i]]++)+2] =  aij->stash.array[i];
2021eb62cbbSBarry Smith   }
2030452661fSBarry Smith   PetscFree(owner);
2041eb62cbbSBarry Smith   starts[0] = 0;
20517699dbbSLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
2061eb62cbbSBarry Smith   count = 0;
20717699dbbSLois Curfman McInnes   for ( i=0; i<size; i++ ) {
2081eb62cbbSBarry Smith     if (procs[i]) {
209416022c9SBarry Smith       MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
2101eb62cbbSBarry Smith                 comm,send_waits+count++);
2111eb62cbbSBarry Smith     }
2121eb62cbbSBarry Smith   }
2130452661fSBarry Smith   PetscFree(starts); PetscFree(nprocs);
2141eb62cbbSBarry Smith 
2151eb62cbbSBarry Smith   /* Free cache space */
2162191d07cSBarry Smith   PLogInfo(0,"[%d]MatAssemblyBegin_MPIAIJ:Number of off processor values %d\n",rank,aij->stash.n);
21778b31e54SBarry Smith   ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
2181eb62cbbSBarry Smith 
2191eb62cbbSBarry Smith   aij->svalues    = svalues;    aij->rvalues    = rvalues;
2201eb62cbbSBarry Smith   aij->nsends     = nsends;     aij->nrecvs     = nreceives;
2211eb62cbbSBarry Smith   aij->send_waits = send_waits; aij->recv_waits = recv_waits;
2221eb62cbbSBarry Smith   aij->rmax       = nmax;
2231eb62cbbSBarry Smith 
2241eb62cbbSBarry Smith   return 0;
2251eb62cbbSBarry Smith }
22644a69424SLois Curfman McInnes extern int MatSetUpMultiply_MPIAIJ(Mat);
2271eb62cbbSBarry Smith 
228fc76ce87SLois Curfman McInnes static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
2291eb62cbbSBarry Smith {
23044a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
231dbb450caSBarry Smith   Mat_SeqAIJ *C = (Mat_SeqAIJ *) aij->A->data;
2321eb62cbbSBarry Smith   MPI_Status  *send_status,recv_status;
233416022c9SBarry Smith   int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
234416022c9SBarry Smith   int         row,col,other_disassembled,shift = C->indexshift;
2351eb62cbbSBarry Smith   Scalar      *values,val;
2361eb62cbbSBarry Smith   InsertMode  addv = aij->insertmode;
2371eb62cbbSBarry Smith 
2381eb62cbbSBarry Smith   /*  wait on receives */
2391eb62cbbSBarry Smith   while (count) {
240d6dfbf8fSBarry Smith     MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
2411eb62cbbSBarry Smith     /* unpack receives into our local space */
242d6dfbf8fSBarry Smith     values = aij->rvalues + 3*imdex*aij->rmax;
243c60448a5SBarry Smith     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
2441eb62cbbSBarry Smith     n = n/3;
2451eb62cbbSBarry Smith     for ( i=0; i<n; i++ ) {
246227d817aSBarry Smith       row = (int) PetscReal(values[3*i]) - aij->rstart;
247227d817aSBarry Smith       col = (int) PetscReal(values[3*i+1]);
2481eb62cbbSBarry Smith       val = values[3*i+2];
2491eb62cbbSBarry Smith       if (col >= aij->cstart && col < aij->cend) {
2501eb62cbbSBarry Smith         col -= aij->cstart;
2511eb62cbbSBarry Smith         MatSetValues(aij->A,1,&row,1,&col,&val,addv);
2521eb62cbbSBarry Smith       }
2531eb62cbbSBarry Smith       else {
254227d817aSBarry Smith         if (mat->was_assembled) {
255b7c46309SBarry Smith           if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
256dbb450caSBarry Smith           col = aij->colmap[col] + shift;
257ec8511deSBarry Smith           if (col < 0  && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
2582493cbb0SBarry Smith             ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
259227d817aSBarry Smith             col = (int) PetscReal(values[3*i+1]);
260d6dfbf8fSBarry Smith           }
2619e25ed09SBarry Smith         }
2621eb62cbbSBarry Smith         MatSetValues(aij->B,1,&row,1,&col,&val,addv);
2631eb62cbbSBarry Smith       }
2641eb62cbbSBarry Smith     }
2651eb62cbbSBarry Smith     count--;
2661eb62cbbSBarry Smith   }
2670452661fSBarry Smith   PetscFree(aij->recv_waits); PetscFree(aij->rvalues);
2681eb62cbbSBarry Smith 
2691eb62cbbSBarry Smith   /* wait on sends */
2701eb62cbbSBarry Smith   if (aij->nsends) {
2710452661fSBarry Smith     send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));
27278b31e54SBarry Smith     CHKPTRQ(send_status);
2731eb62cbbSBarry Smith     MPI_Waitall(aij->nsends,aij->send_waits,send_status);
2740452661fSBarry Smith     PetscFree(send_status);
2751eb62cbbSBarry Smith   }
2760452661fSBarry Smith   PetscFree(aij->send_waits); PetscFree(aij->svalues);
2771eb62cbbSBarry Smith 
27864119d58SLois Curfman McInnes   aij->insertmode = NOT_SET_VALUES;
27978b31e54SBarry Smith   ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
28078b31e54SBarry Smith   ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);
2811eb62cbbSBarry Smith 
2822493cbb0SBarry Smith   /* determine if any processor has disassembled, if so we must
2832493cbb0SBarry Smith      also disassemble ourselfs, in order that we may reassemble. */
284227d817aSBarry Smith   MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
285227d817aSBarry Smith   if (mat->was_assembled && !other_disassembled) {
2862493cbb0SBarry Smith     ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
2872493cbb0SBarry Smith   }
2882493cbb0SBarry Smith 
289227d817aSBarry Smith   if (!mat->was_assembled && mode == FINAL_ASSEMBLY) {
29078b31e54SBarry Smith     ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
2915e42470aSBarry Smith   }
29278b31e54SBarry Smith   ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
29378b31e54SBarry Smith   ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);
2945e42470aSBarry Smith 
2957a0afa10SBarry Smith   if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;}
2968a729477SBarry Smith   return 0;
2978a729477SBarry Smith }
2988a729477SBarry Smith 
2992ee70a88SLois Curfman McInnes static int MatZeroEntries_MPIAIJ(Mat A)
3001eb62cbbSBarry Smith {
30144a69424SLois Curfman McInnes   Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
302dbd7a890SLois Curfman McInnes   int        ierr;
30378b31e54SBarry Smith   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
30478b31e54SBarry Smith   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
3051eb62cbbSBarry Smith   return 0;
3061eb62cbbSBarry Smith }
3071eb62cbbSBarry Smith 
3081eb62cbbSBarry Smith /* the code does not do the diagonal entries correctly unless the
3091eb62cbbSBarry Smith    matrix is square and the column and row owerships are identical.
3101eb62cbbSBarry Smith    This is a BUG. The only way to fix it seems to be to access
3111eb62cbbSBarry Smith    aij->A and aij->B directly and not through the MatZeroRows()
3121eb62cbbSBarry Smith    routine.
3131eb62cbbSBarry Smith */
31444a69424SLois Curfman McInnes static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
3151eb62cbbSBarry Smith {
31644a69424SLois Curfman McInnes   Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
31717699dbbSLois Curfman McInnes   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
3186abc6512SBarry Smith   int            *procs,*nprocs,j,found,idx,nsends,*work;
31917699dbbSLois Curfman McInnes   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
3205392566eSBarry Smith   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
321d6dfbf8fSBarry Smith   int            *lens,imdex,*lrows,*values;
322d6dfbf8fSBarry Smith   MPI_Comm       comm = A->comm;
3231eb62cbbSBarry Smith   MPI_Request    *send_waits,*recv_waits;
3241eb62cbbSBarry Smith   MPI_Status     recv_status,*send_status;
3251eb62cbbSBarry Smith   IS             istmp;
3261eb62cbbSBarry Smith 
32778b31e54SBarry Smith   ierr = ISGetLocalSize(is,&N); CHKERRQ(ierr);
32878b31e54SBarry Smith   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
3291eb62cbbSBarry Smith 
3301eb62cbbSBarry Smith   /*  first count number of contributors to each processor */
3310452661fSBarry Smith   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
332cddf8d76SBarry Smith   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
3330452661fSBarry Smith   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
3341eb62cbbSBarry Smith   for ( i=0; i<N; i++ ) {
3351eb62cbbSBarry Smith     idx = rows[i];
3361eb62cbbSBarry Smith     found = 0;
33717699dbbSLois Curfman McInnes     for ( j=0; j<size; j++ ) {
3381eb62cbbSBarry Smith       if (idx >= owners[j] && idx < owners[j+1]) {
3391eb62cbbSBarry Smith         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
3401eb62cbbSBarry Smith       }
3411eb62cbbSBarry Smith     }
342bbb6d6a8SBarry Smith     if (!found) SETERRQ(1,"MatZeroRows_MPIAIJ:Index out of range");
3431eb62cbbSBarry Smith   }
34417699dbbSLois Curfman McInnes   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
3451eb62cbbSBarry Smith 
3461eb62cbbSBarry Smith   /* inform other processors of number of messages and max length*/
3470452661fSBarry Smith   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
34817699dbbSLois Curfman McInnes   MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
34917699dbbSLois Curfman McInnes   nrecvs = work[rank];
35017699dbbSLois Curfman McInnes   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
35117699dbbSLois Curfman McInnes   nmax = work[rank];
3520452661fSBarry Smith   PetscFree(work);
3531eb62cbbSBarry Smith 
3541eb62cbbSBarry Smith   /* post receives:   */
3550452661fSBarry Smith   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
35678b31e54SBarry Smith   CHKPTRQ(rvalues);
3570452661fSBarry Smith   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
35878b31e54SBarry Smith   CHKPTRQ(recv_waits);
3591eb62cbbSBarry Smith   for ( i=0; i<nrecvs; i++ ) {
360416022c9SBarry Smith     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
3611eb62cbbSBarry Smith   }
3621eb62cbbSBarry Smith 
3631eb62cbbSBarry Smith   /* do sends:
3641eb62cbbSBarry Smith       1) starts[i] gives the starting index in svalues for stuff going to
3651eb62cbbSBarry Smith          the ith processor
3661eb62cbbSBarry Smith   */
3670452661fSBarry Smith   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
3680452661fSBarry Smith   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
36978b31e54SBarry Smith   CHKPTRQ(send_waits);
3700452661fSBarry Smith   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
3711eb62cbbSBarry Smith   starts[0] = 0;
37217699dbbSLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
3731eb62cbbSBarry Smith   for ( i=0; i<N; i++ ) {
3741eb62cbbSBarry Smith     svalues[starts[owner[i]]++] = rows[i];
3751eb62cbbSBarry Smith   }
3761eb62cbbSBarry Smith   ISRestoreIndices(is,&rows);
3771eb62cbbSBarry Smith 
3781eb62cbbSBarry Smith   starts[0] = 0;
37917699dbbSLois Curfman McInnes   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
3801eb62cbbSBarry Smith   count = 0;
38117699dbbSLois Curfman McInnes   for ( i=0; i<size; i++ ) {
3821eb62cbbSBarry Smith     if (procs[i]) {
383416022c9SBarry Smith       MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
3841eb62cbbSBarry Smith     }
3851eb62cbbSBarry Smith   }
3860452661fSBarry Smith   PetscFree(starts);
3871eb62cbbSBarry Smith 
38817699dbbSLois Curfman McInnes   base = owners[rank];
3891eb62cbbSBarry Smith 
3901eb62cbbSBarry Smith   /*  wait on receives */
3910452661fSBarry Smith   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
3921eb62cbbSBarry Smith   source = lens + nrecvs;
3931eb62cbbSBarry Smith   count  = nrecvs; slen = 0;
3941eb62cbbSBarry Smith   while (count) {
395d6dfbf8fSBarry Smith     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
3961eb62cbbSBarry Smith     /* unpack receives into our local space */
3971eb62cbbSBarry Smith     MPI_Get_count(&recv_status,MPI_INT,&n);
398d6dfbf8fSBarry Smith     source[imdex]  = recv_status.MPI_SOURCE;
399d6dfbf8fSBarry Smith     lens[imdex]  = n;
4001eb62cbbSBarry Smith     slen += n;
4011eb62cbbSBarry Smith     count--;
4021eb62cbbSBarry Smith   }
4030452661fSBarry Smith   PetscFree(recv_waits);
4041eb62cbbSBarry Smith 
4051eb62cbbSBarry Smith   /* move the data into the send scatter */
4060452661fSBarry Smith   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
4071eb62cbbSBarry Smith   count = 0;
4081eb62cbbSBarry Smith   for ( i=0; i<nrecvs; i++ ) {
4091eb62cbbSBarry Smith     values = rvalues + i*nmax;
4101eb62cbbSBarry Smith     for ( j=0; j<lens[i]; j++ ) {
4111eb62cbbSBarry Smith       lrows[count++] = values[j] - base;
4121eb62cbbSBarry Smith     }
4131eb62cbbSBarry Smith   }
4140452661fSBarry Smith   PetscFree(rvalues); PetscFree(lens);
4150452661fSBarry Smith   PetscFree(owner); PetscFree(nprocs);
4161eb62cbbSBarry Smith 
4171eb62cbbSBarry Smith   /* actually zap the local rows */
418416022c9SBarry Smith   ierr = ISCreateSeq(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
419464493b3SBarry Smith   PLogObjectParent(A,istmp);
4200452661fSBarry Smith   PetscFree(lrows);
42178b31e54SBarry Smith   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
42278b31e54SBarry Smith   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
42378b31e54SBarry Smith   ierr = ISDestroy(istmp); CHKERRQ(ierr);
4241eb62cbbSBarry Smith 
4251eb62cbbSBarry Smith   /* wait on sends */
4261eb62cbbSBarry Smith   if (nsends) {
4270452661fSBarry Smith     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
42878b31e54SBarry Smith     CHKPTRQ(send_status);
4291eb62cbbSBarry Smith     MPI_Waitall(nsends,send_waits,send_status);
4300452661fSBarry Smith     PetscFree(send_status);
4311eb62cbbSBarry Smith   }
4320452661fSBarry Smith   PetscFree(send_waits); PetscFree(svalues);
4331eb62cbbSBarry Smith 
4341eb62cbbSBarry Smith   return 0;
4351eb62cbbSBarry Smith }
4361eb62cbbSBarry Smith 
437416022c9SBarry Smith static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
4381eb62cbbSBarry Smith {
439416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
4401eb62cbbSBarry Smith   int        ierr;
441416022c9SBarry Smith 
44264119d58SLois Curfman McInnes   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
443416022c9SBarry Smith   ierr = MatMult(a->A,xx,yy); CHKERRQ(ierr);
44464119d58SLois Curfman McInnes   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
445416022c9SBarry Smith   ierr = MatMultAdd(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
4461eb62cbbSBarry Smith   return 0;
4471eb62cbbSBarry Smith }
4481eb62cbbSBarry Smith 
449416022c9SBarry Smith static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
450da3a660dSBarry Smith {
451416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
452da3a660dSBarry Smith   int        ierr;
45364119d58SLois Curfman McInnes   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
454416022c9SBarry Smith   ierr = MatMultAdd(a->A,xx,yy,zz); CHKERRQ(ierr);
45564119d58SLois Curfman McInnes   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
456416022c9SBarry Smith   ierr = MatMultAdd(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
457da3a660dSBarry Smith   return 0;
458da3a660dSBarry Smith }
459da3a660dSBarry Smith 
460416022c9SBarry Smith static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
461da3a660dSBarry Smith {
462416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
463da3a660dSBarry Smith   int        ierr;
464da3a660dSBarry Smith 
465da3a660dSBarry Smith   /* do nondiagonal part */
466416022c9SBarry Smith   ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr);
467da3a660dSBarry Smith   /* send it on its way */
468416022c9SBarry Smith   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,
46964119d58SLois Curfman McInnes                 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
470da3a660dSBarry Smith   /* do local part */
471416022c9SBarry Smith   ierr = MatMultTrans(a->A,xx,yy); CHKERRQ(ierr);
472da3a660dSBarry Smith   /* receive remote parts: note this assumes the values are not actually */
473da3a660dSBarry Smith   /* inserted in yy until the next line, which is true for my implementation*/
474da3a660dSBarry Smith   /* but is not perhaps always true. */
475416022c9SBarry Smith   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,
47664119d58SLois Curfman McInnes                   (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
477da3a660dSBarry Smith   return 0;
478da3a660dSBarry Smith }
479da3a660dSBarry Smith 
480416022c9SBarry Smith static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
481da3a660dSBarry Smith {
482416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
483da3a660dSBarry Smith   int        ierr;
484da3a660dSBarry Smith 
485da3a660dSBarry Smith   /* do nondiagonal part */
486416022c9SBarry Smith   ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr);
487da3a660dSBarry Smith   /* send it on its way */
488416022c9SBarry Smith   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,
48964119d58SLois Curfman McInnes                  (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
490da3a660dSBarry Smith   /* do local part */
491416022c9SBarry Smith   ierr = MatMultTransAdd(a->A,xx,yy,zz); CHKERRQ(ierr);
492da3a660dSBarry Smith   /* receive remote parts: note this assumes the values are not actually */
493da3a660dSBarry Smith   /* inserted in yy until the next line, which is true for my implementation*/
494da3a660dSBarry Smith   /* but is not perhaps always true. */
495416022c9SBarry Smith   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,
49664119d58SLois Curfman McInnes                   (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
497da3a660dSBarry Smith   return 0;
498da3a660dSBarry Smith }
499da3a660dSBarry Smith 
5001eb62cbbSBarry Smith /*
5011eb62cbbSBarry Smith   This only works correctly for square matrices where the subblock A->A is the
5021eb62cbbSBarry Smith    diagonal block
5031eb62cbbSBarry Smith */
504416022c9SBarry Smith static int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
5051eb62cbbSBarry Smith {
506416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
507416022c9SBarry Smith   return MatGetDiagonal(a->A,v);
5081eb62cbbSBarry Smith }
5091eb62cbbSBarry Smith 
510052efed2SBarry Smith static int MatScale_MPIAIJ(Scalar *aa,Mat A)
511052efed2SBarry Smith {
512052efed2SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
513052efed2SBarry Smith   int        ierr;
514052efed2SBarry Smith   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
515052efed2SBarry Smith   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
516052efed2SBarry Smith   return 0;
517052efed2SBarry Smith }
518052efed2SBarry Smith 
51944a69424SLois Curfman McInnes static int MatDestroy_MPIAIJ(PetscObject obj)
5201eb62cbbSBarry Smith {
5211eb62cbbSBarry Smith   Mat        mat = (Mat) obj;
52244a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
5231eb62cbbSBarry Smith   int        ierr;
524a5a9c739SBarry Smith #if defined(PETSC_LOG)
5256e35fa57SLois Curfman McInnes   PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N);
526a5a9c739SBarry Smith #endif
5270452661fSBarry Smith   PetscFree(aij->rowners);
52878b31e54SBarry Smith   ierr = MatDestroy(aij->A); CHKERRQ(ierr);
52978b31e54SBarry Smith   ierr = MatDestroy(aij->B); CHKERRQ(ierr);
5300452661fSBarry Smith   if (aij->colmap) PetscFree(aij->colmap);
5310452661fSBarry Smith   if (aij->garray) PetscFree(aij->garray);
5321eb62cbbSBarry Smith   if (aij->lvec)   VecDestroy(aij->lvec);
533a56f8943SBarry Smith   if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
5347a0afa10SBarry Smith   if (aij->rowvalues) PetscFree(aij->rowvalues);
5350452661fSBarry Smith   PetscFree(aij);
536a5a9c739SBarry Smith   PLogObjectDestroy(mat);
5370452661fSBarry Smith   PetscHeaderDestroy(mat);
5381eb62cbbSBarry Smith   return 0;
5391eb62cbbSBarry Smith }
5407857610eSBarry Smith #include "draw.h"
541b16a3bb1SBarry Smith #include "pinclude/pviewer.h"
542ee50ffe9SBarry Smith 
543416022c9SBarry Smith static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
5441eb62cbbSBarry Smith {
545416022c9SBarry Smith   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
546416022c9SBarry Smith   int         ierr;
547416022c9SBarry Smith 
54817699dbbSLois Curfman McInnes   if (aij->size == 1) {
549416022c9SBarry Smith     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
550416022c9SBarry Smith   }
551a523beb4SLois Curfman McInnes   else SETERRQ(1,"MatView_MPIAIJ_Binary:Only uniprocessor output supported");
552416022c9SBarry Smith   return 0;
553416022c9SBarry Smith }
554416022c9SBarry Smith 
555d7e8b826SBarry Smith static int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
556416022c9SBarry Smith {
55744a69424SLois Curfman McInnes   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
558dbb450caSBarry Smith   Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
559a56f8943SBarry Smith   int         ierr, format,shift = C->indexshift,rank;
560d13ab20cSBarry Smith   PetscObject vobj = (PetscObject) viewer;
561d636dbe3SBarry Smith   FILE        *fd;
562416022c9SBarry Smith 
563416022c9SBarry Smith   if (vobj->type == ASCII_FILE_VIEWER || vobj->type == ASCII_FILES_VIEWER) {
564416022c9SBarry Smith     ierr = ViewerFileGetFormat_Private(viewer,&format);
56508480c60SBarry Smith     if (format == FILE_FORMAT_INFO_DETAILED) {
56695e01e2fSLois Curfman McInnes       int nz, nzalloc, mem, flg;
567a56f8943SBarry Smith       MPI_Comm_rank(mat->comm,&rank);
568227d817aSBarry Smith       ierr = ViewerFileGetPointer(viewer,&fd); CHKERRQ(ierr);
569a56f8943SBarry Smith       ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem);
57095e01e2fSLois Curfman McInnes       ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr);
571a56f8943SBarry Smith       MPIU_Seq_begin(mat->comm,1);
57295e01e2fSLois Curfman McInnes       if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
57395e01e2fSLois Curfman McInnes          rank,aij->m,nz,nzalloc,mem);
57495e01e2fSLois Curfman McInnes       else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
57595e01e2fSLois Curfman McInnes          rank,aij->m,nz,nzalloc,mem);
57608480c60SBarry Smith       ierr = MatGetInfo(aij->A,MAT_LOCAL,&nz,&nzalloc,&mem);
57795e01e2fSLois Curfman McInnes       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,nz);
57808480c60SBarry Smith       ierr = MatGetInfo(aij->B,MAT_LOCAL,&nz,&nzalloc,&mem);
57995e01e2fSLois Curfman McInnes       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,nz);
580a56f8943SBarry Smith       fflush(fd);
581a56f8943SBarry Smith       MPIU_Seq_end(mat->comm,1);
582a40aa06bSLois Curfman McInnes       ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr);
583416022c9SBarry Smith       return 0;
584416022c9SBarry Smith     }
58508480c60SBarry Smith     else if (format == FILE_FORMAT_INFO) {
58608480c60SBarry Smith       return 0;
58708480c60SBarry Smith     }
588416022c9SBarry Smith   }
589416022c9SBarry Smith 
590416022c9SBarry Smith   if (vobj->type == ASCII_FILE_VIEWER) {
591227d817aSBarry Smith     ierr = ViewerFileGetPointer(viewer,&fd); CHKERRQ(ierr);
5927f813858SBarry Smith     MPIU_Seq_begin(mat->comm,1);
593d13ab20cSBarry Smith     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
59417699dbbSLois Curfman McInnes            aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
5951eb62cbbSBarry Smith            aij->cend);
59678b31e54SBarry Smith     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
59778b31e54SBarry Smith     ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
598d13ab20cSBarry Smith     fflush(fd);
5997f813858SBarry Smith     MPIU_Seq_end(mat->comm,1);
600d13ab20cSBarry Smith   }
601416022c9SBarry Smith   else {
602a56f8943SBarry Smith     int size = aij->size;
603a56f8943SBarry Smith     rank = aij->rank;
60417699dbbSLois Curfman McInnes     if (size == 1) {
60578b31e54SBarry Smith       ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
60695373324SBarry Smith     }
60795373324SBarry Smith     else {
60895373324SBarry Smith       /* assemble the entire matrix onto first processor. */
60995373324SBarry Smith       Mat         A;
610ec8511deSBarry Smith       Mat_SeqAIJ *Aloc;
6112eb8c8abSBarry Smith       int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
61295373324SBarry Smith       Scalar      *a;
6132ee70a88SLois Curfman McInnes 
61417699dbbSLois Curfman McInnes       if (!rank) {
615b4fd4287SBarry Smith         ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
616c451ab25SLois Curfman McInnes                CHKERRQ(ierr);
61795373324SBarry Smith       }
61895373324SBarry Smith       else {
619b4fd4287SBarry Smith         ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
620c451ab25SLois Curfman McInnes                CHKERRQ(ierr);
62195373324SBarry Smith       }
622464493b3SBarry Smith       PLogObjectParent(mat,A);
623416022c9SBarry Smith 
62495373324SBarry Smith       /* copy over the A part */
625ec8511deSBarry Smith       Aloc = (Mat_SeqAIJ*) aij->A->data;
6262ee70a88SLois Curfman McInnes       m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
62795373324SBarry Smith       row = aij->rstart;
628dbb450caSBarry Smith       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
62995373324SBarry Smith       for ( i=0; i<m; i++ ) {
630416022c9SBarry Smith         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
63195373324SBarry Smith         row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
63295373324SBarry Smith       }
6332ee70a88SLois Curfman McInnes       aj = Aloc->j;
634dbb450caSBarry Smith       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}
63595373324SBarry Smith 
63695373324SBarry Smith       /* copy over the B part */
637ec8511deSBarry Smith       Aloc = (Mat_SeqAIJ*) aij->B->data;
6382ee70a88SLois Curfman McInnes       m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
63995373324SBarry Smith       row = aij->rstart;
6400452661fSBarry Smith       ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
641dbb450caSBarry Smith       for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
64295373324SBarry Smith       for ( i=0; i<m; i++ ) {
643416022c9SBarry Smith         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
64495373324SBarry Smith         row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
64595373324SBarry Smith       }
6460452661fSBarry Smith       PetscFree(ct);
64778b31e54SBarry Smith       ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
64878b31e54SBarry Smith       ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
64917699dbbSLois Curfman McInnes       if (!rank) {
65078b31e54SBarry Smith         ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
65195373324SBarry Smith       }
65278b31e54SBarry Smith       ierr = MatDestroy(A); CHKERRQ(ierr);
65395373324SBarry Smith     }
65495373324SBarry Smith   }
6551eb62cbbSBarry Smith   return 0;
6561eb62cbbSBarry Smith }
6571eb62cbbSBarry Smith 
658416022c9SBarry Smith static int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
659416022c9SBarry Smith {
660416022c9SBarry Smith   Mat         mat = (Mat) obj;
661416022c9SBarry Smith   int         ierr;
662416022c9SBarry Smith   PetscObject vobj = (PetscObject) viewer;
663416022c9SBarry Smith 
664416022c9SBarry Smith   if (!viewer) {
665416022c9SBarry Smith     viewer = STDOUT_VIEWER_SELF; vobj = (PetscObject) viewer;
666416022c9SBarry Smith   }
667416022c9SBarry Smith   if (vobj->cookie == DRAW_COOKIE && vobj->type == NULLWINDOW) return 0;
668416022c9SBarry Smith   else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILE_VIEWER) {
669d7e8b826SBarry Smith     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
670416022c9SBarry Smith   }
671416022c9SBarry Smith   else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILES_VIEWER) {
672d7e8b826SBarry Smith     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
673d7e8b826SBarry Smith   }
674d7e8b826SBarry Smith   else if (vobj->cookie == VIEWER_COOKIE && vobj->type == MATLAB_VIEWER) {
675d7e8b826SBarry Smith     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
676416022c9SBarry Smith   }
677416022c9SBarry Smith   else if (vobj->cookie == DRAW_COOKIE) {
678d7e8b826SBarry Smith     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
679416022c9SBarry Smith   }
680416022c9SBarry Smith   else if (vobj->type == BINARY_FILE_VIEWER) {
681416022c9SBarry Smith     return MatView_MPIAIJ_Binary(mat,viewer);
682416022c9SBarry Smith   }
683416022c9SBarry Smith   return 0;
684416022c9SBarry Smith }
685416022c9SBarry Smith 
686ec8511deSBarry Smith extern int MatMarkDiag_SeqAIJ(Mat);
6871eb62cbbSBarry Smith /*
6881eb62cbbSBarry Smith     This has to provide several versions.
6891eb62cbbSBarry Smith 
6901eb62cbbSBarry Smith      1) per sequential
6911eb62cbbSBarry Smith      2) a) use only local smoothing updating outer values only once.
6921eb62cbbSBarry Smith         b) local smoothing updating outer values each inner iteration
693d6dfbf8fSBarry Smith      3) color updating out values betwen colors.
6941eb62cbbSBarry Smith */
695fc76ce87SLois Curfman McInnes static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
696dbb450caSBarry Smith                            double fshift,int its,Vec xx)
6978a729477SBarry Smith {
69844a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
699d6dfbf8fSBarry Smith   Mat        AA = mat->A, BB = mat->B;
700ec8511deSBarry Smith   Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
7016abc6512SBarry Smith   Scalar     zero = 0.0,*b,*x,*xs,*ls,d,*v,sum,scale,*t,*ts;
7026abc6512SBarry Smith   int        ierr,*idx, *diag;
703416022c9SBarry Smith   int        n = mat->n, m = mat->m, i,shift = A->indexshift;
704da3a660dSBarry Smith   Vec        tt;
7058a729477SBarry Smith 
706d6dfbf8fSBarry Smith   VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
707dbb450caSBarry Smith   xs = x + shift; /* shift by one for index start of 1 */
708dbb450caSBarry Smith   ls = ls + shift;
709ec8511deSBarry Smith   if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;}
710d6dfbf8fSBarry Smith   diag = A->diag;
711acb40c82SBarry Smith   if (flag == SOR_APPLY_UPPER || flag == SOR_APPLY_LOWER) {
71248d91487SBarry Smith     SETERRQ(1,"MatRelax_MPIAIJ:Option not supported");
713acb40c82SBarry Smith   }
714da3a660dSBarry Smith   if (flag & SOR_EISENSTAT) {
715da3a660dSBarry Smith     /* Let  A = L + U + D; where L is lower trianglar,
716da3a660dSBarry Smith     U is upper triangular, E is diagonal; This routine applies
717da3a660dSBarry Smith 
718da3a660dSBarry Smith             (L + E)^{-1} A (U + E)^{-1}
719da3a660dSBarry Smith 
720da3a660dSBarry Smith     to a vector efficiently using Eisenstat's trick. This is for
721da3a660dSBarry Smith     the case of SSOR preconditioner, so E is D/omega where omega
722da3a660dSBarry Smith     is the relaxation factor.
723da3a660dSBarry Smith     */
72478b31e54SBarry Smith     ierr = VecDuplicate(xx,&tt); CHKERRQ(ierr);
725464493b3SBarry Smith     PLogObjectParent(matin,tt);
726da3a660dSBarry Smith     VecGetArray(tt,&t);
727da3a660dSBarry Smith     scale = (2.0/omega) - 1.0;
728da3a660dSBarry Smith     /*  x = (E + U)^{-1} b */
729da3a660dSBarry Smith     VecSet(&zero,mat->lvec);
73064119d58SLois Curfman McInnes     ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
73178b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
732da3a660dSBarry Smith     for ( i=m-1; i>-1; i-- ) {
733da3a660dSBarry Smith       n    = A->i[i+1] - diag[i] - 1;
734dbb450caSBarry Smith       idx  = A->j + diag[i] + !shift;
735dbb450caSBarry Smith       v    = A->a + diag[i] + !shift;
736da3a660dSBarry Smith       sum  = b[i];
737da3a660dSBarry Smith       SPARSEDENSEMDOT(sum,xs,v,idx,n);
738dbb450caSBarry Smith       d    = fshift + A->a[diag[i]+shift];
739da3a660dSBarry Smith       n    = B->i[i+1] - B->i[i];
740dbb450caSBarry Smith       idx  = B->j + B->i[i] + shift;
741dbb450caSBarry Smith       v    = B->a + B->i[i] + shift;
742da3a660dSBarry Smith       SPARSEDENSEMDOT(sum,ls,v,idx,n);
743da3a660dSBarry Smith       x[i] = omega*(sum/d);
744da3a660dSBarry Smith     }
74564119d58SLois Curfman McInnes     ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
74678b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
747da3a660dSBarry Smith 
748da3a660dSBarry Smith     /*  t = b - (2*E - D)x */
749da3a660dSBarry Smith     v = A->a;
750dbb450caSBarry Smith     for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ + shift])*x[i]; }
751da3a660dSBarry Smith 
752da3a660dSBarry Smith     /*  t = (E + L)^{-1}t */
753dbb450caSBarry Smith     ts = t + shift; /* shifted by one for index start of a or mat->j*/
754da3a660dSBarry Smith     diag = A->diag;
755da3a660dSBarry Smith     VecSet(&zero,mat->lvec);
75664119d58SLois Curfman McInnes     ierr = VecPipelineBegin(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
75778b31e54SBarry Smith                                                  mat->Mvctx); CHKERRQ(ierr);
758da3a660dSBarry Smith     for ( i=0; i<m; i++ ) {
759da3a660dSBarry Smith       n    = diag[i] - A->i[i];
760dbb450caSBarry Smith       idx  = A->j + A->i[i] + shift;
761dbb450caSBarry Smith       v    = A->a + A->i[i] + shift;
762da3a660dSBarry Smith       sum  = t[i];
763da3a660dSBarry Smith       SPARSEDENSEMDOT(sum,ts,v,idx,n);
764dbb450caSBarry Smith       d    = fshift + A->a[diag[i]+shift];
765da3a660dSBarry Smith       n    = B->i[i+1] - B->i[i];
766dbb450caSBarry Smith       idx  = B->j + B->i[i] + shift;
767dbb450caSBarry Smith       v    = B->a + B->i[i] + shift;
768da3a660dSBarry Smith       SPARSEDENSEMDOT(sum,ls,v,idx,n);
769da3a660dSBarry Smith       t[i] = omega*(sum/d);
770da3a660dSBarry Smith     }
77164119d58SLois Curfman McInnes     ierr = VecPipelineEnd(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
77278b31e54SBarry Smith                                                     mat->Mvctx); CHKERRQ(ierr);
773da3a660dSBarry Smith     /*  x = x + t */
774da3a660dSBarry Smith     for ( i=0; i<m; i++ ) { x[i] += t[i]; }
775da3a660dSBarry Smith     VecDestroy(tt);
776da3a660dSBarry Smith     return 0;
777da3a660dSBarry Smith   }
778da3a660dSBarry Smith 
7791eb62cbbSBarry Smith 
780d6dfbf8fSBarry Smith   if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){
781da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
782da3a660dSBarry Smith       VecSet(&zero,mat->lvec); VecSet(&zero,xx);
783da3a660dSBarry Smith     }
784da3a660dSBarry Smith     else {
78564119d58SLois Curfman McInnes       ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
78678b31e54SBarry Smith       CHKERRQ(ierr);
78764119d58SLois Curfman McInnes       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
78878b31e54SBarry Smith       CHKERRQ(ierr);
789da3a660dSBarry Smith     }
790d6dfbf8fSBarry Smith     while (its--) {
791d6dfbf8fSBarry Smith       /* go down through the rows */
79264119d58SLois Curfman McInnes       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
79378b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
794d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
795d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
796dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
797dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
798d6dfbf8fSBarry Smith         sum  = b[i];
799d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
800dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
801d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
802dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
803dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
804d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
805d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
806d6dfbf8fSBarry Smith       }
80764119d58SLois Curfman McInnes       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
80878b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
809d6dfbf8fSBarry Smith       /* come up through the rows */
81064119d58SLois Curfman McInnes       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
81178b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
812d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
813d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
814dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
815dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
816d6dfbf8fSBarry Smith         sum  = b[i];
817d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
818dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
819d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
820dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
821dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
822d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
823d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
824d6dfbf8fSBarry Smith       }
82564119d58SLois Curfman McInnes       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
82678b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
827d6dfbf8fSBarry Smith     }
828d6dfbf8fSBarry Smith   }
829d6dfbf8fSBarry Smith   else if (flag & SOR_FORWARD_SWEEP){
830da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
831da3a660dSBarry Smith       VecSet(&zero,mat->lvec);
83264119d58SLois Curfman McInnes       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
83378b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
834da3a660dSBarry Smith       for ( i=0; i<m; i++ ) {
835da3a660dSBarry Smith         n    = diag[i] - A->i[i];
836dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
837dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
838da3a660dSBarry Smith         sum  = b[i];
839da3a660dSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
840dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
841da3a660dSBarry Smith         n    = B->i[i+1] - B->i[i];
842dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
843dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
844da3a660dSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
845da3a660dSBarry Smith         x[i] = omega*(sum/d);
846da3a660dSBarry Smith       }
84764119d58SLois Curfman McInnes       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
84878b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
849da3a660dSBarry Smith       its--;
850da3a660dSBarry Smith     }
851d6dfbf8fSBarry Smith     while (its--) {
85264119d58SLois Curfman McInnes       ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
85378b31e54SBarry Smith       CHKERRQ(ierr);
85464119d58SLois Curfman McInnes       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
85578b31e54SBarry Smith       CHKERRQ(ierr);
85664119d58SLois Curfman McInnes       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
85778b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
858d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
859d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
860dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
861dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
862d6dfbf8fSBarry Smith         sum  = b[i];
863d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
864dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
865d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
866dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
867dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
868d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
869d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
870d6dfbf8fSBarry Smith       }
87164119d58SLois Curfman McInnes       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
87278b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
873d6dfbf8fSBarry Smith     }
874d6dfbf8fSBarry Smith   }
875d6dfbf8fSBarry Smith   else if (flag & SOR_BACKWARD_SWEEP){
876da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
877da3a660dSBarry Smith       VecSet(&zero,mat->lvec);
87864119d58SLois Curfman McInnes       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
87978b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
880da3a660dSBarry Smith       for ( i=m-1; i>-1; i-- ) {
881da3a660dSBarry Smith         n    = A->i[i+1] - diag[i] - 1;
882dbb450caSBarry Smith         idx  = A->j + diag[i] + !shift;
883dbb450caSBarry Smith         v    = A->a + diag[i] + !shift;
884da3a660dSBarry Smith         sum  = b[i];
885da3a660dSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
886dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
887da3a660dSBarry Smith         n    = B->i[i+1] - B->i[i];
888dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
889dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
890da3a660dSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
891da3a660dSBarry Smith         x[i] = omega*(sum/d);
892da3a660dSBarry Smith       }
89364119d58SLois Curfman McInnes       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
89478b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
895da3a660dSBarry Smith       its--;
896da3a660dSBarry Smith     }
897d6dfbf8fSBarry Smith     while (its--) {
89864119d58SLois Curfman McInnes       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN,
89978b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
90064119d58SLois Curfman McInnes       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN,
90178b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
90264119d58SLois Curfman McInnes       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
90378b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
904d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
905d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
906dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
907dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
908d6dfbf8fSBarry Smith         sum  = b[i];
909d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
910dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
911d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
912dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
913dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
914d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
915d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
916d6dfbf8fSBarry Smith       }
91764119d58SLois Curfman McInnes       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
91878b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
919d6dfbf8fSBarry Smith     }
920d6dfbf8fSBarry Smith   }
921d6dfbf8fSBarry Smith   else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
922da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
923dbb450caSBarry Smith       return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
924da3a660dSBarry Smith     }
92564119d58SLois Curfman McInnes     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
92678b31e54SBarry Smith     CHKERRQ(ierr);
92764119d58SLois Curfman McInnes     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
92878b31e54SBarry Smith     CHKERRQ(ierr);
929d6dfbf8fSBarry Smith     while (its--) {
930d6dfbf8fSBarry Smith       /* go down through the rows */
931d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
932d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
933dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
934dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
935d6dfbf8fSBarry Smith         sum  = b[i];
936d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
937dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
938d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
939dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
940dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
941d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
942d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
943d6dfbf8fSBarry Smith       }
944d6dfbf8fSBarry Smith       /* come up through the rows */
945d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
946d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
947dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
948dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
949d6dfbf8fSBarry Smith         sum  = b[i];
950d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
951dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
952d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
953dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
954dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
955d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
956d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
957d6dfbf8fSBarry Smith       }
958d6dfbf8fSBarry Smith     }
959d6dfbf8fSBarry Smith   }
960d6dfbf8fSBarry Smith   else if (flag & SOR_LOCAL_FORWARD_SWEEP){
961da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
962dbb450caSBarry Smith       return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
963da3a660dSBarry Smith     }
96464119d58SLois Curfman McInnes     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
96578b31e54SBarry Smith     CHKERRQ(ierr);
96664119d58SLois Curfman McInnes     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
96778b31e54SBarry Smith     CHKERRQ(ierr);
968d6dfbf8fSBarry Smith     while (its--) {
969d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
970d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
971dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
972dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
973d6dfbf8fSBarry Smith         sum  = b[i];
974d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
975dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
976d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
977dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
978dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
979d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
980d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
981d6dfbf8fSBarry Smith       }
982d6dfbf8fSBarry Smith     }
983d6dfbf8fSBarry Smith   }
984d6dfbf8fSBarry Smith   else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
985da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
986dbb450caSBarry Smith       return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
987da3a660dSBarry Smith     }
98864119d58SLois Curfman McInnes     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,
98978b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
99064119d58SLois Curfman McInnes     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,
99178b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
992d6dfbf8fSBarry Smith     while (its--) {
993d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
994d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
995dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
996dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
997d6dfbf8fSBarry Smith         sum  = b[i];
998d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
999dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
1000d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
1001dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
1002dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
1003d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
1004d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
1005d6dfbf8fSBarry Smith       }
1006d6dfbf8fSBarry Smith     }
1007d6dfbf8fSBarry Smith   }
10088a729477SBarry Smith   return 0;
10098a729477SBarry Smith }
1010a66be287SLois Curfman McInnes 
1011fc76ce87SLois Curfman McInnes static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,int *nz,
1012fc76ce87SLois Curfman McInnes                              int *nzalloc,int *mem)
1013a66be287SLois Curfman McInnes {
1014a66be287SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1015a66be287SLois Curfman McInnes   Mat        A = mat->A, B = mat->B;
1016a66be287SLois Curfman McInnes   int        ierr, isend[3], irecv[3], nzA, nzallocA, memA;
1017a66be287SLois Curfman McInnes 
101878b31e54SBarry Smith   ierr = MatGetInfo(A,MAT_LOCAL,&nzA,&nzallocA,&memA); CHKERRQ(ierr);
101978b31e54SBarry Smith   ierr = MatGetInfo(B,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr);
1020a66be287SLois Curfman McInnes   isend[0] += nzA; isend[1] += nzallocA; isend[2] += memA;
1021a66be287SLois Curfman McInnes   if (flag == MAT_LOCAL) {
1022a66be287SLois Curfman McInnes     *nz = isend[0]; *nzalloc = isend[1]; *mem = isend[2];
1023a66be287SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_MAX) {
1024d65a2f8fSBarry Smith     MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_MAX,matin->comm);
1025a66be287SLois Curfman McInnes     *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
1026a66be287SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_SUM) {
1027d65a2f8fSBarry Smith     MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_SUM,matin->comm);
1028a66be287SLois Curfman McInnes     *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
1029a66be287SLois Curfman McInnes   }
1030a66be287SLois Curfman McInnes   return 0;
1031a66be287SLois Curfman McInnes }
1032a66be287SLois Curfman McInnes 
1033299609e3SLois Curfman McInnes extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*);
1034299609e3SLois Curfman McInnes extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*);
1035299609e3SLois Curfman McInnes extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double);
1036299609e3SLois Curfman McInnes extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *);
1037299609e3SLois Curfman McInnes extern int MatSolve_MPIAIJ(Mat,Vec,Vec);
1038299609e3SLois Curfman McInnes extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec);
1039299609e3SLois Curfman McInnes extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec);
1040299609e3SLois Curfman McInnes extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec);
1041299609e3SLois Curfman McInnes 
1042416022c9SBarry Smith static int MatSetOption_MPIAIJ(Mat A,MatOption op)
1043c74985f6SBarry Smith {
1044c0bbcb79SLois Curfman McInnes   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1045c74985f6SBarry Smith 
1046c0bbcb79SLois Curfman McInnes   if (op == NO_NEW_NONZERO_LOCATIONS ||
1047c0bbcb79SLois Curfman McInnes       op == YES_NEW_NONZERO_LOCATIONS ||
1048c0bbcb79SLois Curfman McInnes       op == COLUMNS_SORTED ||
1049c0bbcb79SLois Curfman McInnes       op == ROW_ORIENTED) {
1050c0bbcb79SLois Curfman McInnes         MatSetOption(a->A,op);
1051c0bbcb79SLois Curfman McInnes         MatSetOption(a->B,op);
1052c74985f6SBarry Smith   }
1053c0bbcb79SLois Curfman McInnes   else if (op == ROWS_SORTED ||
1054c0bbcb79SLois Curfman McInnes            op == SYMMETRIC_MATRIX ||
1055c0bbcb79SLois Curfman McInnes            op == STRUCTURALLY_SYMMETRIC_MATRIX ||
1056c0bbcb79SLois Curfman McInnes            op == YES_NEW_DIAGONALS)
1057c0bbcb79SLois Curfman McInnes     PLogInfo((PetscObject)A,"Info:MatSetOption_MPIAIJ:Option ignored\n");
10584b0e389bSBarry Smith   else if (op == COLUMN_ORIENTED) {
10594b0e389bSBarry Smith     a->roworiented = 0;
10604b0e389bSBarry Smith     MatSetOption(a->A,op);
10614b0e389bSBarry Smith     MatSetOption(a->B,op);
10624b0e389bSBarry Smith   }
1063c0bbcb79SLois Curfman McInnes   else if (op == NO_NEW_DIAGONALS)
10644d39ef84SLois Curfman McInnes     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:NO_NEW_DIAGONALS");}
1065c0bbcb79SLois Curfman McInnes   else
10664d39ef84SLois Curfman McInnes     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:unknown option");}
1067c74985f6SBarry Smith   return 0;
1068c74985f6SBarry Smith }
1069c74985f6SBarry Smith 
1070d1710a03SLois Curfman McInnes static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
1071c74985f6SBarry Smith {
107244a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1073c74985f6SBarry Smith   *m = mat->M; *n = mat->N;
1074c74985f6SBarry Smith   return 0;
1075c74985f6SBarry Smith }
1076c74985f6SBarry Smith 
1077d1710a03SLois Curfman McInnes static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
1078c74985f6SBarry Smith {
107944a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1080b7c46309SBarry Smith   *m = mat->m; *n = mat->N;
1081c74985f6SBarry Smith   return 0;
1082c74985f6SBarry Smith }
1083c74985f6SBarry Smith 
1084d1710a03SLois Curfman McInnes static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
1085c74985f6SBarry Smith {
108644a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1087c74985f6SBarry Smith   *m = mat->rstart; *n = mat->rend;
1088c74985f6SBarry Smith   return 0;
1089c74985f6SBarry Smith }
1090c74985f6SBarry Smith 
1091*6d84be18SBarry Smith extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
1092*6d84be18SBarry Smith extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
1093*6d84be18SBarry Smith 
1094*6d84be18SBarry Smith int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
109539e00950SLois Curfman McInnes {
1096154123eaSLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
109770f0671dSBarry Smith   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
1098154123eaSLois Curfman McInnes   int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
1099154123eaSLois Curfman McInnes   int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
110070f0671dSBarry Smith   int        *cmap, *idx_p;
110139e00950SLois Curfman McInnes 
11027a0afa10SBarry Smith   if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,"MatGetRow_MPIAIJ:Already active");
11037a0afa10SBarry Smith   mat->getrowactive = PETSC_TRUE;
11047a0afa10SBarry Smith 
110570f0671dSBarry Smith   if (!mat->rowvalues && (idx || v)) {
11067a0afa10SBarry Smith     /*
11077a0afa10SBarry Smith         allocate enough space to hold information from the longest row.
11087a0afa10SBarry Smith     */
11097a0afa10SBarry Smith     Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data;
11107a0afa10SBarry Smith     int     max = 1,n = mat->n,tmp;
11117a0afa10SBarry Smith     for ( i=0; i<n; i++ ) {
11127a0afa10SBarry Smith       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
11137a0afa10SBarry Smith       if (max < tmp) { max = tmp; }
11147a0afa10SBarry Smith     }
11157a0afa10SBarry Smith     mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar)));
11167a0afa10SBarry Smith     CHKPTRQ(mat->rowvalues);
11177a0afa10SBarry Smith     mat->rowindices = (int *) (mat->rowvalues + max);
11187a0afa10SBarry Smith   }
11197a0afa10SBarry Smith 
11207a0afa10SBarry Smith 
1121b49de8d1SLois Curfman McInnes   if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIAIJ:Only local rows")
1122abc0e9e4SLois Curfman McInnes   lrow = row - rstart;
112339e00950SLois Curfman McInnes 
1124154123eaSLois Curfman McInnes   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1125154123eaSLois Curfman McInnes   if (!v)   {pvA = 0; pvB = 0;}
1126154123eaSLois Curfman McInnes   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1127*6d84be18SBarry Smith   ierr = MatGetRow_SeqAIJ(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1128*6d84be18SBarry Smith   ierr = MatGetRow_SeqAIJ(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1129154123eaSLois Curfman McInnes   nztot = nzA + nzB;
1130154123eaSLois Curfman McInnes 
113170f0671dSBarry Smith   cmap  = mat->garray;
1132154123eaSLois Curfman McInnes   if (v  || idx) {
1133154123eaSLois Curfman McInnes     if (nztot) {
1134154123eaSLois Curfman McInnes       /* Sort by increasing column numbers, assuming A and B already sorted */
113570f0671dSBarry Smith       int imark = -1;
1136154123eaSLois Curfman McInnes       if (v) {
113770f0671dSBarry Smith         *v = v_p = mat->rowvalues;
113839e00950SLois Curfman McInnes         for ( i=0; i<nzB; i++ ) {
113970f0671dSBarry Smith           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1140154123eaSLois Curfman McInnes           else break;
1141154123eaSLois Curfman McInnes         }
1142154123eaSLois Curfman McInnes         imark = i;
114370f0671dSBarry Smith         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
114470f0671dSBarry Smith         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
1145154123eaSLois Curfman McInnes       }
1146154123eaSLois Curfman McInnes       if (idx) {
114770f0671dSBarry Smith         *idx = idx_p = mat->rowindices;
114870f0671dSBarry Smith         if (imark > -1) {
114970f0671dSBarry Smith           for ( i=0; i<imark; i++ ) {
115070f0671dSBarry Smith             idx_p[i] = cmap[cworkB[i]];
115170f0671dSBarry Smith           }
115270f0671dSBarry Smith         } else {
1153154123eaSLois Curfman McInnes           for ( i=0; i<nzB; i++ ) {
115470f0671dSBarry Smith             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1155154123eaSLois Curfman McInnes             else break;
1156154123eaSLois Curfman McInnes           }
1157154123eaSLois Curfman McInnes           imark = i;
115870f0671dSBarry Smith         }
115970f0671dSBarry Smith         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart + cworkA[i];
116070f0671dSBarry Smith         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]];
116139e00950SLois Curfman McInnes       }
116239e00950SLois Curfman McInnes     }
116339e00950SLois Curfman McInnes     else {*idx = 0; *v=0;}
1164154123eaSLois Curfman McInnes   }
116539e00950SLois Curfman McInnes   *nz = nztot;
1166*6d84be18SBarry Smith   ierr = MatRestoreRow_SeqAIJ(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1167*6d84be18SBarry Smith   ierr = MatRestoreRow_SeqAIJ(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
116839e00950SLois Curfman McInnes   return 0;
116939e00950SLois Curfman McInnes }
117039e00950SLois Curfman McInnes 
1171*6d84be18SBarry Smith int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
117239e00950SLois Curfman McInnes {
11737a0afa10SBarry Smith   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
11747a0afa10SBarry Smith   if (aij->getrowactive == PETSC_FALSE) {
11757a0afa10SBarry Smith     SETERRQ(1,"MatRestoreRow_MPIAIJ:MatGetRow not called");
11767a0afa10SBarry Smith   }
11777a0afa10SBarry Smith   aij->getrowactive = PETSC_FALSE;
117839e00950SLois Curfman McInnes   return 0;
117939e00950SLois Curfman McInnes }
118039e00950SLois Curfman McInnes 
1181cddf8d76SBarry Smith static int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
1182855ac2c5SLois Curfman McInnes {
1183855ac2c5SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1184ec8511deSBarry Smith   Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
1185416022c9SBarry Smith   int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
1186416022c9SBarry Smith   double     sum = 0.0;
118704ca555eSLois Curfman McInnes   Scalar     *v;
118804ca555eSLois Curfman McInnes 
118917699dbbSLois Curfman McInnes   if (aij->size == 1) {
119014183eadSLois Curfman McInnes     ierr =  MatNorm(aij->A,type,norm); CHKERRQ(ierr);
119137fa93a5SLois Curfman McInnes   } else {
119204ca555eSLois Curfman McInnes     if (type == NORM_FROBENIUS) {
119304ca555eSLois Curfman McInnes       v = amat->a;
119404ca555eSLois Curfman McInnes       for (i=0; i<amat->nz; i++ ) {
119504ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX)
119604ca555eSLois Curfman McInnes         sum += real(conj(*v)*(*v)); v++;
119704ca555eSLois Curfman McInnes #else
119804ca555eSLois Curfman McInnes         sum += (*v)*(*v); v++;
119904ca555eSLois Curfman McInnes #endif
120004ca555eSLois Curfman McInnes       }
120104ca555eSLois Curfman McInnes       v = bmat->a;
120204ca555eSLois Curfman McInnes       for (i=0; i<bmat->nz; i++ ) {
120304ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX)
120404ca555eSLois Curfman McInnes         sum += real(conj(*v)*(*v)); v++;
120504ca555eSLois Curfman McInnes #else
120604ca555eSLois Curfman McInnes         sum += (*v)*(*v); v++;
120704ca555eSLois Curfman McInnes #endif
120804ca555eSLois Curfman McInnes       }
1209*6d84be18SBarry Smith       MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
121004ca555eSLois Curfman McInnes       *norm = sqrt(*norm);
121104ca555eSLois Curfman McInnes     }
121204ca555eSLois Curfman McInnes     else if (type == NORM_1) { /* max column norm */
121304ca555eSLois Curfman McInnes       double *tmp, *tmp2;
121404ca555eSLois Curfman McInnes       int    *jj, *garray = aij->garray;
12150452661fSBarry Smith       tmp  = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp);
12160452661fSBarry Smith       tmp2 = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp2);
1217cddf8d76SBarry Smith       PetscMemzero(tmp,aij->N*sizeof(double));
121804ca555eSLois Curfman McInnes       *norm = 0.0;
121904ca555eSLois Curfman McInnes       v = amat->a; jj = amat->j;
122004ca555eSLois Curfman McInnes       for ( j=0; j<amat->nz; j++ ) {
1221579c6b6fSBarry Smith         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
122204ca555eSLois Curfman McInnes       }
122304ca555eSLois Curfman McInnes       v = bmat->a; jj = bmat->j;
122404ca555eSLois Curfman McInnes       for ( j=0; j<bmat->nz; j++ ) {
1225579c6b6fSBarry Smith         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
122604ca555eSLois Curfman McInnes       }
1227*6d84be18SBarry Smith       MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);
122804ca555eSLois Curfman McInnes       for ( j=0; j<aij->N; j++ ) {
122904ca555eSLois Curfman McInnes         if (tmp2[j] > *norm) *norm = tmp2[j];
123004ca555eSLois Curfman McInnes       }
12310452661fSBarry Smith       PetscFree(tmp); PetscFree(tmp2);
123204ca555eSLois Curfman McInnes     }
123304ca555eSLois Curfman McInnes     else if (type == NORM_INFINITY) { /* max row norm */
1234515d9167SLois Curfman McInnes       double ntemp = 0.0;
123504ca555eSLois Curfman McInnes       for ( j=0; j<amat->m; j++ ) {
1236dbb450caSBarry Smith         v = amat->a + amat->i[j] + shift;
123704ca555eSLois Curfman McInnes         sum = 0.0;
123804ca555eSLois Curfman McInnes         for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
1239cddf8d76SBarry Smith           sum += PetscAbsScalar(*v); v++;
124004ca555eSLois Curfman McInnes         }
1241dbb450caSBarry Smith         v = bmat->a + bmat->i[j] + shift;
124204ca555eSLois Curfman McInnes         for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
1243cddf8d76SBarry Smith           sum += PetscAbsScalar(*v); v++;
124404ca555eSLois Curfman McInnes         }
1245515d9167SLois Curfman McInnes         if (sum > ntemp) ntemp = sum;
124604ca555eSLois Curfman McInnes       }
1247*6d84be18SBarry Smith       MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);
124804ca555eSLois Curfman McInnes     }
124904ca555eSLois Curfman McInnes     else {
1250515d9167SLois Curfman McInnes       SETERRQ(1,"MatNorm_MPIAIJ:No support for two norm");
125104ca555eSLois Curfman McInnes     }
125237fa93a5SLois Curfman McInnes   }
1253855ac2c5SLois Curfman McInnes   return 0;
1254855ac2c5SLois Curfman McInnes }
1255855ac2c5SLois Curfman McInnes 
12560de55854SLois Curfman McInnes static int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1257b7c46309SBarry Smith {
1258b7c46309SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1259dbb450caSBarry Smith   Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
1260416022c9SBarry Smith   int        ierr,shift = Aloc->indexshift;
1261416022c9SBarry Smith   Mat        B;
1262b7c46309SBarry Smith   int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
1263b7c46309SBarry Smith   Scalar     *array;
1264b7c46309SBarry Smith 
12653638b69dSLois Curfman McInnes   if (matout == PETSC_NULL && M != N)
12663638b69dSLois Curfman McInnes     SETERRQ(1,"MatTranspose_MPIAIJ:Square matrix only for in-place");
1267b4fd4287SBarry Smith   ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,
1268b4fd4287SBarry Smith          PETSC_NULL,&B); CHKERRQ(ierr);
1269b7c46309SBarry Smith 
1270b7c46309SBarry Smith   /* copy over the A part */
1271ec8511deSBarry Smith   Aloc = (Mat_SeqAIJ*) a->A->data;
1272b7c46309SBarry Smith   m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1273b7c46309SBarry Smith   row = a->rstart;
1274dbb450caSBarry Smith   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
1275b7c46309SBarry Smith   for ( i=0; i<m; i++ ) {
1276416022c9SBarry Smith     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1277b7c46309SBarry Smith     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1278b7c46309SBarry Smith   }
1279b7c46309SBarry Smith   aj = Aloc->j;
12804af08d9eSBarry Smith   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}
1281b7c46309SBarry Smith 
1282b7c46309SBarry Smith   /* copy over the B part */
1283ec8511deSBarry Smith   Aloc = (Mat_SeqAIJ*) a->B->data;
1284b7c46309SBarry Smith   m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1285b7c46309SBarry Smith   row = a->rstart;
12860452661fSBarry Smith   ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
1287dbb450caSBarry Smith   for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
1288b7c46309SBarry Smith   for ( i=0; i<m; i++ ) {
1289416022c9SBarry Smith     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1290b7c46309SBarry Smith     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1291b7c46309SBarry Smith   }
12920452661fSBarry Smith   PetscFree(ct);
1293b7c46309SBarry Smith   ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
1294b7c46309SBarry Smith   ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
12953638b69dSLois Curfman McInnes   if (matout != PETSC_NULL) {
12960de55854SLois Curfman McInnes     *matout = B;
12970de55854SLois Curfman McInnes   } else {
12980de55854SLois Curfman McInnes     /* This isn't really an in-place transpose .... but free data structures from a */
12990452661fSBarry Smith     PetscFree(a->rowners);
13000de55854SLois Curfman McInnes     ierr = MatDestroy(a->A); CHKERRQ(ierr);
13010de55854SLois Curfman McInnes     ierr = MatDestroy(a->B); CHKERRQ(ierr);
13020452661fSBarry Smith     if (a->colmap) PetscFree(a->colmap);
13030452661fSBarry Smith     if (a->garray) PetscFree(a->garray);
13040de55854SLois Curfman McInnes     if (a->lvec) VecDestroy(a->lvec);
1305a56f8943SBarry Smith     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
13060452661fSBarry Smith     PetscFree(a);
1307416022c9SBarry Smith     PetscMemcpy(A,B,sizeof(struct _Mat));
13080452661fSBarry Smith     PetscHeaderDestroy(B);
13090de55854SLois Curfman McInnes   }
1310b7c46309SBarry Smith   return 0;
1311b7c46309SBarry Smith }
1312b7c46309SBarry Smith 
1313682d7d0cSBarry Smith extern int MatPrintHelp_SeqAIJ(Mat);
1314682d7d0cSBarry Smith static int MatPrintHelp_MPIAIJ(Mat A)
1315682d7d0cSBarry Smith {
1316682d7d0cSBarry Smith   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1317682d7d0cSBarry Smith 
1318682d7d0cSBarry Smith   if (!a->rank) return MatPrintHelp_SeqAIJ(a->A);
1319682d7d0cSBarry Smith   else return 0;
1320682d7d0cSBarry Smith }
1321682d7d0cSBarry Smith 
1322fc76ce87SLois Curfman McInnes extern int MatConvert_MPIAIJ(Mat,MatType,Mat *);
13233d1612f7SBarry Smith static int MatConvertSameType_MPIAIJ(Mat,Mat *,int);
13242f86bd48SSatish Balay extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int);
1325598137ffSSatish Balay int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **);
13268a729477SBarry Smith /* -------------------------------------------------------------------*/
13272ee70a88SLois Curfman McInnes static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
132839e00950SLois Curfman McInnes        MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
132944a69424SLois Curfman McInnes        MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
133044a69424SLois Curfman McInnes        MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
1331299609e3SLois Curfman McInnes        MatSolve_MPIAIJ,MatSolveAdd_MPIAIJ,
1332299609e3SLois Curfman McInnes        MatSolveTrans_MPIAIJ,MatSolveTransAdd_MPIAIJ,
1333299609e3SLois Curfman McInnes        MatLUFactor_MPIAIJ,0,
133444a69424SLois Curfman McInnes        MatRelax_MPIAIJ,
1335b7c46309SBarry Smith        MatTranspose_MPIAIJ,
1336a66be287SLois Curfman McInnes        MatGetInfo_MPIAIJ,0,
1337855ac2c5SLois Curfman McInnes        MatGetDiagonal_MPIAIJ,0,MatNorm_MPIAIJ,
1338ee50ffe9SBarry Smith        MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
13391eb62cbbSBarry Smith        0,
1340299609e3SLois Curfman McInnes        MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
1341299609e3SLois Curfman McInnes        MatGetReordering_MPIAIJ,
1342299609e3SLois Curfman McInnes        MatLUFactorSymbolic_MPIAIJ,MatLUFactorNumeric_MPIAIJ,0,0,
1343d1710a03SLois Curfman McInnes        MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
1344299609e3SLois Curfman McInnes        MatILUFactorSymbolic_MPIAIJ,0,
13453d1612f7SBarry Smith        0,0,MatConvert_MPIAIJ,0,0,MatConvertSameType_MPIAIJ,0,0,
1346b49de8d1SLois Curfman McInnes        0,0,0,
1347598137ffSSatish Balay        MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0,
1348052efed2SBarry Smith        MatPrintHelp_MPIAIJ,
1349052efed2SBarry Smith        MatScale_MPIAIJ};
13508a729477SBarry Smith 
13511987afe7SBarry Smith /*@C
1352ff756334SLois Curfman McInnes    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
13533a511b96SLois Curfman McInnes    (the default parallel PETSc format).  For good matrix assembly performance
13543a511b96SLois Curfman McInnes    the user should preallocate the matrix storage by setting the parameters
13553a511b96SLois Curfman McInnes    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
13563a511b96SLois Curfman McInnes    performance can be increased by more than a factor of 50.
13578a729477SBarry Smith 
13588a729477SBarry Smith    Input Parameters:
13591eb62cbbSBarry Smith .  comm - MPI communicator
13607d3e4905SLois Curfman McInnes .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
13617d3e4905SLois Curfman McInnes .  n - number of local columns (or PETSC_DECIDE to have calculated
13627d3e4905SLois Curfman McInnes            if N is given)
13637d3e4905SLois Curfman McInnes .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
13647d3e4905SLois Curfman McInnes .  N - number of global columns (or PETSC_DECIDE to have calculated
13657d3e4905SLois Curfman McInnes            if n is given)
1366ab693e5aSLois Curfman McInnes .  d_nz - number of nonzeros per row in diagonal portion of local submatrix
1367ff756334SLois Curfman McInnes            (same for all local rows)
1368ab693e5aSLois Curfman McInnes .  d_nzz - number of nonzeros per row in diagonal portion of local submatrix
1369ab693e5aSLois Curfman McInnes            or null (possibly different for each row).  You must leave room
1370ab693e5aSLois Curfman McInnes            for the diagonal entry even if it is zero.
1371ab693e5aSLois Curfman McInnes .  o_nz - number of nonzeros per row in off-diagonal portion of local
1372ab693e5aSLois Curfman McInnes            submatrix (same for all local rows).
1373ab693e5aSLois Curfman McInnes .  o_nzz - number of nonzeros per row in off-diagonal portion of local
1374ab693e5aSLois Curfman McInnes            submatrix or null (possibly different for each row).
13758a729477SBarry Smith 
1376ff756334SLois Curfman McInnes    Output Parameter:
13778a729477SBarry Smith .  newmat - the matrix
13788a729477SBarry Smith 
1379ff756334SLois Curfman McInnes    Notes:
1380ff756334SLois Curfman McInnes    The AIJ format (also called the Yale sparse matrix format or
1381ff756334SLois Curfman McInnes    compressed row storage), is fully compatible with standard Fortran 77
13820002213bSLois Curfman McInnes    storage.  That is, the stored row and column indices can begin at
13830002213bSLois Curfman McInnes    either one (as in Fortran) or zero.  See the users manual for details.
1384ff756334SLois Curfman McInnes 
1385ff756334SLois Curfman McInnes    The user MUST specify either the local or global matrix dimensions
1386ff756334SLois Curfman McInnes    (possibly both).
1387ff756334SLois Curfman McInnes 
13885511cfe3SLois Curfman McInnes    By default, this format uses inodes (identical nodes) when possible.
13895511cfe3SLois Curfman McInnes    We search for consecutive rows with the same nonzero structure, thereby
13905511cfe3SLois Curfman McInnes    reusing matrix information to achieve increased efficiency.
13915511cfe3SLois Curfman McInnes 
13925511cfe3SLois Curfman McInnes    Options Database Keys:
13935511cfe3SLois Curfman McInnes $    -mat_aij_no_inode  - Do not use inodes
13946e62573dSLois Curfman McInnes $    -mat_aij_inode_limit <limit> - Set inode limit.
13956e62573dSLois Curfman McInnes $        (max limit=5)
13966e62573dSLois Curfman McInnes $    -mat_aij_oneindex - Internally use indexing starting at 1
13976e62573dSLois Curfman McInnes $        rather than 0.  Note: When calling MatSetValues(),
13986e62573dSLois Curfman McInnes $        the user still MUST index entries starting at 0!
13995511cfe3SLois Curfman McInnes 
1400e0245417SLois Curfman McInnes    Storage Information:
1401e0245417SLois Curfman McInnes    For a square global matrix we define each processor's diagonal portion
1402e0245417SLois Curfman McInnes    to be its local rows and the corresponding columns (a square submatrix);
1403e0245417SLois Curfman McInnes    each processor's off-diagonal portion encompasses the remainder of the
1404e0245417SLois Curfman McInnes    local matrix (a rectangular submatrix).
1405e0245417SLois Curfman McInnes 
1406e0245417SLois Curfman McInnes    The user can specify preallocated storage for the diagonal part of
14075ace5be8SLois Curfman McInnes    the local submatrix with either d_nz or d_nnz (not both).  Set
14085ace5be8SLois Curfman McInnes    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
14095ace5be8SLois Curfman McInnes    memory allocation.  Likewise, specify preallocated storage for the
14105ace5be8SLois Curfman McInnes    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1411ff756334SLois Curfman McInnes 
14125511cfe3SLois Curfman McInnes    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
14135511cfe3SLois Curfman McInnes    the figure below we depict these three local rows and all columns (0-11).
14142191d07cSBarry Smith 
1415b810aeb4SBarry Smith $          0 1 2 3 4 5 6 7 8 9 10 11
1416b810aeb4SBarry Smith $         -------------------
14175511cfe3SLois Curfman McInnes $  row 3  |  o o o d d d o o o o o o
14185511cfe3SLois Curfman McInnes $  row 4  |  o o o d d d o o o o o o
14195511cfe3SLois Curfman McInnes $  row 5  |  o o o d d d o o o o o o
14205511cfe3SLois Curfman McInnes $         -------------------
1421b810aeb4SBarry Smith $
1422b810aeb4SBarry Smith 
14235511cfe3SLois Curfman McInnes    Thus, any entries in the d locations are stored in the d (diagonal)
14245511cfe3SLois Curfman McInnes    submatrix, and any entries in the o locations are stored in the
14255511cfe3SLois Curfman McInnes    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
14265511cfe3SLois Curfman McInnes    stored simply in the MATSEQAIJ format for compressed row storage.
14275511cfe3SLois Curfman McInnes 
14285511cfe3SLois Curfman McInnes    Now d_nz should indicate the number of nonzeros per row in the d matrix,
14295511cfe3SLois Curfman McInnes    and o_nz should indicate the number of nonzeros per row in the o matrix.
14305511cfe3SLois Curfman McInnes    In general, for PDE problems in which most nonzeros are near the diagonal,
14313a511b96SLois Curfman McInnes    one expects d_nz >> o_nz.   For additional details, see the users manual
14323a511b96SLois Curfman McInnes    chapter on matrices and the file $(PETSC_DIR)/Performance.
14333a511b96SLois Curfman McInnes 
1434dbd7a890SLois Curfman McInnes .keywords: matrix, aij, compressed row, sparse, parallel
1435ff756334SLois Curfman McInnes 
1436fafbff53SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
14378a729477SBarry Smith @*/
14381eb62cbbSBarry Smith int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
14391eb62cbbSBarry Smith                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *newmat)
14408a729477SBarry Smith {
14418a729477SBarry Smith   Mat          mat;
1442416022c9SBarry Smith   Mat_MPIAIJ   *a;
14436abc6512SBarry Smith   int          ierr, i,sum[2],work[2];
1444416022c9SBarry Smith 
14458a729477SBarry Smith   *newmat         = 0;
14460452661fSBarry Smith   PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,comm);
1447a5a9c739SBarry Smith   PLogObjectCreate(mat);
14480452661fSBarry Smith   mat->data       = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
1449416022c9SBarry Smith   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
145044a69424SLois Curfman McInnes   mat->destroy    = MatDestroy_MPIAIJ;
145144a69424SLois Curfman McInnes   mat->view       = MatView_MPIAIJ;
14528a729477SBarry Smith   mat->factor     = 0;
1453c456f294SBarry Smith   mat->assembled  = PETSC_FALSE;
1454d6dfbf8fSBarry Smith 
145564119d58SLois Curfman McInnes   a->insertmode = NOT_SET_VALUES;
145617699dbbSLois Curfman McInnes   MPI_Comm_rank(comm,&a->rank);
145717699dbbSLois Curfman McInnes   MPI_Comm_size(comm,&a->size);
14581eb62cbbSBarry Smith 
1459b4fd4287SBarry Smith   if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
14601987afe7SBarry Smith     SETERRQ(1,"MatCreateMPIAIJ:Cannot have PETSc decide rows but set d_nnz or o_nnz");
14611987afe7SBarry Smith 
1462dbd7a890SLois Curfman McInnes   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
14631eb62cbbSBarry Smith     work[0] = m; work[1] = n;
1464d65a2f8fSBarry Smith     MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
1465dbd7a890SLois Curfman McInnes     if (M == PETSC_DECIDE) M = sum[0];
1466dbd7a890SLois Curfman McInnes     if (N == PETSC_DECIDE) N = sum[1];
14671eb62cbbSBarry Smith   }
146817699dbbSLois Curfman McInnes   if (m == PETSC_DECIDE) {m = M/a->size + ((M % a->size) > a->rank);}
146917699dbbSLois Curfman McInnes   if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);}
1470416022c9SBarry Smith   a->m = m;
1471416022c9SBarry Smith   a->n = n;
1472416022c9SBarry Smith   a->N = N;
1473416022c9SBarry Smith   a->M = M;
14741eb62cbbSBarry Smith 
14751eb62cbbSBarry Smith   /* build local table of row and column ownerships */
14760452661fSBarry Smith   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
1477579c6b6fSBarry Smith   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
147817699dbbSLois Curfman McInnes   a->cowners = a->rowners + a->size + 1;
1479416022c9SBarry Smith   MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);
1480416022c9SBarry Smith   a->rowners[0] = 0;
148117699dbbSLois Curfman McInnes   for ( i=2; i<=a->size; i++ ) {
1482416022c9SBarry Smith     a->rowners[i] += a->rowners[i-1];
14838a729477SBarry Smith   }
148417699dbbSLois Curfman McInnes   a->rstart = a->rowners[a->rank];
148517699dbbSLois Curfman McInnes   a->rend   = a->rowners[a->rank+1];
1486416022c9SBarry Smith   MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);
1487416022c9SBarry Smith   a->cowners[0] = 0;
148817699dbbSLois Curfman McInnes   for ( i=2; i<=a->size; i++ ) {
1489416022c9SBarry Smith     a->cowners[i] += a->cowners[i-1];
14908a729477SBarry Smith   }
149117699dbbSLois Curfman McInnes   a->cstart = a->cowners[a->rank];
149217699dbbSLois Curfman McInnes   a->cend   = a->cowners[a->rank+1];
14938a729477SBarry Smith 
14945ace5be8SLois Curfman McInnes   if (d_nz == PETSC_DEFAULT) d_nz = 5;
1495416022c9SBarry Smith   ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&a->A); CHKERRQ(ierr);
1496416022c9SBarry Smith   PLogObjectParent(mat,a->A);
14977b8455f0SLois Curfman McInnes   if (o_nz == PETSC_DEFAULT) o_nz = 0;
1498416022c9SBarry Smith   ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&a->B); CHKERRQ(ierr);
1499416022c9SBarry Smith   PLogObjectParent(mat,a->B);
15008a729477SBarry Smith 
15011eb62cbbSBarry Smith   /* build cache for off array entries formed */
1502416022c9SBarry Smith   ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr);
1503416022c9SBarry Smith   a->colmap      = 0;
1504416022c9SBarry Smith   a->garray      = 0;
15054b0e389bSBarry Smith   a->roworiented = 1;
15068a729477SBarry Smith 
15071eb62cbbSBarry Smith   /* stuff used for matrix vector multiply */
1508416022c9SBarry Smith   a->lvec      = 0;
1509416022c9SBarry Smith   a->Mvctx     = 0;
15108a729477SBarry Smith 
15117a0afa10SBarry Smith   /* stuff for MatGetRow() */
15127a0afa10SBarry Smith   a->rowindices   = 0;
15137a0afa10SBarry Smith   a->rowvalues    = 0;
15147a0afa10SBarry Smith   a->getrowactive = PETSC_FALSE;
15157a0afa10SBarry Smith 
1516d6dfbf8fSBarry Smith   *newmat = mat;
1517d6dfbf8fSBarry Smith   return 0;
1518d6dfbf8fSBarry Smith }
1519c74985f6SBarry Smith 
15203d1612f7SBarry Smith static int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues)
1521d6dfbf8fSBarry Smith {
1522d6dfbf8fSBarry Smith   Mat        mat;
1523416022c9SBarry Smith   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
152425cdf11fSBarry Smith   int        ierr, len,flg;
1525d6dfbf8fSBarry Smith 
1526416022c9SBarry Smith   *newmat       = 0;
15270452661fSBarry Smith   PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm);
1528a5a9c739SBarry Smith   PLogObjectCreate(mat);
15290452661fSBarry Smith   mat->data       = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
1530416022c9SBarry Smith   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
153144a69424SLois Curfman McInnes   mat->destroy    = MatDestroy_MPIAIJ;
153244a69424SLois Curfman McInnes   mat->view       = MatView_MPIAIJ;
1533d6dfbf8fSBarry Smith   mat->factor     = matin->factor;
1534c456f294SBarry Smith   mat->assembled  = PETSC_TRUE;
1535d6dfbf8fSBarry Smith 
1536416022c9SBarry Smith   a->m          = oldmat->m;
1537416022c9SBarry Smith   a->n          = oldmat->n;
1538416022c9SBarry Smith   a->M          = oldmat->M;
1539416022c9SBarry Smith   a->N          = oldmat->N;
1540d6dfbf8fSBarry Smith 
1541416022c9SBarry Smith   a->rstart     = oldmat->rstart;
1542416022c9SBarry Smith   a->rend       = oldmat->rend;
1543416022c9SBarry Smith   a->cstart     = oldmat->cstart;
1544416022c9SBarry Smith   a->cend       = oldmat->cend;
154517699dbbSLois Curfman McInnes   a->size       = oldmat->size;
154617699dbbSLois Curfman McInnes   a->rank       = oldmat->rank;
154764119d58SLois Curfman McInnes   a->insertmode = NOT_SET_VALUES;
1548d6dfbf8fSBarry Smith 
15490452661fSBarry Smith   a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners);
155017699dbbSLois Curfman McInnes   PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
155117699dbbSLois Curfman McInnes   PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));
1552416022c9SBarry Smith   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
15532ee70a88SLois Curfman McInnes   if (oldmat->colmap) {
15540452661fSBarry Smith     a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
1555416022c9SBarry Smith     PLogObjectMemory(mat,(a->N)*sizeof(int));
1556416022c9SBarry Smith     PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
1557416022c9SBarry Smith   } else a->colmap = 0;
1558ec8511deSBarry Smith   if (oldmat->garray && (len = ((Mat_SeqAIJ *) (oldmat->B->data))->n)) {
15590452661fSBarry Smith     a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray);
1560464493b3SBarry Smith     PLogObjectMemory(mat,len*sizeof(int));
1561416022c9SBarry Smith     PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
1562416022c9SBarry Smith   } else a->garray = 0;
1563d6dfbf8fSBarry Smith 
1564416022c9SBarry Smith   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
1565416022c9SBarry Smith   PLogObjectParent(mat,a->lvec);
1566a56f8943SBarry Smith   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
1567416022c9SBarry Smith   PLogObjectParent(mat,a->Mvctx);
1568416022c9SBarry Smith   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
1569416022c9SBarry Smith   PLogObjectParent(mat,a->A);
1570416022c9SBarry Smith   ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
1571416022c9SBarry Smith   PLogObjectParent(mat,a->B);
15725dd7a6c7SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
157325cdf11fSBarry Smith   if (flg) {
1574682d7d0cSBarry Smith     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
1575682d7d0cSBarry Smith   }
15768a729477SBarry Smith   *newmat = mat;
15778a729477SBarry Smith   return 0;
15788a729477SBarry Smith }
1579416022c9SBarry Smith 
1580416022c9SBarry Smith #include "sysio.h"
1581416022c9SBarry Smith 
1582c456f294SBarry Smith int MatLoad_MPIAIJ(Viewer bview,MatType type,Mat *newmat)
1583416022c9SBarry Smith {
1584d65a2f8fSBarry Smith   Mat          A;
1585416022c9SBarry Smith   int          i, nz, ierr, j,rstart, rend, fd;
1586d65a2f8fSBarry Smith   Scalar       *vals,*svals;
1587416022c9SBarry Smith   PetscObject  vobj = (PetscObject) bview;
1588416022c9SBarry Smith   MPI_Comm     comm = vobj->comm;
1589416022c9SBarry Smith   MPI_Status   status;
159017699dbbSLois Curfman McInnes   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1591d65a2f8fSBarry Smith   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
1592d65a2f8fSBarry Smith   int          tag = ((PetscObject)bview)->tag;
1593416022c9SBarry Smith 
159417699dbbSLois Curfman McInnes   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
159517699dbbSLois Curfman McInnes   if (!rank) {
1596416022c9SBarry Smith     ierr = ViewerFileGetDescriptor_Private(bview,&fd); CHKERRQ(ierr);
1597416022c9SBarry Smith     ierr = SYRead(fd,(char *)header,4,SYINT); CHKERRQ(ierr);
1598c456f294SBarry Smith     if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIAIJ:not matrix object");
1599416022c9SBarry Smith   }
1600416022c9SBarry Smith 
1601416022c9SBarry Smith   MPI_Bcast(header+1,3,MPI_INT,0,comm);
1602416022c9SBarry Smith   M = header[1]; N = header[2];
1603416022c9SBarry Smith   /* determine ownership of all rows */
160417699dbbSLois Curfman McInnes   m = M/size + ((M % size) > rank);
16050452661fSBarry Smith   rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
1606416022c9SBarry Smith   MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1607416022c9SBarry Smith   rowners[0] = 0;
160817699dbbSLois Curfman McInnes   for ( i=2; i<=size; i++ ) {
1609416022c9SBarry Smith     rowners[i] += rowners[i-1];
1610416022c9SBarry Smith   }
161117699dbbSLois Curfman McInnes   rstart = rowners[rank];
161217699dbbSLois Curfman McInnes   rend   = rowners[rank+1];
1613416022c9SBarry Smith 
1614416022c9SBarry Smith   /* distribute row lengths to all processors */
16150452661fSBarry Smith   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
1616416022c9SBarry Smith   offlens = ourlens + (rend-rstart);
161717699dbbSLois Curfman McInnes   if (!rank) {
16180452661fSBarry Smith     rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
1619416022c9SBarry Smith     ierr = SYRead(fd,rowlengths,M,SYINT); CHKERRQ(ierr);
16200452661fSBarry Smith     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
162117699dbbSLois Curfman McInnes     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
1622d65a2f8fSBarry Smith     MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
16230452661fSBarry Smith     PetscFree(sndcounts);
1624416022c9SBarry Smith   }
1625416022c9SBarry Smith   else {
1626416022c9SBarry Smith     MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
1627416022c9SBarry Smith   }
1628416022c9SBarry Smith 
162917699dbbSLois Curfman McInnes   if (!rank) {
1630416022c9SBarry Smith     /* calculate the number of nonzeros on each processor */
16310452661fSBarry Smith     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
1632cddf8d76SBarry Smith     PetscMemzero(procsnz,size*sizeof(int));
163317699dbbSLois Curfman McInnes     for ( i=0; i<size; i++ ) {
1634416022c9SBarry Smith       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
1635416022c9SBarry Smith         procsnz[i] += rowlengths[j];
1636416022c9SBarry Smith       }
1637416022c9SBarry Smith     }
16380452661fSBarry Smith     PetscFree(rowlengths);
1639416022c9SBarry Smith 
1640416022c9SBarry Smith     /* determine max buffer needed and allocate it */
1641416022c9SBarry Smith     maxnz = 0;
164217699dbbSLois Curfman McInnes     for ( i=0; i<size; i++ ) {
16430452661fSBarry Smith       maxnz = PetscMax(maxnz,procsnz[i]);
1644416022c9SBarry Smith     }
16450452661fSBarry Smith     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
1646416022c9SBarry Smith 
1647416022c9SBarry Smith     /* read in my part of the matrix column indices  */
1648416022c9SBarry Smith     nz = procsnz[0];
16490452661fSBarry Smith     mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1650d65a2f8fSBarry Smith     ierr = SYRead(fd,mycols,nz,SYINT); CHKERRQ(ierr);
1651d65a2f8fSBarry Smith 
1652d65a2f8fSBarry Smith     /* read in every one elses and ship off */
165317699dbbSLois Curfman McInnes     for ( i=1; i<size; i++ ) {
1654d65a2f8fSBarry Smith       nz = procsnz[i];
1655416022c9SBarry Smith       ierr = SYRead(fd,cols,nz,SYINT); CHKERRQ(ierr);
1656d65a2f8fSBarry Smith       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
1657d65a2f8fSBarry Smith     }
16580452661fSBarry Smith     PetscFree(cols);
1659416022c9SBarry Smith   }
1660416022c9SBarry Smith   else {
1661416022c9SBarry Smith     /* determine buffer space needed for message */
1662416022c9SBarry Smith     nz = 0;
1663416022c9SBarry Smith     for ( i=0; i<m; i++ ) {
1664416022c9SBarry Smith       nz += ourlens[i];
1665416022c9SBarry Smith     }
16660452661fSBarry Smith     mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1667416022c9SBarry Smith 
1668416022c9SBarry Smith     /* receive message of column indices*/
1669d65a2f8fSBarry Smith     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
1670416022c9SBarry Smith     MPI_Get_count(&status,MPI_INT,&maxnz);
1671c456f294SBarry Smith     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file");
1672416022c9SBarry Smith   }
1673416022c9SBarry Smith 
1674416022c9SBarry Smith   /* loop over local rows, determining number of off diagonal entries */
1675cddf8d76SBarry Smith   PetscMemzero(offlens,m*sizeof(int));
1676416022c9SBarry Smith   jj = 0;
1677416022c9SBarry Smith   for ( i=0; i<m; i++ ) {
1678416022c9SBarry Smith     for ( j=0; j<ourlens[i]; j++ ) {
1679d65a2f8fSBarry Smith       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1680416022c9SBarry Smith       jj++;
1681416022c9SBarry Smith     }
1682416022c9SBarry Smith   }
1683d65a2f8fSBarry Smith 
1684d65a2f8fSBarry Smith   /* create our matrix */
1685416022c9SBarry Smith   for ( i=0; i<m; i++ ) {
1686416022c9SBarry Smith     ourlens[i] -= offlens[i];
1687416022c9SBarry Smith   }
1688d65a2f8fSBarry Smith   ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1689d65a2f8fSBarry Smith   A = *newmat;
1690d65a2f8fSBarry Smith   MatSetOption(A,COLUMNS_SORTED);
1691d65a2f8fSBarry Smith   for ( i=0; i<m; i++ ) {
1692d65a2f8fSBarry Smith     ourlens[i] += offlens[i];
1693d65a2f8fSBarry Smith   }
1694416022c9SBarry Smith 
169517699dbbSLois Curfman McInnes   if (!rank) {
16960452661fSBarry Smith     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);
1697416022c9SBarry Smith 
1698416022c9SBarry Smith     /* read in my part of the matrix numerical values  */
1699416022c9SBarry Smith     nz = procsnz[0];
1700416022c9SBarry Smith     ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr);
1701d65a2f8fSBarry Smith 
1702d65a2f8fSBarry Smith     /* insert into matrix */
1703d65a2f8fSBarry Smith     jj      = rstart;
1704d65a2f8fSBarry Smith     smycols = mycols;
1705d65a2f8fSBarry Smith     svals   = vals;
1706d65a2f8fSBarry Smith     for ( i=0; i<m; i++ ) {
1707d65a2f8fSBarry Smith       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1708d65a2f8fSBarry Smith       smycols += ourlens[i];
1709d65a2f8fSBarry Smith       svals   += ourlens[i];
1710d65a2f8fSBarry Smith       jj++;
1711416022c9SBarry Smith     }
1712416022c9SBarry Smith 
1713d65a2f8fSBarry Smith     /* read in other processors and ship out */
171417699dbbSLois Curfman McInnes     for ( i=1; i<size; i++ ) {
1715416022c9SBarry Smith       nz = procsnz[i];
1716416022c9SBarry Smith       ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr);
1717d65a2f8fSBarry Smith       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
1718416022c9SBarry Smith     }
17190452661fSBarry Smith     PetscFree(procsnz);
1720416022c9SBarry Smith   }
1721d65a2f8fSBarry Smith   else {
1722d65a2f8fSBarry Smith     /* receive numeric values */
17230452661fSBarry Smith     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);
1724416022c9SBarry Smith 
1725d65a2f8fSBarry Smith     /* receive message of values*/
1726d65a2f8fSBarry Smith     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
1727d65a2f8fSBarry Smith     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1728c456f294SBarry Smith     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file");
1729d65a2f8fSBarry Smith 
1730d65a2f8fSBarry Smith     /* insert into matrix */
1731d65a2f8fSBarry Smith     jj      = rstart;
1732d65a2f8fSBarry Smith     smycols = mycols;
1733d65a2f8fSBarry Smith     svals   = vals;
1734d65a2f8fSBarry Smith     for ( i=0; i<m; i++ ) {
1735d65a2f8fSBarry Smith       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1736d65a2f8fSBarry Smith       smycols += ourlens[i];
1737d65a2f8fSBarry Smith       svals   += ourlens[i];
1738d65a2f8fSBarry Smith       jj++;
1739d65a2f8fSBarry Smith     }
1740d65a2f8fSBarry Smith   }
17410452661fSBarry Smith   PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);
1742d65a2f8fSBarry Smith 
1743d65a2f8fSBarry Smith   ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
1744d65a2f8fSBarry Smith   ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
1745416022c9SBarry Smith   return 0;
1746416022c9SBarry Smith }
1747