xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 7fab95ffa0929fef22d7fc43e2547d4da133564f)
1cb512458SBarry Smith #ifndef lint
2*7fab95ffSSatish Balay static char vcid[] = "$Id: mpiaij.c,v 1.115 1996/01/24 05:46:00 bsmith Exp balay $";
3cb512458SBarry Smith #endif
48a729477SBarry Smith 
51eb62cbbSBarry Smith #include "mpiaij.h"
68a729477SBarry Smith #include "vec/vecimpl.h"
7d6dfbf8fSBarry Smith #include "inline/spops.h"
88a729477SBarry Smith 
99e25ed09SBarry Smith /* local utility routine that creates a mapping from the global column
109e25ed09SBarry Smith number to the local number in the off-diagonal part of the local
119e25ed09SBarry Smith storage of the matrix.  This is done in a non scable way since the
129e25ed09SBarry Smith length of colmap equals the global matrix length.
139e25ed09SBarry Smith */
14b7c46309SBarry Smith static int CreateColmap_Private(Mat mat)
159e25ed09SBarry Smith {
1644a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
17ec8511deSBarry Smith   Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
18416022c9SBarry Smith   int        n = B->n,i,shift = B->indexshift;
19dbb450caSBarry Smith 
200452661fSBarry Smith   aij->colmap = (int *) PetscMalloc(aij->N*sizeof(int));CHKPTRQ(aij->colmap);
21464493b3SBarry Smith   PLogObjectMemory(mat,aij->N*sizeof(int));
22cddf8d76SBarry Smith   PetscMemzero(aij->colmap,aij->N*sizeof(int));
23dbb450caSBarry Smith   for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i-shift;
249e25ed09SBarry Smith   return 0;
259e25ed09SBarry Smith }
269e25ed09SBarry Smith 
272493cbb0SBarry Smith extern int DisAssemble_MPIAIJ(Mat);
282493cbb0SBarry Smith 
2951c98ccdSLois Curfman McInnes static int MatGetReordering_MPIAIJ(Mat mat,MatOrdering type,IS *rperm,IS *cperm)
30299609e3SLois Curfman McInnes {
31299609e3SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
32299609e3SLois Curfman McInnes   int        ierr;
3317699dbbSLois Curfman McInnes   if (aij->size == 1) {
3451c98ccdSLois Curfman McInnes     ierr = MatGetReordering(aij->A,type,rperm,cperm); CHKERRQ(ierr);
3548d91487SBarry Smith   } else SETERRQ(1,"MatGetReordering_MPIAIJ:not supported in parallel");
36299609e3SLois Curfman McInnes   return 0;
37299609e3SLois Curfman McInnes }
38299609e3SLois Curfman McInnes 
394b0e389bSBarry Smith static int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
408a729477SBarry Smith {
4144a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
42dbb450caSBarry Smith   Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data;
434b0e389bSBarry Smith   Scalar     value;
441eb62cbbSBarry Smith   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
451eb62cbbSBarry Smith   int        cstart = aij->cstart, cend = aij->cend,row,col;
464b0e389bSBarry Smith   int        shift = C->indexshift,roworiented = aij->roworiented;
478a729477SBarry Smith 
4864119d58SLois Curfman McInnes   if (aij->insertmode != NOT_SET_VALUES && aij->insertmode != addv) {
4948d91487SBarry Smith     SETERRQ(1,"MatSetValues_MPIAIJ:Cannot mix inserts and adds");
508a729477SBarry Smith   }
511eb62cbbSBarry Smith   aij->insertmode = addv;
528a729477SBarry Smith   for ( i=0; i<m; i++ ) {
534b0e389bSBarry Smith     if (im[i] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative row");
544b0e389bSBarry Smith     if (im[i] >= aij->M) SETERRQ(1,"MatSetValues_MPIAIJ:Row too large");
554b0e389bSBarry Smith     if (im[i] >= rstart && im[i] < rend) {
564b0e389bSBarry Smith       row = im[i] - rstart;
571eb62cbbSBarry Smith       for ( j=0; j<n; j++ ) {
584b0e389bSBarry Smith         if (in[j] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative column");
594b0e389bSBarry Smith         if (in[j] >= aij->N) SETERRQ(1,"MatSetValues_MPIAIJ:Col too large");
604b0e389bSBarry Smith         if (in[j] >= cstart && in[j] < cend){
614b0e389bSBarry Smith           col = in[j] - cstart;
624b0e389bSBarry Smith           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
634b0e389bSBarry Smith           ierr = MatSetValues(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
641eb62cbbSBarry Smith         }
651eb62cbbSBarry Smith         else {
66c456f294SBarry Smith           if (mat->assembled) {
67b7c46309SBarry Smith             if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
684b0e389bSBarry Smith             col = aij->colmap[in[j]] + shift;
69ec8511deSBarry Smith             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
702493cbb0SBarry Smith               ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
714b0e389bSBarry Smith               col =  in[j];
72d6dfbf8fSBarry Smith             }
739e25ed09SBarry Smith           }
744b0e389bSBarry Smith           else col = in[j];
754b0e389bSBarry Smith           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
764b0e389bSBarry Smith           ierr = MatSetValues(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
771eb62cbbSBarry Smith         }
781eb62cbbSBarry Smith       }
791eb62cbbSBarry Smith     }
801eb62cbbSBarry Smith     else {
814b0e389bSBarry Smith       if (roworiented) {
824b0e389bSBarry Smith         ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
834b0e389bSBarry Smith       }
844b0e389bSBarry Smith       else {
854b0e389bSBarry Smith         row = im[i];
864b0e389bSBarry Smith         for ( j=0; j<n; j++ ) {
874b0e389bSBarry Smith           ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
884b0e389bSBarry Smith         }
894b0e389bSBarry Smith       }
901eb62cbbSBarry Smith     }
918a729477SBarry Smith   }
928a729477SBarry Smith   return 0;
938a729477SBarry Smith }
948a729477SBarry Smith 
95b49de8d1SLois Curfman McInnes static int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
96b49de8d1SLois Curfman McInnes {
97b49de8d1SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
98b49de8d1SLois Curfman McInnes   Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data;
99b49de8d1SLois Curfman McInnes   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
100b49de8d1SLois Curfman McInnes   int        cstart = aij->cstart, cend = aij->cend,row,col;
101b49de8d1SLois Curfman McInnes   int        shift = C->indexshift;
102b49de8d1SLois Curfman McInnes 
103b49de8d1SLois Curfman McInnes   for ( i=0; i<m; i++ ) {
104b49de8d1SLois Curfman McInnes     if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative row");
105b49de8d1SLois Curfman McInnes     if (idxm[i] >= aij->M) SETERRQ(1,"MatGetValues_MPIAIJ:Row too large");
106b49de8d1SLois Curfman McInnes     if (idxm[i] >= rstart && idxm[i] < rend) {
107b49de8d1SLois Curfman McInnes       row = idxm[i] - rstart;
108b49de8d1SLois Curfman McInnes       for ( j=0; j<n; j++ ) {
109b49de8d1SLois Curfman McInnes         if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative column");
110b49de8d1SLois Curfman McInnes         if (idxn[j] >= aij->N) SETERRQ(1,"MatGetValues_MPIAIJ:Col too large");
111b49de8d1SLois Curfman McInnes         if (idxn[j] >= cstart && idxn[j] < cend){
112b49de8d1SLois Curfman McInnes           col = idxn[j] - cstart;
113b49de8d1SLois Curfman McInnes           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
114b49de8d1SLois Curfman McInnes         }
115b49de8d1SLois Curfman McInnes         else {
116b49de8d1SLois Curfman McInnes           col = aij->colmap[idxn[j]] + shift;
117b49de8d1SLois Curfman McInnes           ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
118b49de8d1SLois Curfman McInnes         }
119b49de8d1SLois Curfman McInnes       }
120b49de8d1SLois Curfman McInnes     }
121b49de8d1SLois Curfman McInnes     else {
122b49de8d1SLois Curfman McInnes       SETERRQ(1,"MatGetValues_MPIAIJ:Only local values currently supported");
123b49de8d1SLois Curfman McInnes     }
124b49de8d1SLois Curfman McInnes   }
125b49de8d1SLois Curfman McInnes   return 0;
126b49de8d1SLois Curfman McInnes }
127b49de8d1SLois Curfman McInnes 
128fc76ce87SLois Curfman McInnes static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
1298a729477SBarry Smith {
13044a69424SLois Curfman McInnes   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
131d6dfbf8fSBarry Smith   MPI_Comm    comm = mat->comm;
13217699dbbSLois Curfman McInnes   int         size = aij->size, *owners = aij->rowners;
13317699dbbSLois Curfman McInnes   int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
1341eb62cbbSBarry Smith   MPI_Request *send_waits,*recv_waits;
1356abc6512SBarry Smith   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
1361eb62cbbSBarry Smith   InsertMode  addv;
1371eb62cbbSBarry Smith   Scalar      *rvalues,*svalues;
1381eb62cbbSBarry Smith 
1391eb62cbbSBarry Smith   /* make sure all processors are either in INSERTMODE or ADDMODE */
140d65a2f8fSBarry Smith   MPI_Allreduce(&aij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
141dbb450caSBarry Smith   if (addv == (ADD_VALUES|INSERT_VALUES)) {
142bbb6d6a8SBarry Smith     SETERRQ(1,"MatAssemblyBegin_MPIAIJ:Some processors inserted others added");
1431eb62cbbSBarry Smith   }
1441eb62cbbSBarry Smith   aij->insertmode = addv; /* in case this processor had no cache */
1451eb62cbbSBarry Smith 
1461eb62cbbSBarry Smith   /*  first count number of contributors to each processor */
1470452661fSBarry Smith   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
148cddf8d76SBarry Smith   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
1490452661fSBarry Smith   owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
1501eb62cbbSBarry Smith   for ( i=0; i<aij->stash.n; i++ ) {
1511eb62cbbSBarry Smith     idx = aij->stash.idx[i];
15217699dbbSLois Curfman McInnes     for ( j=0; j<size; j++ ) {
1531eb62cbbSBarry Smith       if (idx >= owners[j] && idx < owners[j+1]) {
1541eb62cbbSBarry Smith         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
1558a729477SBarry Smith       }
1568a729477SBarry Smith     }
1578a729477SBarry Smith   }
15817699dbbSLois Curfman McInnes   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
1591eb62cbbSBarry Smith 
1601eb62cbbSBarry Smith   /* inform other processors of number of messages and max length*/
1610452661fSBarry Smith   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
16217699dbbSLois Curfman McInnes   MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);
16317699dbbSLois Curfman McInnes   nreceives = work[rank];
16417699dbbSLois Curfman McInnes   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
16517699dbbSLois Curfman McInnes   nmax = work[rank];
1660452661fSBarry Smith   PetscFree(work);
1671eb62cbbSBarry Smith 
1681eb62cbbSBarry Smith   /* post receives:
1691eb62cbbSBarry Smith        1) each message will consist of ordered pairs
1701eb62cbbSBarry Smith      (global index,value) we store the global index as a double
171d6dfbf8fSBarry Smith      to simplify the message passing.
1721eb62cbbSBarry Smith        2) since we don't know how long each individual message is we
1731eb62cbbSBarry Smith      allocate the largest needed buffer for each receive. Potentially
1741eb62cbbSBarry Smith      this is a lot of wasted space.
1751eb62cbbSBarry Smith 
1761eb62cbbSBarry Smith 
1771eb62cbbSBarry Smith        This could be done better.
1781eb62cbbSBarry Smith   */
1790452661fSBarry Smith   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
18078b31e54SBarry Smith   CHKPTRQ(rvalues);
1810452661fSBarry Smith   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
18278b31e54SBarry Smith   CHKPTRQ(recv_waits);
1831eb62cbbSBarry Smith   for ( i=0; i<nreceives; i++ ) {
184416022c9SBarry Smith     MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
1851eb62cbbSBarry Smith               comm,recv_waits+i);
1861eb62cbbSBarry Smith   }
1871eb62cbbSBarry Smith 
1881eb62cbbSBarry Smith   /* do sends:
1891eb62cbbSBarry Smith       1) starts[i] gives the starting index in svalues for stuff going to
1901eb62cbbSBarry Smith          the ith processor
1911eb62cbbSBarry Smith   */
1920452661fSBarry Smith   svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
1930452661fSBarry Smith   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
19478b31e54SBarry Smith   CHKPTRQ(send_waits);
1950452661fSBarry Smith   starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
1961eb62cbbSBarry Smith   starts[0] = 0;
19717699dbbSLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
1981eb62cbbSBarry Smith   for ( i=0; i<aij->stash.n; i++ ) {
1991eb62cbbSBarry Smith     svalues[3*starts[owner[i]]]       = (Scalar)  aij->stash.idx[i];
2001eb62cbbSBarry Smith     svalues[3*starts[owner[i]]+1]     = (Scalar)  aij->stash.idy[i];
2011eb62cbbSBarry Smith     svalues[3*(starts[owner[i]]++)+2] =  aij->stash.array[i];
2021eb62cbbSBarry Smith   }
2030452661fSBarry Smith   PetscFree(owner);
2041eb62cbbSBarry Smith   starts[0] = 0;
20517699dbbSLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
2061eb62cbbSBarry Smith   count = 0;
20717699dbbSLois Curfman McInnes   for ( i=0; i<size; i++ ) {
2081eb62cbbSBarry Smith     if (procs[i]) {
209416022c9SBarry Smith       MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
2101eb62cbbSBarry Smith                 comm,send_waits+count++);
2111eb62cbbSBarry Smith     }
2121eb62cbbSBarry Smith   }
2130452661fSBarry Smith   PetscFree(starts); PetscFree(nprocs);
2141eb62cbbSBarry Smith 
2151eb62cbbSBarry Smith   /* Free cache space */
21678b31e54SBarry Smith   ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
2171eb62cbbSBarry Smith 
2181eb62cbbSBarry Smith   aij->svalues    = svalues;    aij->rvalues    = rvalues;
2191eb62cbbSBarry Smith   aij->nsends     = nsends;     aij->nrecvs     = nreceives;
2201eb62cbbSBarry Smith   aij->send_waits = send_waits; aij->recv_waits = recv_waits;
2211eb62cbbSBarry Smith   aij->rmax       = nmax;
2221eb62cbbSBarry Smith 
2231eb62cbbSBarry Smith   return 0;
2241eb62cbbSBarry Smith }
22544a69424SLois Curfman McInnes extern int MatSetUpMultiply_MPIAIJ(Mat);
2261eb62cbbSBarry Smith 
227fc76ce87SLois Curfman McInnes static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
2281eb62cbbSBarry Smith {
22944a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
230dbb450caSBarry Smith   Mat_SeqAIJ *C = (Mat_SeqAIJ *) aij->A->data;
2311eb62cbbSBarry Smith   MPI_Status  *send_status,recv_status;
232416022c9SBarry Smith   int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
233416022c9SBarry Smith   int         row,col,other_disassembled,shift = C->indexshift;
2341eb62cbbSBarry Smith   Scalar      *values,val;
2351eb62cbbSBarry Smith   InsertMode  addv = aij->insertmode;
2361eb62cbbSBarry Smith 
2371eb62cbbSBarry Smith   /*  wait on receives */
2381eb62cbbSBarry Smith   while (count) {
239d6dfbf8fSBarry Smith     MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
2401eb62cbbSBarry Smith     /* unpack receives into our local space */
241d6dfbf8fSBarry Smith     values = aij->rvalues + 3*imdex*aij->rmax;
242c60448a5SBarry Smith     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
2431eb62cbbSBarry Smith     n = n/3;
2441eb62cbbSBarry Smith     for ( i=0; i<n; i++ ) {
2451eb62cbbSBarry Smith       row = (int) PETSCREAL(values[3*i]) - aij->rstart;
2461eb62cbbSBarry Smith       col = (int) PETSCREAL(values[3*i+1]);
2471eb62cbbSBarry Smith       val = values[3*i+2];
2481eb62cbbSBarry Smith       if (col >= aij->cstart && col < aij->cend) {
2491eb62cbbSBarry Smith         col -= aij->cstart;
2501eb62cbbSBarry Smith         MatSetValues(aij->A,1,&row,1,&col,&val,addv);
2511eb62cbbSBarry Smith       }
2521eb62cbbSBarry Smith       else {
253c456f294SBarry Smith         if (mat->assembled) {
254b7c46309SBarry Smith           if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
255dbb450caSBarry Smith           col = aij->colmap[col] + shift;
256ec8511deSBarry Smith           if (col < 0  && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
2572493cbb0SBarry Smith             ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
2582493cbb0SBarry Smith             col = (int) PETSCREAL(values[3*i+1]);
259d6dfbf8fSBarry Smith           }
2609e25ed09SBarry Smith         }
2611eb62cbbSBarry Smith         MatSetValues(aij->B,1,&row,1,&col,&val,addv);
2621eb62cbbSBarry Smith       }
2631eb62cbbSBarry Smith     }
2641eb62cbbSBarry Smith     count--;
2651eb62cbbSBarry Smith   }
2660452661fSBarry Smith   PetscFree(aij->recv_waits); PetscFree(aij->rvalues);
2671eb62cbbSBarry Smith 
2681eb62cbbSBarry Smith   /* wait on sends */
2691eb62cbbSBarry Smith   if (aij->nsends) {
2700452661fSBarry Smith     send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));
27178b31e54SBarry Smith     CHKPTRQ(send_status);
2721eb62cbbSBarry Smith     MPI_Waitall(aij->nsends,aij->send_waits,send_status);
2730452661fSBarry Smith     PetscFree(send_status);
2741eb62cbbSBarry Smith   }
2750452661fSBarry Smith   PetscFree(aij->send_waits); PetscFree(aij->svalues);
2761eb62cbbSBarry Smith 
27764119d58SLois Curfman McInnes   aij->insertmode = NOT_SET_VALUES;
27878b31e54SBarry Smith   ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
27978b31e54SBarry Smith   ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);
2801eb62cbbSBarry Smith 
2812493cbb0SBarry Smith   /* determine if any processor has disassembled, if so we must
2822493cbb0SBarry Smith      also disassemble ourselfs, in order that we may reassemble. */
283c456f294SBarry Smith   MPI_Allreduce(&mat->assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
284c456f294SBarry Smith   if (mat->assembled && !other_disassembled) {
2852493cbb0SBarry Smith     ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
2862493cbb0SBarry Smith   }
2872493cbb0SBarry Smith 
288c456f294SBarry Smith   if (!mat->assembled && mode == FINAL_ASSEMBLY) {
28978b31e54SBarry Smith     ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
2905e42470aSBarry Smith   }
29178b31e54SBarry Smith   ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
29278b31e54SBarry Smith   ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);
2935e42470aSBarry Smith 
2948a729477SBarry Smith   return 0;
2958a729477SBarry Smith }
2968a729477SBarry Smith 
2972ee70a88SLois Curfman McInnes static int MatZeroEntries_MPIAIJ(Mat A)
2981eb62cbbSBarry Smith {
29944a69424SLois Curfman McInnes   Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
300dbd7a890SLois Curfman McInnes   int        ierr;
30178b31e54SBarry Smith   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
30278b31e54SBarry Smith   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
3031eb62cbbSBarry Smith   return 0;
3041eb62cbbSBarry Smith }
3051eb62cbbSBarry Smith 
3061eb62cbbSBarry Smith /* the code does not do the diagonal entries correctly unless the
3071eb62cbbSBarry Smith    matrix is square and the column and row owerships are identical.
3081eb62cbbSBarry Smith    This is a BUG. The only way to fix it seems to be to access
3091eb62cbbSBarry Smith    aij->A and aij->B directly and not through the MatZeroRows()
3101eb62cbbSBarry Smith    routine.
3111eb62cbbSBarry Smith */
31244a69424SLois Curfman McInnes static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
3131eb62cbbSBarry Smith {
31444a69424SLois Curfman McInnes   Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
31517699dbbSLois Curfman McInnes   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
3166abc6512SBarry Smith   int            *procs,*nprocs,j,found,idx,nsends,*work;
31717699dbbSLois Curfman McInnes   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
3185392566eSBarry Smith   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
319d6dfbf8fSBarry Smith   int            *lens,imdex,*lrows,*values;
320d6dfbf8fSBarry Smith   MPI_Comm       comm = A->comm;
3211eb62cbbSBarry Smith   MPI_Request    *send_waits,*recv_waits;
3221eb62cbbSBarry Smith   MPI_Status     recv_status,*send_status;
3231eb62cbbSBarry Smith   IS             istmp;
3241eb62cbbSBarry Smith 
32578b31e54SBarry Smith   ierr = ISGetLocalSize(is,&N); CHKERRQ(ierr);
32678b31e54SBarry Smith   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
3271eb62cbbSBarry Smith 
3281eb62cbbSBarry Smith   /*  first count number of contributors to each processor */
3290452661fSBarry Smith   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
330cddf8d76SBarry Smith   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
3310452661fSBarry Smith   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
3321eb62cbbSBarry Smith   for ( i=0; i<N; i++ ) {
3331eb62cbbSBarry Smith     idx = rows[i];
3341eb62cbbSBarry Smith     found = 0;
33517699dbbSLois Curfman McInnes     for ( j=0; j<size; j++ ) {
3361eb62cbbSBarry Smith       if (idx >= owners[j] && idx < owners[j+1]) {
3371eb62cbbSBarry Smith         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
3381eb62cbbSBarry Smith       }
3391eb62cbbSBarry Smith     }
340bbb6d6a8SBarry Smith     if (!found) SETERRQ(1,"MatZeroRows_MPIAIJ:Index out of range");
3411eb62cbbSBarry Smith   }
34217699dbbSLois Curfman McInnes   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
3431eb62cbbSBarry Smith 
3441eb62cbbSBarry Smith   /* inform other processors of number of messages and max length*/
3450452661fSBarry Smith   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
34617699dbbSLois Curfman McInnes   MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
34717699dbbSLois Curfman McInnes   nrecvs = work[rank];
34817699dbbSLois Curfman McInnes   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
34917699dbbSLois Curfman McInnes   nmax = work[rank];
3500452661fSBarry Smith   PetscFree(work);
3511eb62cbbSBarry Smith 
3521eb62cbbSBarry Smith   /* post receives:   */
3530452661fSBarry Smith   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
35478b31e54SBarry Smith   CHKPTRQ(rvalues);
3550452661fSBarry Smith   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
35678b31e54SBarry Smith   CHKPTRQ(recv_waits);
3571eb62cbbSBarry Smith   for ( i=0; i<nrecvs; i++ ) {
358416022c9SBarry Smith     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
3591eb62cbbSBarry Smith   }
3601eb62cbbSBarry Smith 
3611eb62cbbSBarry Smith   /* do sends:
3621eb62cbbSBarry Smith       1) starts[i] gives the starting index in svalues for stuff going to
3631eb62cbbSBarry Smith          the ith processor
3641eb62cbbSBarry Smith   */
3650452661fSBarry Smith   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
3660452661fSBarry Smith   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
36778b31e54SBarry Smith   CHKPTRQ(send_waits);
3680452661fSBarry Smith   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
3691eb62cbbSBarry Smith   starts[0] = 0;
37017699dbbSLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
3711eb62cbbSBarry Smith   for ( i=0; i<N; i++ ) {
3721eb62cbbSBarry Smith     svalues[starts[owner[i]]++] = rows[i];
3731eb62cbbSBarry Smith   }
3741eb62cbbSBarry Smith   ISRestoreIndices(is,&rows);
3751eb62cbbSBarry Smith 
3761eb62cbbSBarry Smith   starts[0] = 0;
37717699dbbSLois Curfman McInnes   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
3781eb62cbbSBarry Smith   count = 0;
37917699dbbSLois Curfman McInnes   for ( i=0; i<size; i++ ) {
3801eb62cbbSBarry Smith     if (procs[i]) {
381416022c9SBarry Smith       MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
3821eb62cbbSBarry Smith     }
3831eb62cbbSBarry Smith   }
3840452661fSBarry Smith   PetscFree(starts);
3851eb62cbbSBarry Smith 
38617699dbbSLois Curfman McInnes   base = owners[rank];
3871eb62cbbSBarry Smith 
3881eb62cbbSBarry Smith   /*  wait on receives */
3890452661fSBarry Smith   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
3901eb62cbbSBarry Smith   source = lens + nrecvs;
3911eb62cbbSBarry Smith   count  = nrecvs; slen = 0;
3921eb62cbbSBarry Smith   while (count) {
393d6dfbf8fSBarry Smith     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
3941eb62cbbSBarry Smith     /* unpack receives into our local space */
3951eb62cbbSBarry Smith     MPI_Get_count(&recv_status,MPI_INT,&n);
396d6dfbf8fSBarry Smith     source[imdex]  = recv_status.MPI_SOURCE;
397d6dfbf8fSBarry Smith     lens[imdex]  = n;
3981eb62cbbSBarry Smith     slen += n;
3991eb62cbbSBarry Smith     count--;
4001eb62cbbSBarry Smith   }
4010452661fSBarry Smith   PetscFree(recv_waits);
4021eb62cbbSBarry Smith 
4031eb62cbbSBarry Smith   /* move the data into the send scatter */
4040452661fSBarry Smith   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
4051eb62cbbSBarry Smith   count = 0;
4061eb62cbbSBarry Smith   for ( i=0; i<nrecvs; i++ ) {
4071eb62cbbSBarry Smith     values = rvalues + i*nmax;
4081eb62cbbSBarry Smith     for ( j=0; j<lens[i]; j++ ) {
4091eb62cbbSBarry Smith       lrows[count++] = values[j] - base;
4101eb62cbbSBarry Smith     }
4111eb62cbbSBarry Smith   }
4120452661fSBarry Smith   PetscFree(rvalues); PetscFree(lens);
4130452661fSBarry Smith   PetscFree(owner); PetscFree(nprocs);
4141eb62cbbSBarry Smith 
4151eb62cbbSBarry Smith   /* actually zap the local rows */
416416022c9SBarry Smith   ierr = ISCreateSeq(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
417464493b3SBarry Smith   PLogObjectParent(A,istmp);
4180452661fSBarry Smith   PetscFree(lrows);
41978b31e54SBarry Smith   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
42078b31e54SBarry Smith   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
42178b31e54SBarry Smith   ierr = ISDestroy(istmp); CHKERRQ(ierr);
4221eb62cbbSBarry Smith 
4231eb62cbbSBarry Smith   /* wait on sends */
4241eb62cbbSBarry Smith   if (nsends) {
4250452661fSBarry Smith     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
42678b31e54SBarry Smith     CHKPTRQ(send_status);
4271eb62cbbSBarry Smith     MPI_Waitall(nsends,send_waits,send_status);
4280452661fSBarry Smith     PetscFree(send_status);
4291eb62cbbSBarry Smith   }
4300452661fSBarry Smith   PetscFree(send_waits); PetscFree(svalues);
4311eb62cbbSBarry Smith 
4321eb62cbbSBarry Smith   return 0;
4331eb62cbbSBarry Smith }
4341eb62cbbSBarry Smith 
435416022c9SBarry Smith static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
4361eb62cbbSBarry Smith {
437416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
4381eb62cbbSBarry Smith   int        ierr;
439416022c9SBarry Smith 
44064119d58SLois Curfman McInnes   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
441416022c9SBarry Smith   ierr = MatMult(a->A,xx,yy); CHKERRQ(ierr);
44264119d58SLois Curfman McInnes   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
443416022c9SBarry Smith   ierr = MatMultAdd(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
4441eb62cbbSBarry Smith   return 0;
4451eb62cbbSBarry Smith }
4461eb62cbbSBarry Smith 
447416022c9SBarry Smith static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
448da3a660dSBarry Smith {
449416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
450da3a660dSBarry Smith   int        ierr;
45164119d58SLois Curfman McInnes   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
452416022c9SBarry Smith   ierr = MatMultAdd(a->A,xx,yy,zz); CHKERRQ(ierr);
45364119d58SLois Curfman McInnes   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
454416022c9SBarry Smith   ierr = MatMultAdd(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
455da3a660dSBarry Smith   return 0;
456da3a660dSBarry Smith }
457da3a660dSBarry Smith 
458416022c9SBarry Smith static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
459da3a660dSBarry Smith {
460416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
461da3a660dSBarry Smith   int        ierr;
462da3a660dSBarry Smith 
463da3a660dSBarry Smith   /* do nondiagonal part */
464416022c9SBarry Smith   ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr);
465da3a660dSBarry Smith   /* send it on its way */
466416022c9SBarry Smith   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,
46764119d58SLois Curfman McInnes                 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
468da3a660dSBarry Smith   /* do local part */
469416022c9SBarry Smith   ierr = MatMultTrans(a->A,xx,yy); CHKERRQ(ierr);
470da3a660dSBarry Smith   /* receive remote parts: note this assumes the values are not actually */
471da3a660dSBarry Smith   /* inserted in yy until the next line, which is true for my implementation*/
472da3a660dSBarry Smith   /* but is not perhaps always true. */
473416022c9SBarry Smith   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,
47464119d58SLois Curfman McInnes                   (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
475da3a660dSBarry Smith   return 0;
476da3a660dSBarry Smith }
477da3a660dSBarry Smith 
478416022c9SBarry Smith static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
479da3a660dSBarry Smith {
480416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
481da3a660dSBarry Smith   int        ierr;
482da3a660dSBarry Smith 
483da3a660dSBarry Smith   /* do nondiagonal part */
484416022c9SBarry Smith   ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr);
485da3a660dSBarry Smith   /* send it on its way */
486416022c9SBarry Smith   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,
48764119d58SLois Curfman McInnes                  (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
488da3a660dSBarry Smith   /* do local part */
489416022c9SBarry Smith   ierr = MatMultTransAdd(a->A,xx,yy,zz); CHKERRQ(ierr);
490da3a660dSBarry Smith   /* receive remote parts: note this assumes the values are not actually */
491da3a660dSBarry Smith   /* inserted in yy until the next line, which is true for my implementation*/
492da3a660dSBarry Smith   /* but is not perhaps always true. */
493416022c9SBarry Smith   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,
49464119d58SLois Curfman McInnes                   (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
495da3a660dSBarry Smith   return 0;
496da3a660dSBarry Smith }
497da3a660dSBarry Smith 
4981eb62cbbSBarry Smith /*
4991eb62cbbSBarry Smith   This only works correctly for square matrices where the subblock A->A is the
5001eb62cbbSBarry Smith    diagonal block
5011eb62cbbSBarry Smith */
502416022c9SBarry Smith static int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
5031eb62cbbSBarry Smith {
504416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
505416022c9SBarry Smith   return MatGetDiagonal(a->A,v);
5061eb62cbbSBarry Smith }
5071eb62cbbSBarry Smith 
508052efed2SBarry Smith static int MatScale_MPIAIJ(Scalar *aa,Mat A)
509052efed2SBarry Smith {
510052efed2SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
511052efed2SBarry Smith   int        ierr;
512052efed2SBarry Smith   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
513052efed2SBarry Smith   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
514052efed2SBarry Smith   return 0;
515052efed2SBarry Smith }
516052efed2SBarry Smith 
51744a69424SLois Curfman McInnes static int MatDestroy_MPIAIJ(PetscObject obj)
5181eb62cbbSBarry Smith {
5191eb62cbbSBarry Smith   Mat        mat = (Mat) obj;
52044a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
5211eb62cbbSBarry Smith   int        ierr;
522a5a9c739SBarry Smith #if defined(PETSC_LOG)
5236e35fa57SLois Curfman McInnes   PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N);
524a5a9c739SBarry Smith #endif
5250452661fSBarry Smith   PetscFree(aij->rowners);
52678b31e54SBarry Smith   ierr = MatDestroy(aij->A); CHKERRQ(ierr);
52778b31e54SBarry Smith   ierr = MatDestroy(aij->B); CHKERRQ(ierr);
5280452661fSBarry Smith   if (aij->colmap) PetscFree(aij->colmap);
5290452661fSBarry Smith   if (aij->garray) PetscFree(aij->garray);
5301eb62cbbSBarry Smith   if (aij->lvec)   VecDestroy(aij->lvec);
531a56f8943SBarry Smith   if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
5320452661fSBarry Smith   PetscFree(aij);
533a5a9c739SBarry Smith   PLogObjectDestroy(mat);
5340452661fSBarry Smith   PetscHeaderDestroy(mat);
5351eb62cbbSBarry Smith   return 0;
5361eb62cbbSBarry Smith }
5377857610eSBarry Smith #include "draw.h"
538b16a3bb1SBarry Smith #include "pinclude/pviewer.h"
539ee50ffe9SBarry Smith 
540416022c9SBarry Smith static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
5411eb62cbbSBarry Smith {
542416022c9SBarry Smith   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
543416022c9SBarry Smith   int         ierr;
544416022c9SBarry Smith 
54517699dbbSLois Curfman McInnes   if (aij->size == 1) {
546416022c9SBarry Smith     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
547416022c9SBarry Smith   }
548a523beb4SLois Curfman McInnes   else SETERRQ(1,"MatView_MPIAIJ_Binary:Only uniprocessor output supported");
549416022c9SBarry Smith   return 0;
550416022c9SBarry Smith }
551416022c9SBarry Smith 
552d7e8b826SBarry Smith static int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
553416022c9SBarry Smith {
55444a69424SLois Curfman McInnes   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
555dbb450caSBarry Smith   Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
556a56f8943SBarry Smith   int         ierr, format,shift = C->indexshift,rank;
557d13ab20cSBarry Smith   PetscObject vobj = (PetscObject) viewer;
558d636dbe3SBarry Smith   FILE        *fd;
559416022c9SBarry Smith 
560416022c9SBarry Smith   if (vobj->type == ASCII_FILE_VIEWER || vobj->type == ASCII_FILES_VIEWER) {
561416022c9SBarry Smith     ierr = ViewerFileGetFormat_Private(viewer,&format);
56208480c60SBarry Smith     if (format == FILE_FORMAT_INFO_DETAILED) {
563a56f8943SBarry Smith       int nz,nzalloc,mem;
564a56f8943SBarry Smith       MPI_Comm_rank(mat->comm,&rank);
565a56f8943SBarry Smith       ierr = ViewerFileGetPointer_Private(viewer,&fd); CHKERRQ(ierr);
566a56f8943SBarry Smith       ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem);
567a56f8943SBarry Smith       MPIU_Seq_begin(mat->comm,1);
568a56f8943SBarry Smith       fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d \n",rank,aij->m,nz,
569a56f8943SBarry Smith                 nzalloc,mem);
57008480c60SBarry Smith       ierr = MatGetInfo(aij->A,MAT_LOCAL,&nz,&nzalloc,&mem);
57108480c60SBarry Smith       fprintf(fd,"[%d] on diagonal nz %d \n",rank,nz);
57208480c60SBarry Smith       ierr = MatGetInfo(aij->B,MAT_LOCAL,&nz,&nzalloc,&mem);
57308480c60SBarry Smith       fprintf(fd,"[%d] off diagonal nz %d \n",rank,nz);
574a56f8943SBarry Smith       fflush(fd);
575a56f8943SBarry Smith       MPIU_Seq_end(mat->comm,1);
576a40aa06bSLois Curfman McInnes       ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr);
577416022c9SBarry Smith       return 0;
578416022c9SBarry Smith     }
57908480c60SBarry Smith     else if (format == FILE_FORMAT_INFO) {
58008480c60SBarry Smith       return 0;
58108480c60SBarry Smith     }
582416022c9SBarry Smith   }
583416022c9SBarry Smith 
584416022c9SBarry Smith   if (vobj->type == ASCII_FILE_VIEWER) {
585d636dbe3SBarry Smith     ierr = ViewerFileGetPointer_Private(viewer,&fd); CHKERRQ(ierr);
5867f813858SBarry Smith     MPIU_Seq_begin(mat->comm,1);
587d13ab20cSBarry Smith     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
58817699dbbSLois Curfman McInnes            aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
5891eb62cbbSBarry Smith            aij->cend);
59078b31e54SBarry Smith     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
59178b31e54SBarry Smith     ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
592d13ab20cSBarry Smith     fflush(fd);
5937f813858SBarry Smith     MPIU_Seq_end(mat->comm,1);
594d13ab20cSBarry Smith   }
595416022c9SBarry Smith   else {
596a56f8943SBarry Smith     int size = aij->size;
597a56f8943SBarry Smith     rank = aij->rank;
59817699dbbSLois Curfman McInnes     if (size == 1) {
59978b31e54SBarry Smith       ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
60095373324SBarry Smith     }
60195373324SBarry Smith     else {
60295373324SBarry Smith       /* assemble the entire matrix onto first processor. */
60395373324SBarry Smith       Mat         A;
604ec8511deSBarry Smith       Mat_SeqAIJ *Aloc;
6052eb8c8abSBarry Smith       int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
60695373324SBarry Smith       Scalar      *a;
6072ee70a88SLois Curfman McInnes 
60817699dbbSLois Curfman McInnes       if (!rank) {
609b4fd4287SBarry Smith         ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
610c451ab25SLois Curfman McInnes                CHKERRQ(ierr);
61195373324SBarry Smith       }
61295373324SBarry Smith       else {
613b4fd4287SBarry Smith         ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
614c451ab25SLois Curfman McInnes                CHKERRQ(ierr);
61595373324SBarry Smith       }
616464493b3SBarry Smith       PLogObjectParent(mat,A);
617416022c9SBarry Smith 
61895373324SBarry Smith       /* copy over the A part */
619ec8511deSBarry Smith       Aloc = (Mat_SeqAIJ*) aij->A->data;
6202ee70a88SLois Curfman McInnes       m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
62195373324SBarry Smith       row = aij->rstart;
622dbb450caSBarry Smith       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
62395373324SBarry Smith       for ( i=0; i<m; i++ ) {
624416022c9SBarry Smith         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
62595373324SBarry Smith         row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
62695373324SBarry Smith       }
6272ee70a88SLois Curfman McInnes       aj = Aloc->j;
628dbb450caSBarry Smith       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}
62995373324SBarry Smith 
63095373324SBarry Smith       /* copy over the B part */
631ec8511deSBarry Smith       Aloc = (Mat_SeqAIJ*) aij->B->data;
6322ee70a88SLois Curfman McInnes       m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
63395373324SBarry Smith       row = aij->rstart;
6340452661fSBarry Smith       ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
635dbb450caSBarry Smith       for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
63695373324SBarry Smith       for ( i=0; i<m; i++ ) {
637416022c9SBarry Smith         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
63895373324SBarry Smith         row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
63995373324SBarry Smith       }
6400452661fSBarry Smith       PetscFree(ct);
64178b31e54SBarry Smith       ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
64278b31e54SBarry Smith       ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
64317699dbbSLois Curfman McInnes       if (!rank) {
64478b31e54SBarry Smith         ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
64595373324SBarry Smith       }
64678b31e54SBarry Smith       ierr = MatDestroy(A); CHKERRQ(ierr);
64795373324SBarry Smith     }
64895373324SBarry Smith   }
6491eb62cbbSBarry Smith   return 0;
6501eb62cbbSBarry Smith }
6511eb62cbbSBarry Smith 
652416022c9SBarry Smith static int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
653416022c9SBarry Smith {
654416022c9SBarry Smith   Mat         mat = (Mat) obj;
655416022c9SBarry Smith   int         ierr;
656416022c9SBarry Smith   PetscObject vobj = (PetscObject) viewer;
657416022c9SBarry Smith 
658416022c9SBarry Smith   if (!viewer) {
659416022c9SBarry Smith     viewer = STDOUT_VIEWER_SELF; vobj = (PetscObject) viewer;
660416022c9SBarry Smith   }
661416022c9SBarry Smith   if (vobj->cookie == DRAW_COOKIE && vobj->type == NULLWINDOW) return 0;
662416022c9SBarry Smith   else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILE_VIEWER) {
663d7e8b826SBarry Smith     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
664416022c9SBarry Smith   }
665416022c9SBarry Smith   else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILES_VIEWER) {
666d7e8b826SBarry Smith     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
667d7e8b826SBarry Smith   }
668d7e8b826SBarry Smith   else if (vobj->cookie == VIEWER_COOKIE && vobj->type == MATLAB_VIEWER) {
669d7e8b826SBarry Smith     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
670416022c9SBarry Smith   }
671416022c9SBarry Smith   else if (vobj->cookie == DRAW_COOKIE) {
672d7e8b826SBarry Smith     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
673416022c9SBarry Smith   }
674416022c9SBarry Smith   else if (vobj->type == BINARY_FILE_VIEWER) {
675416022c9SBarry Smith     return MatView_MPIAIJ_Binary(mat,viewer);
676416022c9SBarry Smith   }
677416022c9SBarry Smith   return 0;
678416022c9SBarry Smith }
679416022c9SBarry Smith 
680ec8511deSBarry Smith extern int MatMarkDiag_SeqAIJ(Mat);
6811eb62cbbSBarry Smith /*
6821eb62cbbSBarry Smith     This has to provide several versions.
6831eb62cbbSBarry Smith 
6841eb62cbbSBarry Smith      1) per sequential
6851eb62cbbSBarry Smith      2) a) use only local smoothing updating outer values only once.
6861eb62cbbSBarry Smith         b) local smoothing updating outer values each inner iteration
687d6dfbf8fSBarry Smith      3) color updating out values betwen colors.
6881eb62cbbSBarry Smith */
689fc76ce87SLois Curfman McInnes static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
690dbb450caSBarry Smith                            double fshift,int its,Vec xx)
6918a729477SBarry Smith {
69244a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
693d6dfbf8fSBarry Smith   Mat        AA = mat->A, BB = mat->B;
694ec8511deSBarry Smith   Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
6956abc6512SBarry Smith   Scalar     zero = 0.0,*b,*x,*xs,*ls,d,*v,sum,scale,*t,*ts;
6966abc6512SBarry Smith   int        ierr,*idx, *diag;
697416022c9SBarry Smith   int        n = mat->n, m = mat->m, i,shift = A->indexshift;
698da3a660dSBarry Smith   Vec        tt;
6998a729477SBarry Smith 
700d6dfbf8fSBarry Smith   VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
701dbb450caSBarry Smith   xs = x + shift; /* shift by one for index start of 1 */
702dbb450caSBarry Smith   ls = ls + shift;
703ec8511deSBarry Smith   if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;}
704d6dfbf8fSBarry Smith   diag = A->diag;
705acb40c82SBarry Smith   if (flag == SOR_APPLY_UPPER || flag == SOR_APPLY_LOWER) {
70648d91487SBarry Smith     SETERRQ(1,"MatRelax_MPIAIJ:Option not supported");
707acb40c82SBarry Smith   }
708da3a660dSBarry Smith   if (flag & SOR_EISENSTAT) {
709da3a660dSBarry Smith     /* Let  A = L + U + D; where L is lower trianglar,
710da3a660dSBarry Smith     U is upper triangular, E is diagonal; This routine applies
711da3a660dSBarry Smith 
712da3a660dSBarry Smith             (L + E)^{-1} A (U + E)^{-1}
713da3a660dSBarry Smith 
714da3a660dSBarry Smith     to a vector efficiently using Eisenstat's trick. This is for
715da3a660dSBarry Smith     the case of SSOR preconditioner, so E is D/omega where omega
716da3a660dSBarry Smith     is the relaxation factor.
717da3a660dSBarry Smith     */
71878b31e54SBarry Smith     ierr = VecDuplicate(xx,&tt); CHKERRQ(ierr);
719464493b3SBarry Smith     PLogObjectParent(matin,tt);
720da3a660dSBarry Smith     VecGetArray(tt,&t);
721da3a660dSBarry Smith     scale = (2.0/omega) - 1.0;
722da3a660dSBarry Smith     /*  x = (E + U)^{-1} b */
723da3a660dSBarry Smith     VecSet(&zero,mat->lvec);
72464119d58SLois Curfman McInnes     ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
72578b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
726da3a660dSBarry Smith     for ( i=m-1; i>-1; i-- ) {
727da3a660dSBarry Smith       n    = A->i[i+1] - diag[i] - 1;
728dbb450caSBarry Smith       idx  = A->j + diag[i] + !shift;
729dbb450caSBarry Smith       v    = A->a + diag[i] + !shift;
730da3a660dSBarry Smith       sum  = b[i];
731da3a660dSBarry Smith       SPARSEDENSEMDOT(sum,xs,v,idx,n);
732dbb450caSBarry Smith       d    = fshift + A->a[diag[i]+shift];
733da3a660dSBarry Smith       n    = B->i[i+1] - B->i[i];
734dbb450caSBarry Smith       idx  = B->j + B->i[i] + shift;
735dbb450caSBarry Smith       v    = B->a + B->i[i] + shift;
736da3a660dSBarry Smith       SPARSEDENSEMDOT(sum,ls,v,idx,n);
737da3a660dSBarry Smith       x[i] = omega*(sum/d);
738da3a660dSBarry Smith     }
73964119d58SLois Curfman McInnes     ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
74078b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
741da3a660dSBarry Smith 
742da3a660dSBarry Smith     /*  t = b - (2*E - D)x */
743da3a660dSBarry Smith     v = A->a;
744dbb450caSBarry Smith     for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ + shift])*x[i]; }
745da3a660dSBarry Smith 
746da3a660dSBarry Smith     /*  t = (E + L)^{-1}t */
747dbb450caSBarry Smith     ts = t + shift; /* shifted by one for index start of a or mat->j*/
748da3a660dSBarry Smith     diag = A->diag;
749da3a660dSBarry Smith     VecSet(&zero,mat->lvec);
75064119d58SLois Curfman McInnes     ierr = VecPipelineBegin(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
75178b31e54SBarry Smith                                                  mat->Mvctx); CHKERRQ(ierr);
752da3a660dSBarry Smith     for ( i=0; i<m; i++ ) {
753da3a660dSBarry Smith       n    = diag[i] - A->i[i];
754dbb450caSBarry Smith       idx  = A->j + A->i[i] + shift;
755dbb450caSBarry Smith       v    = A->a + A->i[i] + shift;
756da3a660dSBarry Smith       sum  = t[i];
757da3a660dSBarry Smith       SPARSEDENSEMDOT(sum,ts,v,idx,n);
758dbb450caSBarry Smith       d    = fshift + A->a[diag[i]+shift];
759da3a660dSBarry Smith       n    = B->i[i+1] - B->i[i];
760dbb450caSBarry Smith       idx  = B->j + B->i[i] + shift;
761dbb450caSBarry Smith       v    = B->a + B->i[i] + shift;
762da3a660dSBarry Smith       SPARSEDENSEMDOT(sum,ls,v,idx,n);
763da3a660dSBarry Smith       t[i] = omega*(sum/d);
764da3a660dSBarry Smith     }
76564119d58SLois Curfman McInnes     ierr = VecPipelineEnd(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
76678b31e54SBarry Smith                                                     mat->Mvctx); CHKERRQ(ierr);
767da3a660dSBarry Smith     /*  x = x + t */
768da3a660dSBarry Smith     for ( i=0; i<m; i++ ) { x[i] += t[i]; }
769da3a660dSBarry Smith     VecDestroy(tt);
770da3a660dSBarry Smith     return 0;
771da3a660dSBarry Smith   }
772da3a660dSBarry Smith 
7731eb62cbbSBarry Smith 
774d6dfbf8fSBarry Smith   if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){
775da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
776da3a660dSBarry Smith       VecSet(&zero,mat->lvec); VecSet(&zero,xx);
777da3a660dSBarry Smith     }
778da3a660dSBarry Smith     else {
77964119d58SLois Curfman McInnes       ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
78078b31e54SBarry Smith       CHKERRQ(ierr);
78164119d58SLois Curfman McInnes       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
78278b31e54SBarry Smith       CHKERRQ(ierr);
783da3a660dSBarry Smith     }
784d6dfbf8fSBarry Smith     while (its--) {
785d6dfbf8fSBarry Smith       /* go down through the rows */
78664119d58SLois Curfman McInnes       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
78778b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
788d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
789d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
790dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
791dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
792d6dfbf8fSBarry Smith         sum  = b[i];
793d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
794dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
795d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
796dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
797dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
798d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
799d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
800d6dfbf8fSBarry Smith       }
80164119d58SLois Curfman McInnes       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
80278b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
803d6dfbf8fSBarry Smith       /* come up through the rows */
80464119d58SLois Curfman McInnes       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
80578b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
806d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
807d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
808dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
809dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
810d6dfbf8fSBarry Smith         sum  = b[i];
811d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
812dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
813d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
814dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
815dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
816d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
817d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
818d6dfbf8fSBarry Smith       }
81964119d58SLois Curfman McInnes       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
82078b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
821d6dfbf8fSBarry Smith     }
822d6dfbf8fSBarry Smith   }
823d6dfbf8fSBarry Smith   else if (flag & SOR_FORWARD_SWEEP){
824da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
825da3a660dSBarry Smith       VecSet(&zero,mat->lvec);
82664119d58SLois Curfman McInnes       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
82778b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
828da3a660dSBarry Smith       for ( i=0; i<m; i++ ) {
829da3a660dSBarry Smith         n    = diag[i] - A->i[i];
830dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
831dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
832da3a660dSBarry Smith         sum  = b[i];
833da3a660dSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
834dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
835da3a660dSBarry Smith         n    = B->i[i+1] - B->i[i];
836dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
837dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
838da3a660dSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
839da3a660dSBarry Smith         x[i] = omega*(sum/d);
840da3a660dSBarry Smith       }
84164119d58SLois Curfman McInnes       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
84278b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
843da3a660dSBarry Smith       its--;
844da3a660dSBarry Smith     }
845d6dfbf8fSBarry Smith     while (its--) {
84664119d58SLois Curfman McInnes       ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
84778b31e54SBarry Smith       CHKERRQ(ierr);
84864119d58SLois Curfman McInnes       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
84978b31e54SBarry Smith       CHKERRQ(ierr);
85064119d58SLois Curfman McInnes       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
85178b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
852d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
853d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
854dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
855dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
856d6dfbf8fSBarry Smith         sum  = b[i];
857d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
858dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
859d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
860dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
861dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
862d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
863d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
864d6dfbf8fSBarry Smith       }
86564119d58SLois Curfman McInnes       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
86678b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
867d6dfbf8fSBarry Smith     }
868d6dfbf8fSBarry Smith   }
869d6dfbf8fSBarry Smith   else if (flag & SOR_BACKWARD_SWEEP){
870da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
871da3a660dSBarry Smith       VecSet(&zero,mat->lvec);
87264119d58SLois Curfman McInnes       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
87378b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
874da3a660dSBarry Smith       for ( i=m-1; i>-1; i-- ) {
875da3a660dSBarry Smith         n    = A->i[i+1] - diag[i] - 1;
876dbb450caSBarry Smith         idx  = A->j + diag[i] + !shift;
877dbb450caSBarry Smith         v    = A->a + diag[i] + !shift;
878da3a660dSBarry Smith         sum  = b[i];
879da3a660dSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
880dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
881da3a660dSBarry Smith         n    = B->i[i+1] - B->i[i];
882dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
883dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
884da3a660dSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
885da3a660dSBarry Smith         x[i] = omega*(sum/d);
886da3a660dSBarry Smith       }
88764119d58SLois Curfman McInnes       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
88878b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
889da3a660dSBarry Smith       its--;
890da3a660dSBarry Smith     }
891d6dfbf8fSBarry Smith     while (its--) {
89264119d58SLois Curfman McInnes       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN,
89378b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
89464119d58SLois Curfman McInnes       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN,
89578b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
89664119d58SLois Curfman McInnes       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
89778b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
898d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
899d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
900dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
901dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
902d6dfbf8fSBarry Smith         sum  = b[i];
903d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
904dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
905d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
906dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
907dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
908d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
909d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
910d6dfbf8fSBarry Smith       }
91164119d58SLois Curfman McInnes       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
91278b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
913d6dfbf8fSBarry Smith     }
914d6dfbf8fSBarry Smith   }
915d6dfbf8fSBarry Smith   else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
916da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
917dbb450caSBarry Smith       return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
918da3a660dSBarry Smith     }
91964119d58SLois Curfman McInnes     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
92078b31e54SBarry Smith     CHKERRQ(ierr);
92164119d58SLois Curfman McInnes     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
92278b31e54SBarry Smith     CHKERRQ(ierr);
923d6dfbf8fSBarry Smith     while (its--) {
924d6dfbf8fSBarry Smith       /* go down through the rows */
925d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
926d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
927dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
928dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
929d6dfbf8fSBarry Smith         sum  = b[i];
930d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
931dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
932d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
933dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
934dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
935d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
936d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
937d6dfbf8fSBarry Smith       }
938d6dfbf8fSBarry Smith       /* come up through the rows */
939d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
940d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
941dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
942dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
943d6dfbf8fSBarry Smith         sum  = b[i];
944d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
945dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
946d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
947dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
948dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
949d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
950d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
951d6dfbf8fSBarry Smith       }
952d6dfbf8fSBarry Smith     }
953d6dfbf8fSBarry Smith   }
954d6dfbf8fSBarry Smith   else if (flag & SOR_LOCAL_FORWARD_SWEEP){
955da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
956dbb450caSBarry Smith       return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
957da3a660dSBarry Smith     }
95864119d58SLois Curfman McInnes     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
95978b31e54SBarry Smith     CHKERRQ(ierr);
96064119d58SLois Curfman McInnes     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
96178b31e54SBarry Smith     CHKERRQ(ierr);
962d6dfbf8fSBarry Smith     while (its--) {
963d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
964d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
965dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
966dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
967d6dfbf8fSBarry Smith         sum  = b[i];
968d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
969dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
970d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
971dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
972dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
973d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
974d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
975d6dfbf8fSBarry Smith       }
976d6dfbf8fSBarry Smith     }
977d6dfbf8fSBarry Smith   }
978d6dfbf8fSBarry Smith   else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
979da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
980dbb450caSBarry Smith       return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
981da3a660dSBarry Smith     }
98264119d58SLois Curfman McInnes     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,
98378b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
98464119d58SLois Curfman McInnes     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,
98578b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
986d6dfbf8fSBarry Smith     while (its--) {
987d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
988d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
989dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
990dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
991d6dfbf8fSBarry Smith         sum  = b[i];
992d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
993dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
994d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
995dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
996dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
997d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
998d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
999d6dfbf8fSBarry Smith       }
1000d6dfbf8fSBarry Smith     }
1001d6dfbf8fSBarry Smith   }
10028a729477SBarry Smith   return 0;
10038a729477SBarry Smith }
1004a66be287SLois Curfman McInnes 
1005fc76ce87SLois Curfman McInnes static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,int *nz,
1006fc76ce87SLois Curfman McInnes                              int *nzalloc,int *mem)
1007a66be287SLois Curfman McInnes {
1008a66be287SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1009a66be287SLois Curfman McInnes   Mat        A = mat->A, B = mat->B;
1010a66be287SLois Curfman McInnes   int        ierr, isend[3], irecv[3], nzA, nzallocA, memA;
1011a66be287SLois Curfman McInnes 
101278b31e54SBarry Smith   ierr = MatGetInfo(A,MAT_LOCAL,&nzA,&nzallocA,&memA); CHKERRQ(ierr);
101378b31e54SBarry Smith   ierr = MatGetInfo(B,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr);
1014a66be287SLois Curfman McInnes   isend[0] += nzA; isend[1] += nzallocA; isend[2] += memA;
1015a66be287SLois Curfman McInnes   if (flag == MAT_LOCAL) {
1016a66be287SLois Curfman McInnes     *nz = isend[0]; *nzalloc = isend[1]; *mem = isend[2];
1017a66be287SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_MAX) {
1018d65a2f8fSBarry Smith     MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_MAX,matin->comm);
1019a66be287SLois Curfman McInnes     *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
1020a66be287SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_SUM) {
1021d65a2f8fSBarry Smith     MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_SUM,matin->comm);
1022a66be287SLois Curfman McInnes     *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
1023a66be287SLois Curfman McInnes   }
1024a66be287SLois Curfman McInnes   return 0;
1025a66be287SLois Curfman McInnes }
1026a66be287SLois Curfman McInnes 
1027299609e3SLois Curfman McInnes extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*);
1028299609e3SLois Curfman McInnes extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*);
1029299609e3SLois Curfman McInnes extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double);
1030299609e3SLois Curfman McInnes extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *);
1031299609e3SLois Curfman McInnes extern int MatSolve_MPIAIJ(Mat,Vec,Vec);
1032299609e3SLois Curfman McInnes extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec);
1033299609e3SLois Curfman McInnes extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec);
1034299609e3SLois Curfman McInnes extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec);
1035299609e3SLois Curfman McInnes 
1036416022c9SBarry Smith static int MatSetOption_MPIAIJ(Mat A,MatOption op)
1037c74985f6SBarry Smith {
1038c0bbcb79SLois Curfman McInnes   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1039c74985f6SBarry Smith 
1040c0bbcb79SLois Curfman McInnes   if (op == NO_NEW_NONZERO_LOCATIONS ||
1041c0bbcb79SLois Curfman McInnes       op == YES_NEW_NONZERO_LOCATIONS ||
1042c0bbcb79SLois Curfman McInnes       op == COLUMNS_SORTED ||
1043c0bbcb79SLois Curfman McInnes       op == ROW_ORIENTED) {
1044c0bbcb79SLois Curfman McInnes         MatSetOption(a->A,op);
1045c0bbcb79SLois Curfman McInnes         MatSetOption(a->B,op);
1046c74985f6SBarry Smith   }
1047c0bbcb79SLois Curfman McInnes   else if (op == ROWS_SORTED ||
1048c0bbcb79SLois Curfman McInnes            op == SYMMETRIC_MATRIX ||
1049c0bbcb79SLois Curfman McInnes            op == STRUCTURALLY_SYMMETRIC_MATRIX ||
1050c0bbcb79SLois Curfman McInnes            op == YES_NEW_DIAGONALS)
1051c0bbcb79SLois Curfman McInnes     PLogInfo((PetscObject)A,"Info:MatSetOption_MPIAIJ:Option ignored\n");
10524b0e389bSBarry Smith   else if (op == COLUMN_ORIENTED) {
10534b0e389bSBarry Smith     a->roworiented = 0;
10544b0e389bSBarry Smith     MatSetOption(a->A,op);
10554b0e389bSBarry Smith     MatSetOption(a->B,op);
10564b0e389bSBarry Smith   }
1057c0bbcb79SLois Curfman McInnes   else if (op == NO_NEW_DIAGONALS)
10584d39ef84SLois Curfman McInnes     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:NO_NEW_DIAGONALS");}
1059c0bbcb79SLois Curfman McInnes   else
10604d39ef84SLois Curfman McInnes     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:unknown option");}
1061c74985f6SBarry Smith   return 0;
1062c74985f6SBarry Smith }
1063c74985f6SBarry Smith 
1064d1710a03SLois Curfman McInnes static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
1065c74985f6SBarry Smith {
106644a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1067c74985f6SBarry Smith   *m = mat->M; *n = mat->N;
1068c74985f6SBarry Smith   return 0;
1069c74985f6SBarry Smith }
1070c74985f6SBarry Smith 
1071d1710a03SLois Curfman McInnes static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
1072c74985f6SBarry Smith {
107344a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1074b7c46309SBarry Smith   *m = mat->m; *n = mat->N;
1075c74985f6SBarry Smith   return 0;
1076c74985f6SBarry Smith }
1077c74985f6SBarry Smith 
1078d1710a03SLois Curfman McInnes static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
1079c74985f6SBarry Smith {
108044a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1081c74985f6SBarry Smith   *m = mat->rstart; *n = mat->rend;
1082c74985f6SBarry Smith   return 0;
1083c74985f6SBarry Smith }
1084c74985f6SBarry Smith 
108539e00950SLois Curfman McInnes static int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
108639e00950SLois Curfman McInnes {
1087154123eaSLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1088154123eaSLois Curfman McInnes   Scalar     *vworkA, *vworkB, **pvA, **pvB;
1089154123eaSLois Curfman McInnes   int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
1090154123eaSLois Curfman McInnes   int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
109139e00950SLois Curfman McInnes 
1092b49de8d1SLois Curfman McInnes   if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIAIJ:Only local rows")
1093abc0e9e4SLois Curfman McInnes   lrow = row - rstart;
109439e00950SLois Curfman McInnes 
1095154123eaSLois Curfman McInnes   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1096154123eaSLois Curfman McInnes   if (!v)   {pvA = 0; pvB = 0;}
1097154123eaSLois Curfman McInnes   if (!idx) {pcA = 0; if (!v) pcB = 0;}
109878b31e54SBarry Smith   ierr = MatGetRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
109978b31e54SBarry Smith   ierr = MatGetRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1100154123eaSLois Curfman McInnes   nztot = nzA + nzB;
1101154123eaSLois Curfman McInnes 
1102154123eaSLois Curfman McInnes   if (v  || idx) {
1103154123eaSLois Curfman McInnes     if (nztot) {
1104154123eaSLois Curfman McInnes       /* Sort by increasing column numbers, assuming A and B already sorted */
1105299609e3SLois Curfman McInnes       int imark;
1106c456f294SBarry Smith       if (matin->assembled) {
1107154123eaSLois Curfman McInnes         for (i=0; i<nzB; i++) cworkB[i] = mat->garray[cworkB[i]];
110848b35521SBarry Smith       }
1109154123eaSLois Curfman McInnes       if (v) {
11100452661fSBarry Smith         *v = (Scalar *) PetscMalloc( (nztot)*sizeof(Scalar) ); CHKPTRQ(*v);
111139e00950SLois Curfman McInnes         for ( i=0; i<nzB; i++ ) {
1112154123eaSLois Curfman McInnes           if (cworkB[i] < cstart)   (*v)[i] = vworkB[i];
1113154123eaSLois Curfman McInnes           else break;
1114154123eaSLois Curfman McInnes         }
1115154123eaSLois Curfman McInnes         imark = i;
1116154123eaSLois Curfman McInnes         for ( i=0; i<nzA; i++ )     (*v)[imark+i] = vworkA[i];
1117299609e3SLois Curfman McInnes         for ( i=imark; i<nzB; i++ ) (*v)[nzA+i] = vworkB[i];
1118154123eaSLois Curfman McInnes       }
1119154123eaSLois Curfman McInnes       if (idx) {
11200452661fSBarry Smith         *idx = (int *) PetscMalloc( (nztot)*sizeof(int) ); CHKPTRQ(*idx);
1121154123eaSLois Curfman McInnes         for (i=0; i<nzA; i++) cworkA[i] += cstart;
1122154123eaSLois Curfman McInnes         for ( i=0; i<nzB; i++ ) {
1123154123eaSLois Curfman McInnes           if (cworkB[i] < cstart)   (*idx)[i] = cworkB[i];
1124154123eaSLois Curfman McInnes           else break;
1125154123eaSLois Curfman McInnes         }
1126154123eaSLois Curfman McInnes         imark = i;
1127154123eaSLois Curfman McInnes         for ( i=0; i<nzA; i++ )     (*idx)[imark+i] = cworkA[i];
1128299609e3SLois Curfman McInnes         for ( i=imark; i<nzB; i++ ) (*idx)[nzA+i] = cworkB[i];
112939e00950SLois Curfman McInnes       }
113039e00950SLois Curfman McInnes     }
113139e00950SLois Curfman McInnes     else {*idx = 0; *v=0;}
1132154123eaSLois Curfman McInnes   }
113339e00950SLois Curfman McInnes   *nz = nztot;
113478b31e54SBarry Smith   ierr = MatRestoreRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
113578b31e54SBarry Smith   ierr = MatRestoreRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
113639e00950SLois Curfman McInnes   return 0;
113739e00950SLois Curfman McInnes }
113839e00950SLois Curfman McInnes 
113939e00950SLois Curfman McInnes static int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
114039e00950SLois Curfman McInnes {
11410452661fSBarry Smith   if (idx) PetscFree(*idx);
11420452661fSBarry Smith   if (v) PetscFree(*v);
114339e00950SLois Curfman McInnes   return 0;
114439e00950SLois Curfman McInnes }
114539e00950SLois Curfman McInnes 
1146cddf8d76SBarry Smith static int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
1147855ac2c5SLois Curfman McInnes {
1148855ac2c5SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1149ec8511deSBarry Smith   Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
1150416022c9SBarry Smith   int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
1151416022c9SBarry Smith   double     sum = 0.0;
115204ca555eSLois Curfman McInnes   Scalar     *v;
115304ca555eSLois Curfman McInnes 
115417699dbbSLois Curfman McInnes   if (aij->size == 1) {
115514183eadSLois Curfman McInnes     ierr =  MatNorm(aij->A,type,norm); CHKERRQ(ierr);
115637fa93a5SLois Curfman McInnes   } else {
115704ca555eSLois Curfman McInnes     if (type == NORM_FROBENIUS) {
115804ca555eSLois Curfman McInnes       v = amat->a;
115904ca555eSLois Curfman McInnes       for (i=0; i<amat->nz; i++ ) {
116004ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX)
116104ca555eSLois Curfman McInnes         sum += real(conj(*v)*(*v)); v++;
116204ca555eSLois Curfman McInnes #else
116304ca555eSLois Curfman McInnes         sum += (*v)*(*v); v++;
116404ca555eSLois Curfman McInnes #endif
116504ca555eSLois Curfman McInnes       }
116604ca555eSLois Curfman McInnes       v = bmat->a;
116704ca555eSLois Curfman McInnes       for (i=0; i<bmat->nz; i++ ) {
116804ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX)
116904ca555eSLois Curfman McInnes         sum += real(conj(*v)*(*v)); v++;
117004ca555eSLois Curfman McInnes #else
117104ca555eSLois Curfman McInnes         sum += (*v)*(*v); v++;
117204ca555eSLois Curfman McInnes #endif
117304ca555eSLois Curfman McInnes       }
117404ca555eSLois Curfman McInnes       MPI_Allreduce((void*)&sum,(void*)norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
117504ca555eSLois Curfman McInnes       *norm = sqrt(*norm);
117604ca555eSLois Curfman McInnes     }
117704ca555eSLois Curfman McInnes     else if (type == NORM_1) { /* max column norm */
117804ca555eSLois Curfman McInnes       double *tmp, *tmp2;
117904ca555eSLois Curfman McInnes       int    *jj, *garray = aij->garray;
11800452661fSBarry Smith       tmp  = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp);
11810452661fSBarry Smith       tmp2 = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp2);
1182cddf8d76SBarry Smith       PetscMemzero(tmp,aij->N*sizeof(double));
118304ca555eSLois Curfman McInnes       *norm = 0.0;
118404ca555eSLois Curfman McInnes       v = amat->a; jj = amat->j;
118504ca555eSLois Curfman McInnes       for ( j=0; j<amat->nz; j++ ) {
1186579c6b6fSBarry Smith         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
118704ca555eSLois Curfman McInnes       }
118804ca555eSLois Curfman McInnes       v = bmat->a; jj = bmat->j;
118904ca555eSLois Curfman McInnes       for ( j=0; j<bmat->nz; j++ ) {
1190579c6b6fSBarry Smith         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
119104ca555eSLois Curfman McInnes       }
119204ca555eSLois Curfman McInnes       MPI_Allreduce((void*)tmp,(void*)tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);
119304ca555eSLois Curfman McInnes       for ( j=0; j<aij->N; j++ ) {
119404ca555eSLois Curfman McInnes         if (tmp2[j] > *norm) *norm = tmp2[j];
119504ca555eSLois Curfman McInnes       }
11960452661fSBarry Smith       PetscFree(tmp); PetscFree(tmp2);
119704ca555eSLois Curfman McInnes     }
119804ca555eSLois Curfman McInnes     else if (type == NORM_INFINITY) { /* max row norm */
1199515d9167SLois Curfman McInnes       double ntemp = 0.0;
120004ca555eSLois Curfman McInnes       for ( j=0; j<amat->m; j++ ) {
1201dbb450caSBarry Smith         v = amat->a + amat->i[j] + shift;
120204ca555eSLois Curfman McInnes         sum = 0.0;
120304ca555eSLois Curfman McInnes         for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
1204cddf8d76SBarry Smith           sum += PetscAbsScalar(*v); v++;
120504ca555eSLois Curfman McInnes         }
1206dbb450caSBarry Smith         v = bmat->a + bmat->i[j] + shift;
120704ca555eSLois Curfman McInnes         for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
1208cddf8d76SBarry Smith           sum += PetscAbsScalar(*v); v++;
120904ca555eSLois Curfman McInnes         }
1210515d9167SLois Curfman McInnes         if (sum > ntemp) ntemp = sum;
121104ca555eSLois Curfman McInnes       }
1212515d9167SLois Curfman McInnes       MPI_Allreduce((void*)&ntemp,(void*)norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);
121304ca555eSLois Curfman McInnes     }
121404ca555eSLois Curfman McInnes     else {
1215515d9167SLois Curfman McInnes       SETERRQ(1,"MatNorm_MPIAIJ:No support for two norm");
121604ca555eSLois Curfman McInnes     }
121737fa93a5SLois Curfman McInnes   }
1218855ac2c5SLois Curfman McInnes   return 0;
1219855ac2c5SLois Curfman McInnes }
1220855ac2c5SLois Curfman McInnes 
12210de55854SLois Curfman McInnes static int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1222b7c46309SBarry Smith {
1223b7c46309SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1224dbb450caSBarry Smith   Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
1225416022c9SBarry Smith   int        ierr,shift = Aloc->indexshift;
1226416022c9SBarry Smith   Mat        B;
1227b7c46309SBarry Smith   int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
1228b7c46309SBarry Smith   Scalar     *array;
1229b7c46309SBarry Smith 
1230416022c9SBarry Smith   if (!matout && M != N) SETERRQ(1,"MatTranspose_MPIAIJ:Square only in-place");
1231b4fd4287SBarry Smith   ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,
1232b4fd4287SBarry Smith          PETSC_NULL,&B); CHKERRQ(ierr);
1233b7c46309SBarry Smith 
1234b7c46309SBarry Smith   /* copy over the A part */
1235ec8511deSBarry Smith   Aloc = (Mat_SeqAIJ*) a->A->data;
1236b7c46309SBarry Smith   m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1237b7c46309SBarry Smith   row = a->rstart;
1238dbb450caSBarry Smith   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
1239b7c46309SBarry Smith   for ( i=0; i<m; i++ ) {
1240416022c9SBarry Smith     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1241b7c46309SBarry Smith     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1242b7c46309SBarry Smith   }
1243b7c46309SBarry Smith   aj = Aloc->j;
1244dbb450caSBarry Smith   for ( i=0; i<ai[m]|+shift; i++ ) {aj[i] -= a->cstart + shift;}
1245b7c46309SBarry Smith 
1246b7c46309SBarry Smith   /* copy over the B part */
1247ec8511deSBarry Smith   Aloc = (Mat_SeqAIJ*) a->B->data;
1248b7c46309SBarry Smith   m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1249b7c46309SBarry Smith   row = a->rstart;
12500452661fSBarry Smith   ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
1251dbb450caSBarry Smith   for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
1252b7c46309SBarry Smith   for ( i=0; i<m; i++ ) {
1253416022c9SBarry Smith     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1254b7c46309SBarry Smith     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1255b7c46309SBarry Smith   }
12560452661fSBarry Smith   PetscFree(ct);
1257b7c46309SBarry Smith   ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
1258b7c46309SBarry Smith   ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
12590de55854SLois Curfman McInnes   if (matout) {
12600de55854SLois Curfman McInnes     *matout = B;
12610de55854SLois Curfman McInnes   } else {
12620de55854SLois Curfman McInnes     /* This isn't really an in-place transpose .... but free data structures from a */
12630452661fSBarry Smith     PetscFree(a->rowners);
12640de55854SLois Curfman McInnes     ierr = MatDestroy(a->A); CHKERRQ(ierr);
12650de55854SLois Curfman McInnes     ierr = MatDestroy(a->B); CHKERRQ(ierr);
12660452661fSBarry Smith     if (a->colmap) PetscFree(a->colmap);
12670452661fSBarry Smith     if (a->garray) PetscFree(a->garray);
12680de55854SLois Curfman McInnes     if (a->lvec) VecDestroy(a->lvec);
1269a56f8943SBarry Smith     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
12700452661fSBarry Smith     PetscFree(a);
1271416022c9SBarry Smith     PetscMemcpy(A,B,sizeof(struct _Mat));
12720452661fSBarry Smith     PetscHeaderDestroy(B);
12730de55854SLois Curfman McInnes   }
1274b7c46309SBarry Smith   return 0;
1275b7c46309SBarry Smith }
1276b7c46309SBarry Smith 
1277682d7d0cSBarry Smith extern int MatPrintHelp_SeqAIJ(Mat);
1278682d7d0cSBarry Smith static int MatPrintHelp_MPIAIJ(Mat A)
1279682d7d0cSBarry Smith {
1280682d7d0cSBarry Smith   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1281682d7d0cSBarry Smith 
1282682d7d0cSBarry Smith   if (!a->rank) return MatPrintHelp_SeqAIJ(a->A);
1283682d7d0cSBarry Smith   else return 0;
1284682d7d0cSBarry Smith }
1285682d7d0cSBarry Smith 
1286fc76ce87SLois Curfman McInnes extern int MatConvert_MPIAIJ(Mat,MatType,Mat *);
12873d1612f7SBarry Smith static int MatConvertSameType_MPIAIJ(Mat,Mat *,int);
12882f86bd48SSatish Balay extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int);
12898a729477SBarry Smith /* -------------------------------------------------------------------*/
12902ee70a88SLois Curfman McInnes static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
129139e00950SLois Curfman McInnes        MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
129244a69424SLois Curfman McInnes        MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
129344a69424SLois Curfman McInnes        MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
1294299609e3SLois Curfman McInnes        MatSolve_MPIAIJ,MatSolveAdd_MPIAIJ,
1295299609e3SLois Curfman McInnes        MatSolveTrans_MPIAIJ,MatSolveTransAdd_MPIAIJ,
1296299609e3SLois Curfman McInnes        MatLUFactor_MPIAIJ,0,
129744a69424SLois Curfman McInnes        MatRelax_MPIAIJ,
1298b7c46309SBarry Smith        MatTranspose_MPIAIJ,
1299a66be287SLois Curfman McInnes        MatGetInfo_MPIAIJ,0,
1300855ac2c5SLois Curfman McInnes        MatGetDiagonal_MPIAIJ,0,MatNorm_MPIAIJ,
1301ee50ffe9SBarry Smith        MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
13021eb62cbbSBarry Smith        0,
1303299609e3SLois Curfman McInnes        MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
1304299609e3SLois Curfman McInnes        MatGetReordering_MPIAIJ,
1305299609e3SLois Curfman McInnes        MatLUFactorSymbolic_MPIAIJ,MatLUFactorNumeric_MPIAIJ,0,0,
1306d1710a03SLois Curfman McInnes        MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
1307299609e3SLois Curfman McInnes        MatILUFactorSymbolic_MPIAIJ,0,
13083d1612f7SBarry Smith        0,0,MatConvert_MPIAIJ,0,0,MatConvertSameType_MPIAIJ,0,0,
1309b49de8d1SLois Curfman McInnes        0,0,0,
1310*7fab95ffSSatish Balay        0,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0,
1311052efed2SBarry Smith        MatPrintHelp_MPIAIJ,
1312052efed2SBarry Smith        MatScale_MPIAIJ};
13138a729477SBarry Smith 
13141987afe7SBarry Smith /*@C
1315ff756334SLois Curfman McInnes    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
1316ff756334SLois Curfman McInnes    (the default parallel PETSc format).
13178a729477SBarry Smith 
13188a729477SBarry Smith    Input Parameters:
13191eb62cbbSBarry Smith .  comm - MPI communicator
13207d3e4905SLois Curfman McInnes .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
13217d3e4905SLois Curfman McInnes .  n - number of local columns (or PETSC_DECIDE to have calculated
13227d3e4905SLois Curfman McInnes            if N is given)
13237d3e4905SLois Curfman McInnes .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
13247d3e4905SLois Curfman McInnes .  N - number of global columns (or PETSC_DECIDE to have calculated
13257d3e4905SLois Curfman McInnes            if n is given)
1326ab693e5aSLois Curfman McInnes .  d_nz - number of nonzeros per row in diagonal portion of local submatrix
1327ff756334SLois Curfman McInnes            (same for all local rows)
1328ab693e5aSLois Curfman McInnes .  d_nzz - number of nonzeros per row in diagonal portion of local submatrix
1329ab693e5aSLois Curfman McInnes            or null (possibly different for each row).  You must leave room
1330ab693e5aSLois Curfman McInnes            for the diagonal entry even if it is zero.
1331ab693e5aSLois Curfman McInnes .  o_nz - number of nonzeros per row in off-diagonal portion of local
1332ab693e5aSLois Curfman McInnes            submatrix (same for all local rows).
1333ab693e5aSLois Curfman McInnes .  o_nzz - number of nonzeros per row in off-diagonal portion of local
1334ab693e5aSLois Curfman McInnes            submatrix or null (possibly different for each row).
13358a729477SBarry Smith 
1336ff756334SLois Curfman McInnes    Output Parameter:
13378a729477SBarry Smith .  newmat - the matrix
13388a729477SBarry Smith 
1339ff756334SLois Curfman McInnes    Notes:
1340ff756334SLois Curfman McInnes    The AIJ format (also called the Yale sparse matrix format or
1341ff756334SLois Curfman McInnes    compressed row storage), is fully compatible with standard Fortran 77
13420002213bSLois Curfman McInnes    storage.  That is, the stored row and column indices can begin at
13430002213bSLois Curfman McInnes    either one (as in Fortran) or zero.  See the users manual for details.
1344ff756334SLois Curfman McInnes 
1345ff756334SLois Curfman McInnes    The user MUST specify either the local or global matrix dimensions
1346ff756334SLois Curfman McInnes    (possibly both).
1347ff756334SLois Curfman McInnes 
1348e0245417SLois Curfman McInnes    Storage Information:
1349e0245417SLois Curfman McInnes    For a square global matrix we define each processor's diagonal portion
1350e0245417SLois Curfman McInnes    to be its local rows and the corresponding columns (a square submatrix);
1351e0245417SLois Curfman McInnes    each processor's off-diagonal portion encompasses the remainder of the
1352e0245417SLois Curfman McInnes    local matrix (a rectangular submatrix).
1353e0245417SLois Curfman McInnes 
1354e0245417SLois Curfman McInnes    The user can specify preallocated storage for the diagonal part of
13555ace5be8SLois Curfman McInnes    the local submatrix with either d_nz or d_nnz (not both).  Set
13565ace5be8SLois Curfman McInnes    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
13575ace5be8SLois Curfman McInnes    memory allocation.  Likewise, specify preallocated storage for the
13585ace5be8SLois Curfman McInnes    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1359ff756334SLois Curfman McInnes 
1360c451ab25SLois Curfman McInnes    By default, this format uses inodes (identical nodes) when possible.
1361c451ab25SLois Curfman McInnes    We search for consecutive rows with the same nonzero structure, thereby
1362c451ab25SLois Curfman McInnes    reusing matrix information to achieve increased efficiency.
1363c451ab25SLois Curfman McInnes 
1364c451ab25SLois Curfman McInnes    Options Database Keys:
1365c451ab25SLois Curfman McInnes $    -mat_aij_no_inode  - Do not use inodes
1366c451ab25SLois Curfman McInnes $    -mat_aij_inode_limit <limit> - Set inode limit (max limit=5)
1367c451ab25SLois Curfman McInnes 
1368dbd7a890SLois Curfman McInnes .keywords: matrix, aij, compressed row, sparse, parallel
1369ff756334SLois Curfman McInnes 
1370fafbff53SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
13718a729477SBarry Smith @*/
13721eb62cbbSBarry Smith int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
13731eb62cbbSBarry Smith                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *newmat)
13748a729477SBarry Smith {
13758a729477SBarry Smith   Mat          mat;
1376416022c9SBarry Smith   Mat_MPIAIJ   *a;
13776abc6512SBarry Smith   int          ierr, i,sum[2],work[2];
1378416022c9SBarry Smith 
13798a729477SBarry Smith   *newmat         = 0;
13800452661fSBarry Smith   PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,comm);
1381a5a9c739SBarry Smith   PLogObjectCreate(mat);
13820452661fSBarry Smith   mat->data       = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
1383416022c9SBarry Smith   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
138444a69424SLois Curfman McInnes   mat->destroy    = MatDestroy_MPIAIJ;
138544a69424SLois Curfman McInnes   mat->view       = MatView_MPIAIJ;
13868a729477SBarry Smith   mat->factor     = 0;
1387c456f294SBarry Smith   mat->assembled  = PETSC_FALSE;
1388d6dfbf8fSBarry Smith 
138964119d58SLois Curfman McInnes   a->insertmode = NOT_SET_VALUES;
139017699dbbSLois Curfman McInnes   MPI_Comm_rank(comm,&a->rank);
139117699dbbSLois Curfman McInnes   MPI_Comm_size(comm,&a->size);
13921eb62cbbSBarry Smith 
1393b4fd4287SBarry Smith   if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
13941987afe7SBarry Smith     SETERRQ(1,"MatCreateMPIAIJ:Cannot have PETSc decide rows but set d_nnz or o_nnz");
13951987afe7SBarry Smith 
1396dbd7a890SLois Curfman McInnes   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
13971eb62cbbSBarry Smith     work[0] = m; work[1] = n;
1398d65a2f8fSBarry Smith     MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
1399dbd7a890SLois Curfman McInnes     if (M == PETSC_DECIDE) M = sum[0];
1400dbd7a890SLois Curfman McInnes     if (N == PETSC_DECIDE) N = sum[1];
14011eb62cbbSBarry Smith   }
140217699dbbSLois Curfman McInnes   if (m == PETSC_DECIDE) {m = M/a->size + ((M % a->size) > a->rank);}
140317699dbbSLois Curfman McInnes   if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);}
1404416022c9SBarry Smith   a->m = m;
1405416022c9SBarry Smith   a->n = n;
1406416022c9SBarry Smith   a->N = N;
1407416022c9SBarry Smith   a->M = M;
14081eb62cbbSBarry Smith 
14091eb62cbbSBarry Smith   /* build local table of row and column ownerships */
14100452661fSBarry Smith   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
1411579c6b6fSBarry Smith   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
141217699dbbSLois Curfman McInnes   a->cowners = a->rowners + a->size + 1;
1413416022c9SBarry Smith   MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);
1414416022c9SBarry Smith   a->rowners[0] = 0;
141517699dbbSLois Curfman McInnes   for ( i=2; i<=a->size; i++ ) {
1416416022c9SBarry Smith     a->rowners[i] += a->rowners[i-1];
14178a729477SBarry Smith   }
141817699dbbSLois Curfman McInnes   a->rstart = a->rowners[a->rank];
141917699dbbSLois Curfman McInnes   a->rend   = a->rowners[a->rank+1];
1420416022c9SBarry Smith   MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);
1421416022c9SBarry Smith   a->cowners[0] = 0;
142217699dbbSLois Curfman McInnes   for ( i=2; i<=a->size; i++ ) {
1423416022c9SBarry Smith     a->cowners[i] += a->cowners[i-1];
14248a729477SBarry Smith   }
142517699dbbSLois Curfman McInnes   a->cstart = a->cowners[a->rank];
142617699dbbSLois Curfman McInnes   a->cend   = a->cowners[a->rank+1];
14278a729477SBarry Smith 
14285ace5be8SLois Curfman McInnes   if (d_nz == PETSC_DEFAULT) d_nz = 5;
1429416022c9SBarry Smith   ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&a->A); CHKERRQ(ierr);
1430416022c9SBarry Smith   PLogObjectParent(mat,a->A);
14317b8455f0SLois Curfman McInnes   if (o_nz == PETSC_DEFAULT) o_nz = 0;
1432416022c9SBarry Smith   ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&a->B); CHKERRQ(ierr);
1433416022c9SBarry Smith   PLogObjectParent(mat,a->B);
14348a729477SBarry Smith 
14351eb62cbbSBarry Smith   /* build cache for off array entries formed */
1436416022c9SBarry Smith   ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr);
1437416022c9SBarry Smith   a->colmap      = 0;
1438416022c9SBarry Smith   a->garray      = 0;
14394b0e389bSBarry Smith   a->roworiented = 1;
14408a729477SBarry Smith 
14411eb62cbbSBarry Smith   /* stuff used for matrix vector multiply */
1442416022c9SBarry Smith   a->lvec      = 0;
1443416022c9SBarry Smith   a->Mvctx     = 0;
14448a729477SBarry Smith 
1445d6dfbf8fSBarry Smith   *newmat = mat;
1446d6dfbf8fSBarry Smith   return 0;
1447d6dfbf8fSBarry Smith }
1448c74985f6SBarry Smith 
14493d1612f7SBarry Smith static int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues)
1450d6dfbf8fSBarry Smith {
1451d6dfbf8fSBarry Smith   Mat        mat;
1452416022c9SBarry Smith   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
145325cdf11fSBarry Smith   int        ierr, len,flg;
1454d6dfbf8fSBarry Smith 
1455416022c9SBarry Smith   *newmat       = 0;
14560452661fSBarry Smith   PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm);
1457a5a9c739SBarry Smith   PLogObjectCreate(mat);
14580452661fSBarry Smith   mat->data       = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
1459416022c9SBarry Smith   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
146044a69424SLois Curfman McInnes   mat->destroy    = MatDestroy_MPIAIJ;
146144a69424SLois Curfman McInnes   mat->view       = MatView_MPIAIJ;
1462d6dfbf8fSBarry Smith   mat->factor     = matin->factor;
1463c456f294SBarry Smith   mat->assembled  = PETSC_TRUE;
1464d6dfbf8fSBarry Smith 
1465416022c9SBarry Smith   a->m          = oldmat->m;
1466416022c9SBarry Smith   a->n          = oldmat->n;
1467416022c9SBarry Smith   a->M          = oldmat->M;
1468416022c9SBarry Smith   a->N          = oldmat->N;
1469d6dfbf8fSBarry Smith 
1470416022c9SBarry Smith   a->rstart     = oldmat->rstart;
1471416022c9SBarry Smith   a->rend       = oldmat->rend;
1472416022c9SBarry Smith   a->cstart     = oldmat->cstart;
1473416022c9SBarry Smith   a->cend       = oldmat->cend;
147417699dbbSLois Curfman McInnes   a->size       = oldmat->size;
147517699dbbSLois Curfman McInnes   a->rank       = oldmat->rank;
147664119d58SLois Curfman McInnes   a->insertmode = NOT_SET_VALUES;
1477d6dfbf8fSBarry Smith 
14780452661fSBarry Smith   a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners);
147917699dbbSLois Curfman McInnes   PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
148017699dbbSLois Curfman McInnes   PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));
1481416022c9SBarry Smith   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
14822ee70a88SLois Curfman McInnes   if (oldmat->colmap) {
14830452661fSBarry Smith     a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
1484416022c9SBarry Smith     PLogObjectMemory(mat,(a->N)*sizeof(int));
1485416022c9SBarry Smith     PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
1486416022c9SBarry Smith   } else a->colmap = 0;
1487ec8511deSBarry Smith   if (oldmat->garray && (len = ((Mat_SeqAIJ *) (oldmat->B->data))->n)) {
14880452661fSBarry Smith     a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray);
1489464493b3SBarry Smith     PLogObjectMemory(mat,len*sizeof(int));
1490416022c9SBarry Smith     PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
1491416022c9SBarry Smith   } else a->garray = 0;
1492d6dfbf8fSBarry Smith 
1493416022c9SBarry Smith   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
1494416022c9SBarry Smith   PLogObjectParent(mat,a->lvec);
1495a56f8943SBarry Smith   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
1496416022c9SBarry Smith   PLogObjectParent(mat,a->Mvctx);
1497416022c9SBarry Smith   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
1498416022c9SBarry Smith   PLogObjectParent(mat,a->A);
1499416022c9SBarry Smith   ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
1500416022c9SBarry Smith   PLogObjectParent(mat,a->B);
15015dd7a6c7SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
150225cdf11fSBarry Smith   if (flg) {
1503682d7d0cSBarry Smith     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
1504682d7d0cSBarry Smith   }
15058a729477SBarry Smith   *newmat = mat;
15068a729477SBarry Smith   return 0;
15078a729477SBarry Smith }
1508416022c9SBarry Smith 
1509416022c9SBarry Smith #include "sysio.h"
1510416022c9SBarry Smith 
1511c456f294SBarry Smith int MatLoad_MPIAIJ(Viewer bview,MatType type,Mat *newmat)
1512416022c9SBarry Smith {
1513d65a2f8fSBarry Smith   Mat          A;
1514416022c9SBarry Smith   int          i, nz, ierr, j,rstart, rend, fd;
1515d65a2f8fSBarry Smith   Scalar       *vals,*svals;
1516416022c9SBarry Smith   PetscObject  vobj = (PetscObject) bview;
1517416022c9SBarry Smith   MPI_Comm     comm = vobj->comm;
1518416022c9SBarry Smith   MPI_Status   status;
151917699dbbSLois Curfman McInnes   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1520d65a2f8fSBarry Smith   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
1521d65a2f8fSBarry Smith   int          tag = ((PetscObject)bview)->tag;
1522416022c9SBarry Smith 
152317699dbbSLois Curfman McInnes   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
152417699dbbSLois Curfman McInnes   if (!rank) {
1525416022c9SBarry Smith     ierr = ViewerFileGetDescriptor_Private(bview,&fd); CHKERRQ(ierr);
1526416022c9SBarry Smith     ierr = SYRead(fd,(char *)header,4,SYINT); CHKERRQ(ierr);
1527c456f294SBarry Smith     if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIAIJ:not matrix object");
1528416022c9SBarry Smith   }
1529416022c9SBarry Smith 
1530416022c9SBarry Smith   MPI_Bcast(header+1,3,MPI_INT,0,comm);
1531416022c9SBarry Smith   M = header[1]; N = header[2];
1532416022c9SBarry Smith   /* determine ownership of all rows */
153317699dbbSLois Curfman McInnes   m = M/size + ((M % size) > rank);
15340452661fSBarry Smith   rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
1535416022c9SBarry Smith   MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1536416022c9SBarry Smith   rowners[0] = 0;
153717699dbbSLois Curfman McInnes   for ( i=2; i<=size; i++ ) {
1538416022c9SBarry Smith     rowners[i] += rowners[i-1];
1539416022c9SBarry Smith   }
154017699dbbSLois Curfman McInnes   rstart = rowners[rank];
154117699dbbSLois Curfman McInnes   rend   = rowners[rank+1];
1542416022c9SBarry Smith 
1543416022c9SBarry Smith   /* distribute row lengths to all processors */
15440452661fSBarry Smith   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
1545416022c9SBarry Smith   offlens = ourlens + (rend-rstart);
154617699dbbSLois Curfman McInnes   if (!rank) {
15470452661fSBarry Smith     rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
1548416022c9SBarry Smith     ierr = SYRead(fd,rowlengths,M,SYINT); CHKERRQ(ierr);
15490452661fSBarry Smith     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
155017699dbbSLois Curfman McInnes     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
1551d65a2f8fSBarry Smith     MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
15520452661fSBarry Smith     PetscFree(sndcounts);
1553416022c9SBarry Smith   }
1554416022c9SBarry Smith   else {
1555416022c9SBarry Smith     MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
1556416022c9SBarry Smith   }
1557416022c9SBarry Smith 
155817699dbbSLois Curfman McInnes   if (!rank) {
1559416022c9SBarry Smith     /* calculate the number of nonzeros on each processor */
15600452661fSBarry Smith     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
1561cddf8d76SBarry Smith     PetscMemzero(procsnz,size*sizeof(int));
156217699dbbSLois Curfman McInnes     for ( i=0; i<size; i++ ) {
1563416022c9SBarry Smith       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
1564416022c9SBarry Smith         procsnz[i] += rowlengths[j];
1565416022c9SBarry Smith       }
1566416022c9SBarry Smith     }
15670452661fSBarry Smith     PetscFree(rowlengths);
1568416022c9SBarry Smith 
1569416022c9SBarry Smith     /* determine max buffer needed and allocate it */
1570416022c9SBarry Smith     maxnz = 0;
157117699dbbSLois Curfman McInnes     for ( i=0; i<size; i++ ) {
15720452661fSBarry Smith       maxnz = PetscMax(maxnz,procsnz[i]);
1573416022c9SBarry Smith     }
15740452661fSBarry Smith     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
1575416022c9SBarry Smith 
1576416022c9SBarry Smith     /* read in my part of the matrix column indices  */
1577416022c9SBarry Smith     nz = procsnz[0];
15780452661fSBarry Smith     mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1579d65a2f8fSBarry Smith     ierr = SYRead(fd,mycols,nz,SYINT); CHKERRQ(ierr);
1580d65a2f8fSBarry Smith 
1581d65a2f8fSBarry Smith     /* read in every one elses and ship off */
158217699dbbSLois Curfman McInnes     for ( i=1; i<size; i++ ) {
1583d65a2f8fSBarry Smith       nz = procsnz[i];
1584416022c9SBarry Smith       ierr = SYRead(fd,cols,nz,SYINT); CHKERRQ(ierr);
1585d65a2f8fSBarry Smith       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
1586d65a2f8fSBarry Smith     }
15870452661fSBarry Smith     PetscFree(cols);
1588416022c9SBarry Smith   }
1589416022c9SBarry Smith   else {
1590416022c9SBarry Smith     /* determine buffer space needed for message */
1591416022c9SBarry Smith     nz = 0;
1592416022c9SBarry Smith     for ( i=0; i<m; i++ ) {
1593416022c9SBarry Smith       nz += ourlens[i];
1594416022c9SBarry Smith     }
15950452661fSBarry Smith     mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1596416022c9SBarry Smith 
1597416022c9SBarry Smith     /* receive message of column indices*/
1598d65a2f8fSBarry Smith     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
1599416022c9SBarry Smith     MPI_Get_count(&status,MPI_INT,&maxnz);
1600c456f294SBarry Smith     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file");
1601416022c9SBarry Smith   }
1602416022c9SBarry Smith 
1603416022c9SBarry Smith   /* loop over local rows, determining number of off diagonal entries */
1604cddf8d76SBarry Smith   PetscMemzero(offlens,m*sizeof(int));
1605416022c9SBarry Smith   jj = 0;
1606416022c9SBarry Smith   for ( i=0; i<m; i++ ) {
1607416022c9SBarry Smith     for ( j=0; j<ourlens[i]; j++ ) {
1608d65a2f8fSBarry Smith       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1609416022c9SBarry Smith       jj++;
1610416022c9SBarry Smith     }
1611416022c9SBarry Smith   }
1612d65a2f8fSBarry Smith 
1613d65a2f8fSBarry Smith   /* create our matrix */
1614416022c9SBarry Smith   for ( i=0; i<m; i++ ) {
1615416022c9SBarry Smith     ourlens[i] -= offlens[i];
1616416022c9SBarry Smith   }
1617d65a2f8fSBarry Smith   ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1618d65a2f8fSBarry Smith   A = *newmat;
1619d65a2f8fSBarry Smith   MatSetOption(A,COLUMNS_SORTED);
1620d65a2f8fSBarry Smith   for ( i=0; i<m; i++ ) {
1621d65a2f8fSBarry Smith     ourlens[i] += offlens[i];
1622d65a2f8fSBarry Smith   }
1623416022c9SBarry Smith 
162417699dbbSLois Curfman McInnes   if (!rank) {
16250452661fSBarry Smith     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);
1626416022c9SBarry Smith 
1627416022c9SBarry Smith     /* read in my part of the matrix numerical values  */
1628416022c9SBarry Smith     nz = procsnz[0];
1629416022c9SBarry Smith     ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr);
1630d65a2f8fSBarry Smith 
1631d65a2f8fSBarry Smith     /* insert into matrix */
1632d65a2f8fSBarry Smith     jj      = rstart;
1633d65a2f8fSBarry Smith     smycols = mycols;
1634d65a2f8fSBarry Smith     svals   = vals;
1635d65a2f8fSBarry Smith     for ( i=0; i<m; i++ ) {
1636d65a2f8fSBarry Smith       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1637d65a2f8fSBarry Smith       smycols += ourlens[i];
1638d65a2f8fSBarry Smith       svals   += ourlens[i];
1639d65a2f8fSBarry Smith       jj++;
1640416022c9SBarry Smith     }
1641416022c9SBarry Smith 
1642d65a2f8fSBarry Smith     /* read in other processors and ship out */
164317699dbbSLois Curfman McInnes     for ( i=1; i<size; i++ ) {
1644416022c9SBarry Smith       nz = procsnz[i];
1645416022c9SBarry Smith       ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr);
1646d65a2f8fSBarry Smith       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
1647416022c9SBarry Smith     }
16480452661fSBarry Smith     PetscFree(procsnz);
1649416022c9SBarry Smith   }
1650d65a2f8fSBarry Smith   else {
1651d65a2f8fSBarry Smith     /* receive numeric values */
16520452661fSBarry Smith     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);
1653416022c9SBarry Smith 
1654d65a2f8fSBarry Smith     /* receive message of values*/
1655d65a2f8fSBarry Smith     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
1656d65a2f8fSBarry Smith     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1657c456f294SBarry Smith     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file");
1658d65a2f8fSBarry Smith 
1659d65a2f8fSBarry Smith     /* insert into matrix */
1660d65a2f8fSBarry Smith     jj      = rstart;
1661d65a2f8fSBarry Smith     smycols = mycols;
1662d65a2f8fSBarry Smith     svals   = vals;
1663d65a2f8fSBarry Smith     for ( i=0; i<m; i++ ) {
1664d65a2f8fSBarry Smith       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1665d65a2f8fSBarry Smith       smycols += ourlens[i];
1666d65a2f8fSBarry Smith       svals   += ourlens[i];
1667d65a2f8fSBarry Smith       jj++;
1668d65a2f8fSBarry Smith     }
1669d65a2f8fSBarry Smith   }
16700452661fSBarry Smith   PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);
1671d65a2f8fSBarry Smith 
1672d65a2f8fSBarry Smith   ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
1673d65a2f8fSBarry Smith   ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
1674416022c9SBarry Smith   return 0;
1675416022c9SBarry Smith }
1676