xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 95e01e2f099a96a22068d8621ba47172d7865843)
1cb512458SBarry Smith #ifndef lint
2*95e01e2fSLois Curfman McInnes static char vcid[] = "$Id: mpiaij.c,v 1.126 1996/02/09 15:10:11 curfman Exp curfman $";
3cb512458SBarry Smith #endif
48a729477SBarry Smith 
51eb62cbbSBarry Smith #include "mpiaij.h"
68a729477SBarry Smith #include "vec/vecimpl.h"
7d6dfbf8fSBarry Smith #include "inline/spops.h"
88a729477SBarry Smith 
99e25ed09SBarry Smith /* local utility routine that creates a mapping from the global column
109e25ed09SBarry Smith number to the local number in the off-diagonal part of the local
119e25ed09SBarry Smith storage of the matrix.  This is done in a non scable way since the
129e25ed09SBarry Smith length of colmap equals the global matrix length.
139e25ed09SBarry Smith */
14b7c46309SBarry Smith static int CreateColmap_Private(Mat mat)
159e25ed09SBarry Smith {
1644a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
17ec8511deSBarry Smith   Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
18416022c9SBarry Smith   int        n = B->n,i,shift = B->indexshift;
19dbb450caSBarry Smith 
200452661fSBarry Smith   aij->colmap = (int *) PetscMalloc(aij->N*sizeof(int));CHKPTRQ(aij->colmap);
21464493b3SBarry Smith   PLogObjectMemory(mat,aij->N*sizeof(int));
22cddf8d76SBarry Smith   PetscMemzero(aij->colmap,aij->N*sizeof(int));
23dbb450caSBarry Smith   for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i-shift;
249e25ed09SBarry Smith   return 0;
259e25ed09SBarry Smith }
269e25ed09SBarry Smith 
272493cbb0SBarry Smith extern int DisAssemble_MPIAIJ(Mat);
282493cbb0SBarry Smith 
2951c98ccdSLois Curfman McInnes static int MatGetReordering_MPIAIJ(Mat mat,MatOrdering type,IS *rperm,IS *cperm)
30299609e3SLois Curfman McInnes {
31299609e3SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
32299609e3SLois Curfman McInnes   int        ierr;
3317699dbbSLois Curfman McInnes   if (aij->size == 1) {
3451c98ccdSLois Curfman McInnes     ierr = MatGetReordering(aij->A,type,rperm,cperm); CHKERRQ(ierr);
3548d91487SBarry Smith   } else SETERRQ(1,"MatGetReordering_MPIAIJ:not supported in parallel");
36299609e3SLois Curfman McInnes   return 0;
37299609e3SLois Curfman McInnes }
38299609e3SLois Curfman McInnes 
394b0e389bSBarry Smith static int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
408a729477SBarry Smith {
4144a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
42dbb450caSBarry Smith   Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data;
434b0e389bSBarry Smith   Scalar     value;
441eb62cbbSBarry Smith   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
451eb62cbbSBarry Smith   int        cstart = aij->cstart, cend = aij->cend,row,col;
464b0e389bSBarry Smith   int        shift = C->indexshift,roworiented = aij->roworiented;
478a729477SBarry Smith 
4864119d58SLois Curfman McInnes   if (aij->insertmode != NOT_SET_VALUES && aij->insertmode != addv) {
4948d91487SBarry Smith     SETERRQ(1,"MatSetValues_MPIAIJ:Cannot mix inserts and adds");
508a729477SBarry Smith   }
511eb62cbbSBarry Smith   aij->insertmode = addv;
528a729477SBarry Smith   for ( i=0; i<m; i++ ) {
534b0e389bSBarry Smith     if (im[i] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative row");
544b0e389bSBarry Smith     if (im[i] >= aij->M) SETERRQ(1,"MatSetValues_MPIAIJ:Row too large");
554b0e389bSBarry Smith     if (im[i] >= rstart && im[i] < rend) {
564b0e389bSBarry Smith       row = im[i] - rstart;
571eb62cbbSBarry Smith       for ( j=0; j<n; j++ ) {
584b0e389bSBarry Smith         if (in[j] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative column");
594b0e389bSBarry Smith         if (in[j] >= aij->N) SETERRQ(1,"MatSetValues_MPIAIJ:Col too large");
604b0e389bSBarry Smith         if (in[j] >= cstart && in[j] < cend){
614b0e389bSBarry Smith           col = in[j] - cstart;
624b0e389bSBarry Smith           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
634b0e389bSBarry Smith           ierr = MatSetValues(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
641eb62cbbSBarry Smith         }
651eb62cbbSBarry Smith         else {
66227d817aSBarry Smith           if (mat->was_assembled) {
67b7c46309SBarry Smith             if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
684b0e389bSBarry Smith             col = aij->colmap[in[j]] + shift;
69ec8511deSBarry Smith             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
702493cbb0SBarry Smith               ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
714b0e389bSBarry Smith               col =  in[j];
72d6dfbf8fSBarry Smith             }
739e25ed09SBarry Smith           }
744b0e389bSBarry Smith           else col = in[j];
754b0e389bSBarry Smith           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
764b0e389bSBarry Smith           ierr = MatSetValues(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
771eb62cbbSBarry Smith         }
781eb62cbbSBarry Smith       }
791eb62cbbSBarry Smith     }
801eb62cbbSBarry Smith     else {
814b0e389bSBarry Smith       if (roworiented) {
824b0e389bSBarry Smith         ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
834b0e389bSBarry Smith       }
844b0e389bSBarry Smith       else {
854b0e389bSBarry Smith         row = im[i];
864b0e389bSBarry Smith         for ( j=0; j<n; j++ ) {
874b0e389bSBarry Smith           ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
884b0e389bSBarry Smith         }
894b0e389bSBarry Smith       }
901eb62cbbSBarry Smith     }
918a729477SBarry Smith   }
928a729477SBarry Smith   return 0;
938a729477SBarry Smith }
948a729477SBarry Smith 
95b49de8d1SLois Curfman McInnes static int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
96b49de8d1SLois Curfman McInnes {
97b49de8d1SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
98b49de8d1SLois Curfman McInnes   Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data;
99b49de8d1SLois Curfman McInnes   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
100b49de8d1SLois Curfman McInnes   int        cstart = aij->cstart, cend = aij->cend,row,col;
101b49de8d1SLois Curfman McInnes   int        shift = C->indexshift;
102b49de8d1SLois Curfman McInnes 
103b49de8d1SLois Curfman McInnes   for ( i=0; i<m; i++ ) {
104b49de8d1SLois Curfman McInnes     if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative row");
105b49de8d1SLois Curfman McInnes     if (idxm[i] >= aij->M) SETERRQ(1,"MatGetValues_MPIAIJ:Row too large");
106b49de8d1SLois Curfman McInnes     if (idxm[i] >= rstart && idxm[i] < rend) {
107b49de8d1SLois Curfman McInnes       row = idxm[i] - rstart;
108b49de8d1SLois Curfman McInnes       for ( j=0; j<n; j++ ) {
109b49de8d1SLois Curfman McInnes         if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative column");
110b49de8d1SLois Curfman McInnes         if (idxn[j] >= aij->N) SETERRQ(1,"MatGetValues_MPIAIJ:Col too large");
111b49de8d1SLois Curfman McInnes         if (idxn[j] >= cstart && idxn[j] < cend){
112b49de8d1SLois Curfman McInnes           col = idxn[j] - cstart;
113b49de8d1SLois Curfman McInnes           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
114b49de8d1SLois Curfman McInnes         }
115b49de8d1SLois Curfman McInnes         else {
116b49de8d1SLois Curfman McInnes           col = aij->colmap[idxn[j]] + shift;
117b49de8d1SLois Curfman McInnes           ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
118b49de8d1SLois Curfman McInnes         }
119b49de8d1SLois Curfman McInnes       }
120b49de8d1SLois Curfman McInnes     }
121b49de8d1SLois Curfman McInnes     else {
122b49de8d1SLois Curfman McInnes       SETERRQ(1,"MatGetValues_MPIAIJ:Only local values currently supported");
123b49de8d1SLois Curfman McInnes     }
124b49de8d1SLois Curfman McInnes   }
125b49de8d1SLois Curfman McInnes   return 0;
126b49de8d1SLois Curfman McInnes }
127b49de8d1SLois Curfman McInnes 
128fc76ce87SLois Curfman McInnes static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
1298a729477SBarry Smith {
13044a69424SLois Curfman McInnes   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
131d6dfbf8fSBarry Smith   MPI_Comm    comm = mat->comm;
13217699dbbSLois Curfman McInnes   int         size = aij->size, *owners = aij->rowners;
13317699dbbSLois Curfman McInnes   int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
1341eb62cbbSBarry Smith   MPI_Request *send_waits,*recv_waits;
1356abc6512SBarry Smith   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
1361eb62cbbSBarry Smith   InsertMode  addv;
1371eb62cbbSBarry Smith   Scalar      *rvalues,*svalues;
1381eb62cbbSBarry Smith 
1391eb62cbbSBarry Smith   /* make sure all processors are either in INSERTMODE or ADDMODE */
140d65a2f8fSBarry Smith   MPI_Allreduce(&aij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
141dbb450caSBarry Smith   if (addv == (ADD_VALUES|INSERT_VALUES)) {
142bbb6d6a8SBarry Smith     SETERRQ(1,"MatAssemblyBegin_MPIAIJ:Some processors inserted others added");
1431eb62cbbSBarry Smith   }
1441eb62cbbSBarry Smith   aij->insertmode = addv; /* in case this processor had no cache */
1451eb62cbbSBarry Smith 
1461eb62cbbSBarry Smith   /*  first count number of contributors to each processor */
1470452661fSBarry Smith   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
148cddf8d76SBarry Smith   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
1490452661fSBarry Smith   owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
1501eb62cbbSBarry Smith   for ( i=0; i<aij->stash.n; i++ ) {
1511eb62cbbSBarry Smith     idx = aij->stash.idx[i];
15217699dbbSLois Curfman McInnes     for ( j=0; j<size; j++ ) {
1531eb62cbbSBarry Smith       if (idx >= owners[j] && idx < owners[j+1]) {
1541eb62cbbSBarry Smith         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
1558a729477SBarry Smith       }
1568a729477SBarry Smith     }
1578a729477SBarry Smith   }
15817699dbbSLois Curfman McInnes   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
1591eb62cbbSBarry Smith 
1601eb62cbbSBarry Smith   /* inform other processors of number of messages and max length*/
1610452661fSBarry Smith   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
16217699dbbSLois Curfman McInnes   MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);
16317699dbbSLois Curfman McInnes   nreceives = work[rank];
16417699dbbSLois Curfman McInnes   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
16517699dbbSLois Curfman McInnes   nmax = work[rank];
1660452661fSBarry Smith   PetscFree(work);
1671eb62cbbSBarry Smith 
1681eb62cbbSBarry Smith   /* post receives:
1691eb62cbbSBarry Smith        1) each message will consist of ordered pairs
1701eb62cbbSBarry Smith      (global index,value) we store the global index as a double
171d6dfbf8fSBarry Smith      to simplify the message passing.
1721eb62cbbSBarry Smith        2) since we don't know how long each individual message is we
1731eb62cbbSBarry Smith      allocate the largest needed buffer for each receive. Potentially
1741eb62cbbSBarry Smith      this is a lot of wasted space.
1751eb62cbbSBarry Smith 
1761eb62cbbSBarry Smith 
1771eb62cbbSBarry Smith        This could be done better.
1781eb62cbbSBarry Smith   */
1790452661fSBarry Smith   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
18078b31e54SBarry Smith   CHKPTRQ(rvalues);
1810452661fSBarry Smith   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
18278b31e54SBarry Smith   CHKPTRQ(recv_waits);
1831eb62cbbSBarry Smith   for ( i=0; i<nreceives; i++ ) {
184416022c9SBarry Smith     MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
1851eb62cbbSBarry Smith               comm,recv_waits+i);
1861eb62cbbSBarry Smith   }
1871eb62cbbSBarry Smith 
1881eb62cbbSBarry Smith   /* do sends:
1891eb62cbbSBarry Smith       1) starts[i] gives the starting index in svalues for stuff going to
1901eb62cbbSBarry Smith          the ith processor
1911eb62cbbSBarry Smith   */
1920452661fSBarry Smith   svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
1930452661fSBarry Smith   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
19478b31e54SBarry Smith   CHKPTRQ(send_waits);
1950452661fSBarry Smith   starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
1961eb62cbbSBarry Smith   starts[0] = 0;
19717699dbbSLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
1981eb62cbbSBarry Smith   for ( i=0; i<aij->stash.n; i++ ) {
1991eb62cbbSBarry Smith     svalues[3*starts[owner[i]]]       = (Scalar)  aij->stash.idx[i];
2001eb62cbbSBarry Smith     svalues[3*starts[owner[i]]+1]     = (Scalar)  aij->stash.idy[i];
2011eb62cbbSBarry Smith     svalues[3*(starts[owner[i]]++)+2] =  aij->stash.array[i];
2021eb62cbbSBarry Smith   }
2030452661fSBarry Smith   PetscFree(owner);
2041eb62cbbSBarry Smith   starts[0] = 0;
20517699dbbSLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
2061eb62cbbSBarry Smith   count = 0;
20717699dbbSLois Curfman McInnes   for ( i=0; i<size; i++ ) {
2081eb62cbbSBarry Smith     if (procs[i]) {
209416022c9SBarry Smith       MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
2101eb62cbbSBarry Smith                 comm,send_waits+count++);
2111eb62cbbSBarry Smith     }
2121eb62cbbSBarry Smith   }
2130452661fSBarry Smith   PetscFree(starts); PetscFree(nprocs);
2141eb62cbbSBarry Smith 
2151eb62cbbSBarry Smith   /* Free cache space */
2162191d07cSBarry Smith   PLogInfo(0,"[%d]MatAssemblyBegin_MPIAIJ:Number of off processor values %d\n",rank,aij->stash.n);
21778b31e54SBarry Smith   ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
2181eb62cbbSBarry Smith 
2191eb62cbbSBarry Smith   aij->svalues    = svalues;    aij->rvalues    = rvalues;
2201eb62cbbSBarry Smith   aij->nsends     = nsends;     aij->nrecvs     = nreceives;
2211eb62cbbSBarry Smith   aij->send_waits = send_waits; aij->recv_waits = recv_waits;
2221eb62cbbSBarry Smith   aij->rmax       = nmax;
2231eb62cbbSBarry Smith 
2241eb62cbbSBarry Smith   return 0;
2251eb62cbbSBarry Smith }
22644a69424SLois Curfman McInnes extern int MatSetUpMultiply_MPIAIJ(Mat);
2271eb62cbbSBarry Smith 
228fc76ce87SLois Curfman McInnes static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
2291eb62cbbSBarry Smith {
23044a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
231dbb450caSBarry Smith   Mat_SeqAIJ *C = (Mat_SeqAIJ *) aij->A->data;
2321eb62cbbSBarry Smith   MPI_Status  *send_status,recv_status;
233416022c9SBarry Smith   int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
234416022c9SBarry Smith   int         row,col,other_disassembled,shift = C->indexshift;
2351eb62cbbSBarry Smith   Scalar      *values,val;
2361eb62cbbSBarry Smith   InsertMode  addv = aij->insertmode;
2371eb62cbbSBarry Smith 
2381eb62cbbSBarry Smith   /*  wait on receives */
2391eb62cbbSBarry Smith   while (count) {
240d6dfbf8fSBarry Smith     MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
2411eb62cbbSBarry Smith     /* unpack receives into our local space */
242d6dfbf8fSBarry Smith     values = aij->rvalues + 3*imdex*aij->rmax;
243c60448a5SBarry Smith     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
2441eb62cbbSBarry Smith     n = n/3;
2451eb62cbbSBarry Smith     for ( i=0; i<n; i++ ) {
246227d817aSBarry Smith       row = (int) PetscReal(values[3*i]) - aij->rstart;
247227d817aSBarry Smith       col = (int) PetscReal(values[3*i+1]);
2481eb62cbbSBarry Smith       val = values[3*i+2];
2491eb62cbbSBarry Smith       if (col >= aij->cstart && col < aij->cend) {
2501eb62cbbSBarry Smith         col -= aij->cstart;
2511eb62cbbSBarry Smith         MatSetValues(aij->A,1,&row,1,&col,&val,addv);
2521eb62cbbSBarry Smith       }
2531eb62cbbSBarry Smith       else {
254227d817aSBarry Smith         if (mat->was_assembled) {
255b7c46309SBarry Smith           if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
256dbb450caSBarry Smith           col = aij->colmap[col] + shift;
257ec8511deSBarry Smith           if (col < 0  && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
2582493cbb0SBarry Smith             ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
259227d817aSBarry Smith             col = (int) PetscReal(values[3*i+1]);
260d6dfbf8fSBarry Smith           }
2619e25ed09SBarry Smith         }
2621eb62cbbSBarry Smith         MatSetValues(aij->B,1,&row,1,&col,&val,addv);
2631eb62cbbSBarry Smith       }
2641eb62cbbSBarry Smith     }
2651eb62cbbSBarry Smith     count--;
2661eb62cbbSBarry Smith   }
2670452661fSBarry Smith   PetscFree(aij->recv_waits); PetscFree(aij->rvalues);
2681eb62cbbSBarry Smith 
2691eb62cbbSBarry Smith   /* wait on sends */
2701eb62cbbSBarry Smith   if (aij->nsends) {
2710452661fSBarry Smith     send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));
27278b31e54SBarry Smith     CHKPTRQ(send_status);
2731eb62cbbSBarry Smith     MPI_Waitall(aij->nsends,aij->send_waits,send_status);
2740452661fSBarry Smith     PetscFree(send_status);
2751eb62cbbSBarry Smith   }
2760452661fSBarry Smith   PetscFree(aij->send_waits); PetscFree(aij->svalues);
2771eb62cbbSBarry Smith 
27864119d58SLois Curfman McInnes   aij->insertmode = NOT_SET_VALUES;
27978b31e54SBarry Smith   ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
28078b31e54SBarry Smith   ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);
2811eb62cbbSBarry Smith 
2822493cbb0SBarry Smith   /* determine if any processor has disassembled, if so we must
2832493cbb0SBarry Smith      also disassemble ourselfs, in order that we may reassemble. */
284227d817aSBarry Smith   MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
285227d817aSBarry Smith   if (mat->was_assembled && !other_disassembled) {
2862493cbb0SBarry Smith     ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
2872493cbb0SBarry Smith   }
2882493cbb0SBarry Smith 
289227d817aSBarry Smith   if (!mat->was_assembled && mode == FINAL_ASSEMBLY) {
29078b31e54SBarry Smith     ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
2915e42470aSBarry Smith   }
29278b31e54SBarry Smith   ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
29378b31e54SBarry Smith   ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);
2945e42470aSBarry Smith 
2958a729477SBarry Smith   return 0;
2968a729477SBarry Smith }
2978a729477SBarry Smith 
2982ee70a88SLois Curfman McInnes static int MatZeroEntries_MPIAIJ(Mat A)
2991eb62cbbSBarry Smith {
30044a69424SLois Curfman McInnes   Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
301dbd7a890SLois Curfman McInnes   int        ierr;
30278b31e54SBarry Smith   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
30378b31e54SBarry Smith   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
3041eb62cbbSBarry Smith   return 0;
3051eb62cbbSBarry Smith }
3061eb62cbbSBarry Smith 
3071eb62cbbSBarry Smith /* the code does not do the diagonal entries correctly unless the
3081eb62cbbSBarry Smith    matrix is square and the column and row owerships are identical.
3091eb62cbbSBarry Smith    This is a BUG. The only way to fix it seems to be to access
3101eb62cbbSBarry Smith    aij->A and aij->B directly and not through the MatZeroRows()
3111eb62cbbSBarry Smith    routine.
3121eb62cbbSBarry Smith */
31344a69424SLois Curfman McInnes static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
3141eb62cbbSBarry Smith {
31544a69424SLois Curfman McInnes   Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
31617699dbbSLois Curfman McInnes   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
3176abc6512SBarry Smith   int            *procs,*nprocs,j,found,idx,nsends,*work;
31817699dbbSLois Curfman McInnes   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
3195392566eSBarry Smith   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
320d6dfbf8fSBarry Smith   int            *lens,imdex,*lrows,*values;
321d6dfbf8fSBarry Smith   MPI_Comm       comm = A->comm;
3221eb62cbbSBarry Smith   MPI_Request    *send_waits,*recv_waits;
3231eb62cbbSBarry Smith   MPI_Status     recv_status,*send_status;
3241eb62cbbSBarry Smith   IS             istmp;
3251eb62cbbSBarry Smith 
32678b31e54SBarry Smith   ierr = ISGetLocalSize(is,&N); CHKERRQ(ierr);
32778b31e54SBarry Smith   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
3281eb62cbbSBarry Smith 
3291eb62cbbSBarry Smith   /*  first count number of contributors to each processor */
3300452661fSBarry Smith   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
331cddf8d76SBarry Smith   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
3320452661fSBarry Smith   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
3331eb62cbbSBarry Smith   for ( i=0; i<N; i++ ) {
3341eb62cbbSBarry Smith     idx = rows[i];
3351eb62cbbSBarry Smith     found = 0;
33617699dbbSLois Curfman McInnes     for ( j=0; j<size; j++ ) {
3371eb62cbbSBarry Smith       if (idx >= owners[j] && idx < owners[j+1]) {
3381eb62cbbSBarry Smith         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
3391eb62cbbSBarry Smith       }
3401eb62cbbSBarry Smith     }
341bbb6d6a8SBarry Smith     if (!found) SETERRQ(1,"MatZeroRows_MPIAIJ:Index out of range");
3421eb62cbbSBarry Smith   }
34317699dbbSLois Curfman McInnes   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
3441eb62cbbSBarry Smith 
3451eb62cbbSBarry Smith   /* inform other processors of number of messages and max length*/
3460452661fSBarry Smith   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
34717699dbbSLois Curfman McInnes   MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
34817699dbbSLois Curfman McInnes   nrecvs = work[rank];
34917699dbbSLois Curfman McInnes   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
35017699dbbSLois Curfman McInnes   nmax = work[rank];
3510452661fSBarry Smith   PetscFree(work);
3521eb62cbbSBarry Smith 
3531eb62cbbSBarry Smith   /* post receives:   */
3540452661fSBarry Smith   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
35578b31e54SBarry Smith   CHKPTRQ(rvalues);
3560452661fSBarry Smith   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
35778b31e54SBarry Smith   CHKPTRQ(recv_waits);
3581eb62cbbSBarry Smith   for ( i=0; i<nrecvs; i++ ) {
359416022c9SBarry Smith     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
3601eb62cbbSBarry Smith   }
3611eb62cbbSBarry Smith 
3621eb62cbbSBarry Smith   /* do sends:
3631eb62cbbSBarry Smith       1) starts[i] gives the starting index in svalues for stuff going to
3641eb62cbbSBarry Smith          the ith processor
3651eb62cbbSBarry Smith   */
3660452661fSBarry Smith   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
3670452661fSBarry Smith   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
36878b31e54SBarry Smith   CHKPTRQ(send_waits);
3690452661fSBarry Smith   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
3701eb62cbbSBarry Smith   starts[0] = 0;
37117699dbbSLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
3721eb62cbbSBarry Smith   for ( i=0; i<N; i++ ) {
3731eb62cbbSBarry Smith     svalues[starts[owner[i]]++] = rows[i];
3741eb62cbbSBarry Smith   }
3751eb62cbbSBarry Smith   ISRestoreIndices(is,&rows);
3761eb62cbbSBarry Smith 
3771eb62cbbSBarry Smith   starts[0] = 0;
37817699dbbSLois Curfman McInnes   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
3791eb62cbbSBarry Smith   count = 0;
38017699dbbSLois Curfman McInnes   for ( i=0; i<size; i++ ) {
3811eb62cbbSBarry Smith     if (procs[i]) {
382416022c9SBarry Smith       MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
3831eb62cbbSBarry Smith     }
3841eb62cbbSBarry Smith   }
3850452661fSBarry Smith   PetscFree(starts);
3861eb62cbbSBarry Smith 
38717699dbbSLois Curfman McInnes   base = owners[rank];
3881eb62cbbSBarry Smith 
3891eb62cbbSBarry Smith   /*  wait on receives */
3900452661fSBarry Smith   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
3911eb62cbbSBarry Smith   source = lens + nrecvs;
3921eb62cbbSBarry Smith   count  = nrecvs; slen = 0;
3931eb62cbbSBarry Smith   while (count) {
394d6dfbf8fSBarry Smith     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
3951eb62cbbSBarry Smith     /* unpack receives into our local space */
3961eb62cbbSBarry Smith     MPI_Get_count(&recv_status,MPI_INT,&n);
397d6dfbf8fSBarry Smith     source[imdex]  = recv_status.MPI_SOURCE;
398d6dfbf8fSBarry Smith     lens[imdex]  = n;
3991eb62cbbSBarry Smith     slen += n;
4001eb62cbbSBarry Smith     count--;
4011eb62cbbSBarry Smith   }
4020452661fSBarry Smith   PetscFree(recv_waits);
4031eb62cbbSBarry Smith 
4041eb62cbbSBarry Smith   /* move the data into the send scatter */
4050452661fSBarry Smith   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
4061eb62cbbSBarry Smith   count = 0;
4071eb62cbbSBarry Smith   for ( i=0; i<nrecvs; i++ ) {
4081eb62cbbSBarry Smith     values = rvalues + i*nmax;
4091eb62cbbSBarry Smith     for ( j=0; j<lens[i]; j++ ) {
4101eb62cbbSBarry Smith       lrows[count++] = values[j] - base;
4111eb62cbbSBarry Smith     }
4121eb62cbbSBarry Smith   }
4130452661fSBarry Smith   PetscFree(rvalues); PetscFree(lens);
4140452661fSBarry Smith   PetscFree(owner); PetscFree(nprocs);
4151eb62cbbSBarry Smith 
4161eb62cbbSBarry Smith   /* actually zap the local rows */
417416022c9SBarry Smith   ierr = ISCreateSeq(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
418464493b3SBarry Smith   PLogObjectParent(A,istmp);
4190452661fSBarry Smith   PetscFree(lrows);
42078b31e54SBarry Smith   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
42178b31e54SBarry Smith   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
42278b31e54SBarry Smith   ierr = ISDestroy(istmp); CHKERRQ(ierr);
4231eb62cbbSBarry Smith 
4241eb62cbbSBarry Smith   /* wait on sends */
4251eb62cbbSBarry Smith   if (nsends) {
4260452661fSBarry Smith     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
42778b31e54SBarry Smith     CHKPTRQ(send_status);
4281eb62cbbSBarry Smith     MPI_Waitall(nsends,send_waits,send_status);
4290452661fSBarry Smith     PetscFree(send_status);
4301eb62cbbSBarry Smith   }
4310452661fSBarry Smith   PetscFree(send_waits); PetscFree(svalues);
4321eb62cbbSBarry Smith 
4331eb62cbbSBarry Smith   return 0;
4341eb62cbbSBarry Smith }
4351eb62cbbSBarry Smith 
436416022c9SBarry Smith static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
4371eb62cbbSBarry Smith {
438416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
4391eb62cbbSBarry Smith   int        ierr;
440416022c9SBarry Smith 
44164119d58SLois Curfman McInnes   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
442416022c9SBarry Smith   ierr = MatMult(a->A,xx,yy); CHKERRQ(ierr);
44364119d58SLois Curfman McInnes   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
444416022c9SBarry Smith   ierr = MatMultAdd(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
4451eb62cbbSBarry Smith   return 0;
4461eb62cbbSBarry Smith }
4471eb62cbbSBarry Smith 
448416022c9SBarry Smith static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
449da3a660dSBarry Smith {
450416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
451da3a660dSBarry Smith   int        ierr;
45264119d58SLois Curfman McInnes   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
453416022c9SBarry Smith   ierr = MatMultAdd(a->A,xx,yy,zz); CHKERRQ(ierr);
45464119d58SLois Curfman McInnes   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
455416022c9SBarry Smith   ierr = MatMultAdd(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
456da3a660dSBarry Smith   return 0;
457da3a660dSBarry Smith }
458da3a660dSBarry Smith 
459416022c9SBarry Smith static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
460da3a660dSBarry Smith {
461416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
462da3a660dSBarry Smith   int        ierr;
463da3a660dSBarry Smith 
464da3a660dSBarry Smith   /* do nondiagonal part */
465416022c9SBarry Smith   ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr);
466da3a660dSBarry Smith   /* send it on its way */
467416022c9SBarry Smith   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,
46864119d58SLois Curfman McInnes                 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
469da3a660dSBarry Smith   /* do local part */
470416022c9SBarry Smith   ierr = MatMultTrans(a->A,xx,yy); CHKERRQ(ierr);
471da3a660dSBarry Smith   /* receive remote parts: note this assumes the values are not actually */
472da3a660dSBarry Smith   /* inserted in yy until the next line, which is true for my implementation*/
473da3a660dSBarry Smith   /* but is not perhaps always true. */
474416022c9SBarry Smith   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,
47564119d58SLois Curfman McInnes                   (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
476da3a660dSBarry Smith   return 0;
477da3a660dSBarry Smith }
478da3a660dSBarry Smith 
479416022c9SBarry Smith static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
480da3a660dSBarry Smith {
481416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
482da3a660dSBarry Smith   int        ierr;
483da3a660dSBarry Smith 
484da3a660dSBarry Smith   /* do nondiagonal part */
485416022c9SBarry Smith   ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr);
486da3a660dSBarry Smith   /* send it on its way */
487416022c9SBarry Smith   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,
48864119d58SLois Curfman McInnes                  (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
489da3a660dSBarry Smith   /* do local part */
490416022c9SBarry Smith   ierr = MatMultTransAdd(a->A,xx,yy,zz); CHKERRQ(ierr);
491da3a660dSBarry Smith   /* receive remote parts: note this assumes the values are not actually */
492da3a660dSBarry Smith   /* inserted in yy until the next line, which is true for my implementation*/
493da3a660dSBarry Smith   /* but is not perhaps always true. */
494416022c9SBarry Smith   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,
49564119d58SLois Curfman McInnes                   (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
496da3a660dSBarry Smith   return 0;
497da3a660dSBarry Smith }
498da3a660dSBarry Smith 
4991eb62cbbSBarry Smith /*
5001eb62cbbSBarry Smith   This only works correctly for square matrices where the subblock A->A is the
5011eb62cbbSBarry Smith    diagonal block
5021eb62cbbSBarry Smith */
503416022c9SBarry Smith static int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
5041eb62cbbSBarry Smith {
505416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
506416022c9SBarry Smith   return MatGetDiagonal(a->A,v);
5071eb62cbbSBarry Smith }
5081eb62cbbSBarry Smith 
509052efed2SBarry Smith static int MatScale_MPIAIJ(Scalar *aa,Mat A)
510052efed2SBarry Smith {
511052efed2SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
512052efed2SBarry Smith   int        ierr;
513052efed2SBarry Smith   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
514052efed2SBarry Smith   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
515052efed2SBarry Smith   return 0;
516052efed2SBarry Smith }
517052efed2SBarry Smith 
51844a69424SLois Curfman McInnes static int MatDestroy_MPIAIJ(PetscObject obj)
5191eb62cbbSBarry Smith {
5201eb62cbbSBarry Smith   Mat        mat = (Mat) obj;
52144a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
5221eb62cbbSBarry Smith   int        ierr;
523a5a9c739SBarry Smith #if defined(PETSC_LOG)
5246e35fa57SLois Curfman McInnes   PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N);
525a5a9c739SBarry Smith #endif
5260452661fSBarry Smith   PetscFree(aij->rowners);
52778b31e54SBarry Smith   ierr = MatDestroy(aij->A); CHKERRQ(ierr);
52878b31e54SBarry Smith   ierr = MatDestroy(aij->B); CHKERRQ(ierr);
5290452661fSBarry Smith   if (aij->colmap) PetscFree(aij->colmap);
5300452661fSBarry Smith   if (aij->garray) PetscFree(aij->garray);
5311eb62cbbSBarry Smith   if (aij->lvec)   VecDestroy(aij->lvec);
532a56f8943SBarry Smith   if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
5330452661fSBarry Smith   PetscFree(aij);
534a5a9c739SBarry Smith   PLogObjectDestroy(mat);
5350452661fSBarry Smith   PetscHeaderDestroy(mat);
5361eb62cbbSBarry Smith   return 0;
5371eb62cbbSBarry Smith }
5387857610eSBarry Smith #include "draw.h"
539b16a3bb1SBarry Smith #include "pinclude/pviewer.h"
540ee50ffe9SBarry Smith 
541416022c9SBarry Smith static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
5421eb62cbbSBarry Smith {
543416022c9SBarry Smith   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
544416022c9SBarry Smith   int         ierr;
545416022c9SBarry Smith 
54617699dbbSLois Curfman McInnes   if (aij->size == 1) {
547416022c9SBarry Smith     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
548416022c9SBarry Smith   }
549a523beb4SLois Curfman McInnes   else SETERRQ(1,"MatView_MPIAIJ_Binary:Only uniprocessor output supported");
550416022c9SBarry Smith   return 0;
551416022c9SBarry Smith }
552416022c9SBarry Smith 
553d7e8b826SBarry Smith static int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
554416022c9SBarry Smith {
55544a69424SLois Curfman McInnes   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
556dbb450caSBarry Smith   Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
557a56f8943SBarry Smith   int         ierr, format,shift = C->indexshift,rank;
558d13ab20cSBarry Smith   PetscObject vobj = (PetscObject) viewer;
559d636dbe3SBarry Smith   FILE        *fd;
560416022c9SBarry Smith 
561416022c9SBarry Smith   if (vobj->type == ASCII_FILE_VIEWER || vobj->type == ASCII_FILES_VIEWER) {
562416022c9SBarry Smith     ierr = ViewerFileGetFormat_Private(viewer,&format);
56308480c60SBarry Smith     if (format == FILE_FORMAT_INFO_DETAILED) {
564*95e01e2fSLois Curfman McInnes       int nz, nzalloc, mem, flg;
565a56f8943SBarry Smith       MPI_Comm_rank(mat->comm,&rank);
566227d817aSBarry Smith       ierr = ViewerFileGetPointer(viewer,&fd); CHKERRQ(ierr);
567a56f8943SBarry Smith       ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem);
568*95e01e2fSLois Curfman McInnes       ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr);
569a56f8943SBarry Smith       MPIU_Seq_begin(mat->comm,1);
570*95e01e2fSLois Curfman McInnes       if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
571*95e01e2fSLois Curfman McInnes          rank,aij->m,nz,nzalloc,mem);
572*95e01e2fSLois Curfman McInnes       else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
573*95e01e2fSLois Curfman McInnes          rank,aij->m,nz,nzalloc,mem);
57408480c60SBarry Smith       ierr = MatGetInfo(aij->A,MAT_LOCAL,&nz,&nzalloc,&mem);
575*95e01e2fSLois Curfman McInnes       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,nz);
57608480c60SBarry Smith       ierr = MatGetInfo(aij->B,MAT_LOCAL,&nz,&nzalloc,&mem);
577*95e01e2fSLois Curfman McInnes       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,nz);
578a56f8943SBarry Smith       fflush(fd);
579a56f8943SBarry Smith       MPIU_Seq_end(mat->comm,1);
580a40aa06bSLois Curfman McInnes       ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr);
581416022c9SBarry Smith       return 0;
582416022c9SBarry Smith     }
58308480c60SBarry Smith     else if (format == FILE_FORMAT_INFO) {
58408480c60SBarry Smith       return 0;
58508480c60SBarry Smith     }
586416022c9SBarry Smith   }
587416022c9SBarry Smith 
588416022c9SBarry Smith   if (vobj->type == ASCII_FILE_VIEWER) {
589227d817aSBarry Smith     ierr = ViewerFileGetPointer(viewer,&fd); CHKERRQ(ierr);
5907f813858SBarry Smith     MPIU_Seq_begin(mat->comm,1);
591d13ab20cSBarry Smith     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
59217699dbbSLois Curfman McInnes            aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
5931eb62cbbSBarry Smith            aij->cend);
59478b31e54SBarry Smith     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
59578b31e54SBarry Smith     ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
596d13ab20cSBarry Smith     fflush(fd);
5977f813858SBarry Smith     MPIU_Seq_end(mat->comm,1);
598d13ab20cSBarry Smith   }
599416022c9SBarry Smith   else {
600a56f8943SBarry Smith     int size = aij->size;
601a56f8943SBarry Smith     rank = aij->rank;
60217699dbbSLois Curfman McInnes     if (size == 1) {
60378b31e54SBarry Smith       ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
60495373324SBarry Smith     }
60595373324SBarry Smith     else {
60695373324SBarry Smith       /* assemble the entire matrix onto first processor. */
60795373324SBarry Smith       Mat         A;
608ec8511deSBarry Smith       Mat_SeqAIJ *Aloc;
6092eb8c8abSBarry Smith       int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
61095373324SBarry Smith       Scalar      *a;
6112ee70a88SLois Curfman McInnes 
61217699dbbSLois Curfman McInnes       if (!rank) {
613b4fd4287SBarry Smith         ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
614c451ab25SLois Curfman McInnes                CHKERRQ(ierr);
61595373324SBarry Smith       }
61695373324SBarry Smith       else {
617b4fd4287SBarry Smith         ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
618c451ab25SLois Curfman McInnes                CHKERRQ(ierr);
61995373324SBarry Smith       }
620464493b3SBarry Smith       PLogObjectParent(mat,A);
621416022c9SBarry Smith 
62295373324SBarry Smith       /* copy over the A part */
623ec8511deSBarry Smith       Aloc = (Mat_SeqAIJ*) aij->A->data;
6242ee70a88SLois Curfman McInnes       m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
62595373324SBarry Smith       row = aij->rstart;
626dbb450caSBarry Smith       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
62795373324SBarry Smith       for ( i=0; i<m; i++ ) {
628416022c9SBarry Smith         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
62995373324SBarry Smith         row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
63095373324SBarry Smith       }
6312ee70a88SLois Curfman McInnes       aj = Aloc->j;
632dbb450caSBarry Smith       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}
63395373324SBarry Smith 
63495373324SBarry Smith       /* copy over the B part */
635ec8511deSBarry Smith       Aloc = (Mat_SeqAIJ*) aij->B->data;
6362ee70a88SLois Curfman McInnes       m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
63795373324SBarry Smith       row = aij->rstart;
6380452661fSBarry Smith       ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
639dbb450caSBarry Smith       for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
64095373324SBarry Smith       for ( i=0; i<m; i++ ) {
641416022c9SBarry Smith         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
64295373324SBarry Smith         row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
64395373324SBarry Smith       }
6440452661fSBarry Smith       PetscFree(ct);
64578b31e54SBarry Smith       ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
64678b31e54SBarry Smith       ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
64717699dbbSLois Curfman McInnes       if (!rank) {
64878b31e54SBarry Smith         ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
64995373324SBarry Smith       }
65078b31e54SBarry Smith       ierr = MatDestroy(A); CHKERRQ(ierr);
65195373324SBarry Smith     }
65295373324SBarry Smith   }
6531eb62cbbSBarry Smith   return 0;
6541eb62cbbSBarry Smith }
6551eb62cbbSBarry Smith 
656416022c9SBarry Smith static int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
657416022c9SBarry Smith {
658416022c9SBarry Smith   Mat         mat = (Mat) obj;
659416022c9SBarry Smith   int         ierr;
660416022c9SBarry Smith   PetscObject vobj = (PetscObject) viewer;
661416022c9SBarry Smith 
662416022c9SBarry Smith   if (!viewer) {
663416022c9SBarry Smith     viewer = STDOUT_VIEWER_SELF; vobj = (PetscObject) viewer;
664416022c9SBarry Smith   }
665416022c9SBarry Smith   if (vobj->cookie == DRAW_COOKIE && vobj->type == NULLWINDOW) return 0;
666416022c9SBarry Smith   else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILE_VIEWER) {
667d7e8b826SBarry Smith     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
668416022c9SBarry Smith   }
669416022c9SBarry Smith   else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILES_VIEWER) {
670d7e8b826SBarry Smith     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
671d7e8b826SBarry Smith   }
672d7e8b826SBarry Smith   else if (vobj->cookie == VIEWER_COOKIE && vobj->type == MATLAB_VIEWER) {
673d7e8b826SBarry Smith     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
674416022c9SBarry Smith   }
675416022c9SBarry Smith   else if (vobj->cookie == DRAW_COOKIE) {
676d7e8b826SBarry Smith     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
677416022c9SBarry Smith   }
678416022c9SBarry Smith   else if (vobj->type == BINARY_FILE_VIEWER) {
679416022c9SBarry Smith     return MatView_MPIAIJ_Binary(mat,viewer);
680416022c9SBarry Smith   }
681416022c9SBarry Smith   return 0;
682416022c9SBarry Smith }
683416022c9SBarry Smith 
684ec8511deSBarry Smith extern int MatMarkDiag_SeqAIJ(Mat);
6851eb62cbbSBarry Smith /*
6861eb62cbbSBarry Smith     This has to provide several versions.
6871eb62cbbSBarry Smith 
6881eb62cbbSBarry Smith      1) per sequential
6891eb62cbbSBarry Smith      2) a) use only local smoothing updating outer values only once.
6901eb62cbbSBarry Smith         b) local smoothing updating outer values each inner iteration
691d6dfbf8fSBarry Smith      3) color updating out values betwen colors.
6921eb62cbbSBarry Smith */
693fc76ce87SLois Curfman McInnes static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
694dbb450caSBarry Smith                            double fshift,int its,Vec xx)
6958a729477SBarry Smith {
69644a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
697d6dfbf8fSBarry Smith   Mat        AA = mat->A, BB = mat->B;
698ec8511deSBarry Smith   Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
6996abc6512SBarry Smith   Scalar     zero = 0.0,*b,*x,*xs,*ls,d,*v,sum,scale,*t,*ts;
7006abc6512SBarry Smith   int        ierr,*idx, *diag;
701416022c9SBarry Smith   int        n = mat->n, m = mat->m, i,shift = A->indexshift;
702da3a660dSBarry Smith   Vec        tt;
7038a729477SBarry Smith 
704d6dfbf8fSBarry Smith   VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
705dbb450caSBarry Smith   xs = x + shift; /* shift by one for index start of 1 */
706dbb450caSBarry Smith   ls = ls + shift;
707ec8511deSBarry Smith   if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;}
708d6dfbf8fSBarry Smith   diag = A->diag;
709acb40c82SBarry Smith   if (flag == SOR_APPLY_UPPER || flag == SOR_APPLY_LOWER) {
71048d91487SBarry Smith     SETERRQ(1,"MatRelax_MPIAIJ:Option not supported");
711acb40c82SBarry Smith   }
712da3a660dSBarry Smith   if (flag & SOR_EISENSTAT) {
713da3a660dSBarry Smith     /* Let  A = L + U + D; where L is lower trianglar,
714da3a660dSBarry Smith     U is upper triangular, E is diagonal; This routine applies
715da3a660dSBarry Smith 
716da3a660dSBarry Smith             (L + E)^{-1} A (U + E)^{-1}
717da3a660dSBarry Smith 
718da3a660dSBarry Smith     to a vector efficiently using Eisenstat's trick. This is for
719da3a660dSBarry Smith     the case of SSOR preconditioner, so E is D/omega where omega
720da3a660dSBarry Smith     is the relaxation factor.
721da3a660dSBarry Smith     */
72278b31e54SBarry Smith     ierr = VecDuplicate(xx,&tt); CHKERRQ(ierr);
723464493b3SBarry Smith     PLogObjectParent(matin,tt);
724da3a660dSBarry Smith     VecGetArray(tt,&t);
725da3a660dSBarry Smith     scale = (2.0/omega) - 1.0;
726da3a660dSBarry Smith     /*  x = (E + U)^{-1} b */
727da3a660dSBarry Smith     VecSet(&zero,mat->lvec);
72864119d58SLois Curfman McInnes     ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
72978b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
730da3a660dSBarry Smith     for ( i=m-1; i>-1; i-- ) {
731da3a660dSBarry Smith       n    = A->i[i+1] - diag[i] - 1;
732dbb450caSBarry Smith       idx  = A->j + diag[i] + !shift;
733dbb450caSBarry Smith       v    = A->a + diag[i] + !shift;
734da3a660dSBarry Smith       sum  = b[i];
735da3a660dSBarry Smith       SPARSEDENSEMDOT(sum,xs,v,idx,n);
736dbb450caSBarry Smith       d    = fshift + A->a[diag[i]+shift];
737da3a660dSBarry Smith       n    = B->i[i+1] - B->i[i];
738dbb450caSBarry Smith       idx  = B->j + B->i[i] + shift;
739dbb450caSBarry Smith       v    = B->a + B->i[i] + shift;
740da3a660dSBarry Smith       SPARSEDENSEMDOT(sum,ls,v,idx,n);
741da3a660dSBarry Smith       x[i] = omega*(sum/d);
742da3a660dSBarry Smith     }
74364119d58SLois Curfman McInnes     ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
74478b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
745da3a660dSBarry Smith 
746da3a660dSBarry Smith     /*  t = b - (2*E - D)x */
747da3a660dSBarry Smith     v = A->a;
748dbb450caSBarry Smith     for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ + shift])*x[i]; }
749da3a660dSBarry Smith 
750da3a660dSBarry Smith     /*  t = (E + L)^{-1}t */
751dbb450caSBarry Smith     ts = t + shift; /* shifted by one for index start of a or mat->j*/
752da3a660dSBarry Smith     diag = A->diag;
753da3a660dSBarry Smith     VecSet(&zero,mat->lvec);
75464119d58SLois Curfman McInnes     ierr = VecPipelineBegin(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
75578b31e54SBarry Smith                                                  mat->Mvctx); CHKERRQ(ierr);
756da3a660dSBarry Smith     for ( i=0; i<m; i++ ) {
757da3a660dSBarry Smith       n    = diag[i] - A->i[i];
758dbb450caSBarry Smith       idx  = A->j + A->i[i] + shift;
759dbb450caSBarry Smith       v    = A->a + A->i[i] + shift;
760da3a660dSBarry Smith       sum  = t[i];
761da3a660dSBarry Smith       SPARSEDENSEMDOT(sum,ts,v,idx,n);
762dbb450caSBarry Smith       d    = fshift + A->a[diag[i]+shift];
763da3a660dSBarry Smith       n    = B->i[i+1] - B->i[i];
764dbb450caSBarry Smith       idx  = B->j + B->i[i] + shift;
765dbb450caSBarry Smith       v    = B->a + B->i[i] + shift;
766da3a660dSBarry Smith       SPARSEDENSEMDOT(sum,ls,v,idx,n);
767da3a660dSBarry Smith       t[i] = omega*(sum/d);
768da3a660dSBarry Smith     }
76964119d58SLois Curfman McInnes     ierr = VecPipelineEnd(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
77078b31e54SBarry Smith                                                     mat->Mvctx); CHKERRQ(ierr);
771da3a660dSBarry Smith     /*  x = x + t */
772da3a660dSBarry Smith     for ( i=0; i<m; i++ ) { x[i] += t[i]; }
773da3a660dSBarry Smith     VecDestroy(tt);
774da3a660dSBarry Smith     return 0;
775da3a660dSBarry Smith   }
776da3a660dSBarry Smith 
7771eb62cbbSBarry Smith 
778d6dfbf8fSBarry Smith   if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){
779da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
780da3a660dSBarry Smith       VecSet(&zero,mat->lvec); VecSet(&zero,xx);
781da3a660dSBarry Smith     }
782da3a660dSBarry Smith     else {
78364119d58SLois Curfman McInnes       ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
78478b31e54SBarry Smith       CHKERRQ(ierr);
78564119d58SLois Curfman McInnes       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
78678b31e54SBarry Smith       CHKERRQ(ierr);
787da3a660dSBarry Smith     }
788d6dfbf8fSBarry Smith     while (its--) {
789d6dfbf8fSBarry Smith       /* go down through the rows */
79064119d58SLois Curfman McInnes       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
79178b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
792d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
793d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
794dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
795dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
796d6dfbf8fSBarry Smith         sum  = b[i];
797d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
798dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
799d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
800dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
801dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
802d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
803d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
804d6dfbf8fSBarry Smith       }
80564119d58SLois Curfman McInnes       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
80678b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
807d6dfbf8fSBarry Smith       /* come up through the rows */
80864119d58SLois Curfman McInnes       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
80978b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
810d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
811d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
812dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
813dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
814d6dfbf8fSBarry Smith         sum  = b[i];
815d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
816dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
817d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
818dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
819dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
820d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
821d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
822d6dfbf8fSBarry Smith       }
82364119d58SLois Curfman McInnes       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
82478b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
825d6dfbf8fSBarry Smith     }
826d6dfbf8fSBarry Smith   }
827d6dfbf8fSBarry Smith   else if (flag & SOR_FORWARD_SWEEP){
828da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
829da3a660dSBarry Smith       VecSet(&zero,mat->lvec);
83064119d58SLois Curfman McInnes       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
83178b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
832da3a660dSBarry Smith       for ( i=0; i<m; i++ ) {
833da3a660dSBarry Smith         n    = diag[i] - A->i[i];
834dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
835dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
836da3a660dSBarry Smith         sum  = b[i];
837da3a660dSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
838dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
839da3a660dSBarry Smith         n    = B->i[i+1] - B->i[i];
840dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
841dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
842da3a660dSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
843da3a660dSBarry Smith         x[i] = omega*(sum/d);
844da3a660dSBarry Smith       }
84564119d58SLois Curfman McInnes       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
84678b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
847da3a660dSBarry Smith       its--;
848da3a660dSBarry Smith     }
849d6dfbf8fSBarry Smith     while (its--) {
85064119d58SLois Curfman McInnes       ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
85178b31e54SBarry Smith       CHKERRQ(ierr);
85264119d58SLois Curfman McInnes       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
85378b31e54SBarry Smith       CHKERRQ(ierr);
85464119d58SLois Curfman McInnes       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
85578b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
856d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
857d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
858dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
859dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
860d6dfbf8fSBarry Smith         sum  = b[i];
861d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
862dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
863d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
864dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
865dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
866d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
867d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
868d6dfbf8fSBarry Smith       }
86964119d58SLois Curfman McInnes       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
87078b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
871d6dfbf8fSBarry Smith     }
872d6dfbf8fSBarry Smith   }
873d6dfbf8fSBarry Smith   else if (flag & SOR_BACKWARD_SWEEP){
874da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
875da3a660dSBarry Smith       VecSet(&zero,mat->lvec);
87664119d58SLois Curfman McInnes       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
87778b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
878da3a660dSBarry Smith       for ( i=m-1; i>-1; i-- ) {
879da3a660dSBarry Smith         n    = A->i[i+1] - diag[i] - 1;
880dbb450caSBarry Smith         idx  = A->j + diag[i] + !shift;
881dbb450caSBarry Smith         v    = A->a + diag[i] + !shift;
882da3a660dSBarry Smith         sum  = b[i];
883da3a660dSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
884dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
885da3a660dSBarry Smith         n    = B->i[i+1] - B->i[i];
886dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
887dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
888da3a660dSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
889da3a660dSBarry Smith         x[i] = omega*(sum/d);
890da3a660dSBarry Smith       }
89164119d58SLois Curfman McInnes       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
89278b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
893da3a660dSBarry Smith       its--;
894da3a660dSBarry Smith     }
895d6dfbf8fSBarry Smith     while (its--) {
89664119d58SLois Curfman McInnes       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN,
89778b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
89864119d58SLois Curfman McInnes       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN,
89978b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
90064119d58SLois Curfman McInnes       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
90178b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
902d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
903d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
904dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
905dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
906d6dfbf8fSBarry Smith         sum  = b[i];
907d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
908dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
909d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
910dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
911dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
912d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
913d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
914d6dfbf8fSBarry Smith       }
91564119d58SLois Curfman McInnes       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
91678b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
917d6dfbf8fSBarry Smith     }
918d6dfbf8fSBarry Smith   }
919d6dfbf8fSBarry Smith   else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
920da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
921dbb450caSBarry Smith       return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
922da3a660dSBarry Smith     }
92364119d58SLois Curfman McInnes     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
92478b31e54SBarry Smith     CHKERRQ(ierr);
92564119d58SLois Curfman McInnes     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
92678b31e54SBarry Smith     CHKERRQ(ierr);
927d6dfbf8fSBarry Smith     while (its--) {
928d6dfbf8fSBarry Smith       /* go down through the rows */
929d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
930d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
931dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
932dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
933d6dfbf8fSBarry Smith         sum  = b[i];
934d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
935dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
936d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
937dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
938dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
939d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
940d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
941d6dfbf8fSBarry Smith       }
942d6dfbf8fSBarry Smith       /* come up through the rows */
943d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
944d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
945dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
946dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
947d6dfbf8fSBarry Smith         sum  = b[i];
948d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
949dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
950d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
951dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
952dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
953d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
954d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
955d6dfbf8fSBarry Smith       }
956d6dfbf8fSBarry Smith     }
957d6dfbf8fSBarry Smith   }
958d6dfbf8fSBarry Smith   else if (flag & SOR_LOCAL_FORWARD_SWEEP){
959da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
960dbb450caSBarry Smith       return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
961da3a660dSBarry Smith     }
96264119d58SLois Curfman McInnes     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
96378b31e54SBarry Smith     CHKERRQ(ierr);
96464119d58SLois Curfman McInnes     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
96578b31e54SBarry Smith     CHKERRQ(ierr);
966d6dfbf8fSBarry Smith     while (its--) {
967d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
968d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
969dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
970dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
971d6dfbf8fSBarry Smith         sum  = b[i];
972d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
973dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
974d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
975dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
976dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
977d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
978d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
979d6dfbf8fSBarry Smith       }
980d6dfbf8fSBarry Smith     }
981d6dfbf8fSBarry Smith   }
982d6dfbf8fSBarry Smith   else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
983da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
984dbb450caSBarry Smith       return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
985da3a660dSBarry Smith     }
98664119d58SLois Curfman McInnes     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,
98778b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
98864119d58SLois Curfman McInnes     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,
98978b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
990d6dfbf8fSBarry Smith     while (its--) {
991d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
992d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
993dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
994dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
995d6dfbf8fSBarry Smith         sum  = b[i];
996d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
997dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
998d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
999dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
1000dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
1001d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
1002d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
1003d6dfbf8fSBarry Smith       }
1004d6dfbf8fSBarry Smith     }
1005d6dfbf8fSBarry Smith   }
10068a729477SBarry Smith   return 0;
10078a729477SBarry Smith }
1008a66be287SLois Curfman McInnes 
1009fc76ce87SLois Curfman McInnes static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,int *nz,
1010fc76ce87SLois Curfman McInnes                              int *nzalloc,int *mem)
1011a66be287SLois Curfman McInnes {
1012a66be287SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1013a66be287SLois Curfman McInnes   Mat        A = mat->A, B = mat->B;
1014a66be287SLois Curfman McInnes   int        ierr, isend[3], irecv[3], nzA, nzallocA, memA;
1015a66be287SLois Curfman McInnes 
101678b31e54SBarry Smith   ierr = MatGetInfo(A,MAT_LOCAL,&nzA,&nzallocA,&memA); CHKERRQ(ierr);
101778b31e54SBarry Smith   ierr = MatGetInfo(B,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr);
1018a66be287SLois Curfman McInnes   isend[0] += nzA; isend[1] += nzallocA; isend[2] += memA;
1019a66be287SLois Curfman McInnes   if (flag == MAT_LOCAL) {
1020a66be287SLois Curfman McInnes     *nz = isend[0]; *nzalloc = isend[1]; *mem = isend[2];
1021a66be287SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_MAX) {
1022d65a2f8fSBarry Smith     MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_MAX,matin->comm);
1023a66be287SLois Curfman McInnes     *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
1024a66be287SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_SUM) {
1025d65a2f8fSBarry Smith     MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_SUM,matin->comm);
1026a66be287SLois Curfman McInnes     *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
1027a66be287SLois Curfman McInnes   }
1028a66be287SLois Curfman McInnes   return 0;
1029a66be287SLois Curfman McInnes }
1030a66be287SLois Curfman McInnes 
1031299609e3SLois Curfman McInnes extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*);
1032299609e3SLois Curfman McInnes extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*);
1033299609e3SLois Curfman McInnes extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double);
1034299609e3SLois Curfman McInnes extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *);
1035299609e3SLois Curfman McInnes extern int MatSolve_MPIAIJ(Mat,Vec,Vec);
1036299609e3SLois Curfman McInnes extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec);
1037299609e3SLois Curfman McInnes extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec);
1038299609e3SLois Curfman McInnes extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec);
1039299609e3SLois Curfman McInnes 
1040416022c9SBarry Smith static int MatSetOption_MPIAIJ(Mat A,MatOption op)
1041c74985f6SBarry Smith {
1042c0bbcb79SLois Curfman McInnes   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1043c74985f6SBarry Smith 
1044c0bbcb79SLois Curfman McInnes   if (op == NO_NEW_NONZERO_LOCATIONS ||
1045c0bbcb79SLois Curfman McInnes       op == YES_NEW_NONZERO_LOCATIONS ||
1046c0bbcb79SLois Curfman McInnes       op == COLUMNS_SORTED ||
1047c0bbcb79SLois Curfman McInnes       op == ROW_ORIENTED) {
1048c0bbcb79SLois Curfman McInnes         MatSetOption(a->A,op);
1049c0bbcb79SLois Curfman McInnes         MatSetOption(a->B,op);
1050c74985f6SBarry Smith   }
1051c0bbcb79SLois Curfman McInnes   else if (op == ROWS_SORTED ||
1052c0bbcb79SLois Curfman McInnes            op == SYMMETRIC_MATRIX ||
1053c0bbcb79SLois Curfman McInnes            op == STRUCTURALLY_SYMMETRIC_MATRIX ||
1054c0bbcb79SLois Curfman McInnes            op == YES_NEW_DIAGONALS)
1055c0bbcb79SLois Curfman McInnes     PLogInfo((PetscObject)A,"Info:MatSetOption_MPIAIJ:Option ignored\n");
10564b0e389bSBarry Smith   else if (op == COLUMN_ORIENTED) {
10574b0e389bSBarry Smith     a->roworiented = 0;
10584b0e389bSBarry Smith     MatSetOption(a->A,op);
10594b0e389bSBarry Smith     MatSetOption(a->B,op);
10604b0e389bSBarry Smith   }
1061c0bbcb79SLois Curfman McInnes   else if (op == NO_NEW_DIAGONALS)
10624d39ef84SLois Curfman McInnes     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:NO_NEW_DIAGONALS");}
1063c0bbcb79SLois Curfman McInnes   else
10644d39ef84SLois Curfman McInnes     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:unknown option");}
1065c74985f6SBarry Smith   return 0;
1066c74985f6SBarry Smith }
1067c74985f6SBarry Smith 
1068d1710a03SLois Curfman McInnes static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
1069c74985f6SBarry Smith {
107044a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1071c74985f6SBarry Smith   *m = mat->M; *n = mat->N;
1072c74985f6SBarry Smith   return 0;
1073c74985f6SBarry Smith }
1074c74985f6SBarry Smith 
1075d1710a03SLois Curfman McInnes static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
1076c74985f6SBarry Smith {
107744a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1078b7c46309SBarry Smith   *m = mat->m; *n = mat->N;
1079c74985f6SBarry Smith   return 0;
1080c74985f6SBarry Smith }
1081c74985f6SBarry Smith 
1082d1710a03SLois Curfman McInnes static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
1083c74985f6SBarry Smith {
108444a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1085c74985f6SBarry Smith   *m = mat->rstart; *n = mat->rend;
1086c74985f6SBarry Smith   return 0;
1087c74985f6SBarry Smith }
1088c74985f6SBarry Smith 
108939e00950SLois Curfman McInnes static int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
109039e00950SLois Curfman McInnes {
1091154123eaSLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1092154123eaSLois Curfman McInnes   Scalar     *vworkA, *vworkB, **pvA, **pvB;
1093154123eaSLois Curfman McInnes   int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
1094154123eaSLois Curfman McInnes   int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
109539e00950SLois Curfman McInnes 
1096b49de8d1SLois Curfman McInnes   if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIAIJ:Only local rows")
1097abc0e9e4SLois Curfman McInnes   lrow = row - rstart;
109839e00950SLois Curfman McInnes 
1099154123eaSLois Curfman McInnes   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1100154123eaSLois Curfman McInnes   if (!v)   {pvA = 0; pvB = 0;}
1101154123eaSLois Curfman McInnes   if (!idx) {pcA = 0; if (!v) pcB = 0;}
110278b31e54SBarry Smith   ierr = MatGetRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
110378b31e54SBarry Smith   ierr = MatGetRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1104154123eaSLois Curfman McInnes   nztot = nzA + nzB;
1105154123eaSLois Curfman McInnes 
1106154123eaSLois Curfman McInnes   if (v  || idx) {
1107154123eaSLois Curfman McInnes     if (nztot) {
1108154123eaSLois Curfman McInnes       /* Sort by increasing column numbers, assuming A and B already sorted */
1109299609e3SLois Curfman McInnes       int imark;
1110154123eaSLois Curfman McInnes       for (i=0; i<nzB; i++) cworkB[i] = mat->garray[cworkB[i]];
1111154123eaSLois Curfman McInnes       if (v) {
11120452661fSBarry Smith         *v = (Scalar *) PetscMalloc( (nztot)*sizeof(Scalar) ); CHKPTRQ(*v);
111339e00950SLois Curfman McInnes         for ( i=0; i<nzB; i++ ) {
1114154123eaSLois Curfman McInnes           if (cworkB[i] < cstart)   (*v)[i] = vworkB[i];
1115154123eaSLois Curfman McInnes           else break;
1116154123eaSLois Curfman McInnes         }
1117154123eaSLois Curfman McInnes         imark = i;
1118154123eaSLois Curfman McInnes         for ( i=0; i<nzA; i++ )     (*v)[imark+i] = vworkA[i];
1119299609e3SLois Curfman McInnes         for ( i=imark; i<nzB; i++ ) (*v)[nzA+i] = vworkB[i];
1120154123eaSLois Curfman McInnes       }
1121154123eaSLois Curfman McInnes       if (idx) {
11220452661fSBarry Smith         *idx = (int *) PetscMalloc( (nztot)*sizeof(int) ); CHKPTRQ(*idx);
1123154123eaSLois Curfman McInnes         for (i=0; i<nzA; i++) cworkA[i] += cstart;
1124154123eaSLois Curfman McInnes         for ( i=0; i<nzB; i++ ) {
1125154123eaSLois Curfman McInnes           if (cworkB[i] < cstart)   (*idx)[i] = cworkB[i];
1126154123eaSLois Curfman McInnes           else break;
1127154123eaSLois Curfman McInnes         }
1128154123eaSLois Curfman McInnes         imark = i;
1129154123eaSLois Curfman McInnes         for ( i=0; i<nzA; i++ )     (*idx)[imark+i] = cworkA[i];
1130299609e3SLois Curfman McInnes         for ( i=imark; i<nzB; i++ ) (*idx)[nzA+i] = cworkB[i];
113139e00950SLois Curfman McInnes       }
113239e00950SLois Curfman McInnes     }
113339e00950SLois Curfman McInnes     else {*idx = 0; *v=0;}
1134154123eaSLois Curfman McInnes   }
113539e00950SLois Curfman McInnes   *nz = nztot;
113678b31e54SBarry Smith   ierr = MatRestoreRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
113778b31e54SBarry Smith   ierr = MatRestoreRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
113839e00950SLois Curfman McInnes   return 0;
113939e00950SLois Curfman McInnes }
114039e00950SLois Curfman McInnes 
114139e00950SLois Curfman McInnes static int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
114239e00950SLois Curfman McInnes {
11430452661fSBarry Smith   if (idx) PetscFree(*idx);
11440452661fSBarry Smith   if (v) PetscFree(*v);
114539e00950SLois Curfman McInnes   return 0;
114639e00950SLois Curfman McInnes }
114739e00950SLois Curfman McInnes 
1148cddf8d76SBarry Smith static int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
1149855ac2c5SLois Curfman McInnes {
1150855ac2c5SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1151ec8511deSBarry Smith   Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
1152416022c9SBarry Smith   int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
1153416022c9SBarry Smith   double     sum = 0.0;
115404ca555eSLois Curfman McInnes   Scalar     *v;
115504ca555eSLois Curfman McInnes 
115617699dbbSLois Curfman McInnes   if (aij->size == 1) {
115714183eadSLois Curfman McInnes     ierr =  MatNorm(aij->A,type,norm); CHKERRQ(ierr);
115837fa93a5SLois Curfman McInnes   } else {
115904ca555eSLois Curfman McInnes     if (type == NORM_FROBENIUS) {
116004ca555eSLois Curfman McInnes       v = amat->a;
116104ca555eSLois Curfman McInnes       for (i=0; i<amat->nz; i++ ) {
116204ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX)
116304ca555eSLois Curfman McInnes         sum += real(conj(*v)*(*v)); v++;
116404ca555eSLois Curfman McInnes #else
116504ca555eSLois Curfman McInnes         sum += (*v)*(*v); v++;
116604ca555eSLois Curfman McInnes #endif
116704ca555eSLois Curfman McInnes       }
116804ca555eSLois Curfman McInnes       v = bmat->a;
116904ca555eSLois Curfman McInnes       for (i=0; i<bmat->nz; i++ ) {
117004ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX)
117104ca555eSLois Curfman McInnes         sum += real(conj(*v)*(*v)); v++;
117204ca555eSLois Curfman McInnes #else
117304ca555eSLois Curfman McInnes         sum += (*v)*(*v); v++;
117404ca555eSLois Curfman McInnes #endif
117504ca555eSLois Curfman McInnes       }
117604ca555eSLois Curfman McInnes       MPI_Allreduce((void*)&sum,(void*)norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
117704ca555eSLois Curfman McInnes       *norm = sqrt(*norm);
117804ca555eSLois Curfman McInnes     }
117904ca555eSLois Curfman McInnes     else if (type == NORM_1) { /* max column norm */
118004ca555eSLois Curfman McInnes       double *tmp, *tmp2;
118104ca555eSLois Curfman McInnes       int    *jj, *garray = aij->garray;
11820452661fSBarry Smith       tmp  = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp);
11830452661fSBarry Smith       tmp2 = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp2);
1184cddf8d76SBarry Smith       PetscMemzero(tmp,aij->N*sizeof(double));
118504ca555eSLois Curfman McInnes       *norm = 0.0;
118604ca555eSLois Curfman McInnes       v = amat->a; jj = amat->j;
118704ca555eSLois Curfman McInnes       for ( j=0; j<amat->nz; j++ ) {
1188579c6b6fSBarry Smith         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
118904ca555eSLois Curfman McInnes       }
119004ca555eSLois Curfman McInnes       v = bmat->a; jj = bmat->j;
119104ca555eSLois Curfman McInnes       for ( j=0; j<bmat->nz; j++ ) {
1192579c6b6fSBarry Smith         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
119304ca555eSLois Curfman McInnes       }
119404ca555eSLois Curfman McInnes       MPI_Allreduce((void*)tmp,(void*)tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);
119504ca555eSLois Curfman McInnes       for ( j=0; j<aij->N; j++ ) {
119604ca555eSLois Curfman McInnes         if (tmp2[j] > *norm) *norm = tmp2[j];
119704ca555eSLois Curfman McInnes       }
11980452661fSBarry Smith       PetscFree(tmp); PetscFree(tmp2);
119904ca555eSLois Curfman McInnes     }
120004ca555eSLois Curfman McInnes     else if (type == NORM_INFINITY) { /* max row norm */
1201515d9167SLois Curfman McInnes       double ntemp = 0.0;
120204ca555eSLois Curfman McInnes       for ( j=0; j<amat->m; j++ ) {
1203dbb450caSBarry Smith         v = amat->a + amat->i[j] + shift;
120404ca555eSLois Curfman McInnes         sum = 0.0;
120504ca555eSLois Curfman McInnes         for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
1206cddf8d76SBarry Smith           sum += PetscAbsScalar(*v); v++;
120704ca555eSLois Curfman McInnes         }
1208dbb450caSBarry Smith         v = bmat->a + bmat->i[j] + shift;
120904ca555eSLois Curfman McInnes         for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
1210cddf8d76SBarry Smith           sum += PetscAbsScalar(*v); v++;
121104ca555eSLois Curfman McInnes         }
1212515d9167SLois Curfman McInnes         if (sum > ntemp) ntemp = sum;
121304ca555eSLois Curfman McInnes       }
1214515d9167SLois Curfman McInnes       MPI_Allreduce((void*)&ntemp,(void*)norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);
121504ca555eSLois Curfman McInnes     }
121604ca555eSLois Curfman McInnes     else {
1217515d9167SLois Curfman McInnes       SETERRQ(1,"MatNorm_MPIAIJ:No support for two norm");
121804ca555eSLois Curfman McInnes     }
121937fa93a5SLois Curfman McInnes   }
1220855ac2c5SLois Curfman McInnes   return 0;
1221855ac2c5SLois Curfman McInnes }
1222855ac2c5SLois Curfman McInnes 
12230de55854SLois Curfman McInnes static int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1224b7c46309SBarry Smith {
1225b7c46309SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1226dbb450caSBarry Smith   Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
1227416022c9SBarry Smith   int        ierr,shift = Aloc->indexshift;
1228416022c9SBarry Smith   Mat        B;
1229b7c46309SBarry Smith   int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
1230b7c46309SBarry Smith   Scalar     *array;
1231b7c46309SBarry Smith 
12323638b69dSLois Curfman McInnes   if (matout == PETSC_NULL && M != N)
12333638b69dSLois Curfman McInnes     SETERRQ(1,"MatTranspose_MPIAIJ:Square matrix only for in-place");
1234b4fd4287SBarry Smith   ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,
1235b4fd4287SBarry Smith          PETSC_NULL,&B); CHKERRQ(ierr);
1236b7c46309SBarry Smith 
1237b7c46309SBarry Smith   /* copy over the A part */
1238ec8511deSBarry Smith   Aloc = (Mat_SeqAIJ*) a->A->data;
1239b7c46309SBarry Smith   m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1240b7c46309SBarry Smith   row = a->rstart;
1241dbb450caSBarry Smith   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
1242b7c46309SBarry Smith   for ( i=0; i<m; i++ ) {
1243416022c9SBarry Smith     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1244b7c46309SBarry Smith     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1245b7c46309SBarry Smith   }
1246b7c46309SBarry Smith   aj = Aloc->j;
12474af08d9eSBarry Smith   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}
1248b7c46309SBarry Smith 
1249b7c46309SBarry Smith   /* copy over the B part */
1250ec8511deSBarry Smith   Aloc = (Mat_SeqAIJ*) a->B->data;
1251b7c46309SBarry Smith   m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1252b7c46309SBarry Smith   row = a->rstart;
12530452661fSBarry Smith   ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
1254dbb450caSBarry Smith   for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
1255b7c46309SBarry Smith   for ( i=0; i<m; i++ ) {
1256416022c9SBarry Smith     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1257b7c46309SBarry Smith     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1258b7c46309SBarry Smith   }
12590452661fSBarry Smith   PetscFree(ct);
1260b7c46309SBarry Smith   ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
1261b7c46309SBarry Smith   ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
12623638b69dSLois Curfman McInnes   if (matout != PETSC_NULL) {
12630de55854SLois Curfman McInnes     *matout = B;
12640de55854SLois Curfman McInnes   } else {
12650de55854SLois Curfman McInnes     /* This isn't really an in-place transpose .... but free data structures from a */
12660452661fSBarry Smith     PetscFree(a->rowners);
12670de55854SLois Curfman McInnes     ierr = MatDestroy(a->A); CHKERRQ(ierr);
12680de55854SLois Curfman McInnes     ierr = MatDestroy(a->B); CHKERRQ(ierr);
12690452661fSBarry Smith     if (a->colmap) PetscFree(a->colmap);
12700452661fSBarry Smith     if (a->garray) PetscFree(a->garray);
12710de55854SLois Curfman McInnes     if (a->lvec) VecDestroy(a->lvec);
1272a56f8943SBarry Smith     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
12730452661fSBarry Smith     PetscFree(a);
1274416022c9SBarry Smith     PetscMemcpy(A,B,sizeof(struct _Mat));
12750452661fSBarry Smith     PetscHeaderDestroy(B);
12760de55854SLois Curfman McInnes   }
1277b7c46309SBarry Smith   return 0;
1278b7c46309SBarry Smith }
1279b7c46309SBarry Smith 
1280682d7d0cSBarry Smith extern int MatPrintHelp_SeqAIJ(Mat);
1281682d7d0cSBarry Smith static int MatPrintHelp_MPIAIJ(Mat A)
1282682d7d0cSBarry Smith {
1283682d7d0cSBarry Smith   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1284682d7d0cSBarry Smith 
1285682d7d0cSBarry Smith   if (!a->rank) return MatPrintHelp_SeqAIJ(a->A);
1286682d7d0cSBarry Smith   else return 0;
1287682d7d0cSBarry Smith }
1288682d7d0cSBarry Smith 
1289fc76ce87SLois Curfman McInnes extern int MatConvert_MPIAIJ(Mat,MatType,Mat *);
12903d1612f7SBarry Smith static int MatConvertSameType_MPIAIJ(Mat,Mat *,int);
12912f86bd48SSatish Balay extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int);
1292598137ffSSatish Balay int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **);
12938a729477SBarry Smith /* -------------------------------------------------------------------*/
12942ee70a88SLois Curfman McInnes static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
129539e00950SLois Curfman McInnes        MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
129644a69424SLois Curfman McInnes        MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
129744a69424SLois Curfman McInnes        MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
1298299609e3SLois Curfman McInnes        MatSolve_MPIAIJ,MatSolveAdd_MPIAIJ,
1299299609e3SLois Curfman McInnes        MatSolveTrans_MPIAIJ,MatSolveTransAdd_MPIAIJ,
1300299609e3SLois Curfman McInnes        MatLUFactor_MPIAIJ,0,
130144a69424SLois Curfman McInnes        MatRelax_MPIAIJ,
1302b7c46309SBarry Smith        MatTranspose_MPIAIJ,
1303a66be287SLois Curfman McInnes        MatGetInfo_MPIAIJ,0,
1304855ac2c5SLois Curfman McInnes        MatGetDiagonal_MPIAIJ,0,MatNorm_MPIAIJ,
1305ee50ffe9SBarry Smith        MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
13061eb62cbbSBarry Smith        0,
1307299609e3SLois Curfman McInnes        MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
1308299609e3SLois Curfman McInnes        MatGetReordering_MPIAIJ,
1309299609e3SLois Curfman McInnes        MatLUFactorSymbolic_MPIAIJ,MatLUFactorNumeric_MPIAIJ,0,0,
1310d1710a03SLois Curfman McInnes        MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
1311299609e3SLois Curfman McInnes        MatILUFactorSymbolic_MPIAIJ,0,
13123d1612f7SBarry Smith        0,0,MatConvert_MPIAIJ,0,0,MatConvertSameType_MPIAIJ,0,0,
1313b49de8d1SLois Curfman McInnes        0,0,0,
1314598137ffSSatish Balay        MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0,
1315052efed2SBarry Smith        MatPrintHelp_MPIAIJ,
1316052efed2SBarry Smith        MatScale_MPIAIJ};
13178a729477SBarry Smith 
13181987afe7SBarry Smith /*@C
1319ff756334SLois Curfman McInnes    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
13203a511b96SLois Curfman McInnes    (the default parallel PETSc format).  For good matrix assembly performance
13213a511b96SLois Curfman McInnes    the user should preallocate the matrix storage by setting the parameters
13223a511b96SLois Curfman McInnes    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
13233a511b96SLois Curfman McInnes    performance can be increased by more than a factor of 50.
13248a729477SBarry Smith 
13258a729477SBarry Smith    Input Parameters:
13261eb62cbbSBarry Smith .  comm - MPI communicator
13277d3e4905SLois Curfman McInnes .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
13287d3e4905SLois Curfman McInnes .  n - number of local columns (or PETSC_DECIDE to have calculated
13297d3e4905SLois Curfman McInnes            if N is given)
13307d3e4905SLois Curfman McInnes .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
13317d3e4905SLois Curfman McInnes .  N - number of global columns (or PETSC_DECIDE to have calculated
13327d3e4905SLois Curfman McInnes            if n is given)
1333ab693e5aSLois Curfman McInnes .  d_nz - number of nonzeros per row in diagonal portion of local submatrix
1334ff756334SLois Curfman McInnes            (same for all local rows)
1335ab693e5aSLois Curfman McInnes .  d_nzz - number of nonzeros per row in diagonal portion of local submatrix
1336ab693e5aSLois Curfman McInnes            or null (possibly different for each row).  You must leave room
1337ab693e5aSLois Curfman McInnes            for the diagonal entry even if it is zero.
1338ab693e5aSLois Curfman McInnes .  o_nz - number of nonzeros per row in off-diagonal portion of local
1339ab693e5aSLois Curfman McInnes            submatrix (same for all local rows).
1340ab693e5aSLois Curfman McInnes .  o_nzz - number of nonzeros per row in off-diagonal portion of local
1341ab693e5aSLois Curfman McInnes            submatrix or null (possibly different for each row).
13428a729477SBarry Smith 
1343ff756334SLois Curfman McInnes    Output Parameter:
13448a729477SBarry Smith .  newmat - the matrix
13458a729477SBarry Smith 
1346ff756334SLois Curfman McInnes    Notes:
1347ff756334SLois Curfman McInnes    The AIJ format (also called the Yale sparse matrix format or
1348ff756334SLois Curfman McInnes    compressed row storage), is fully compatible with standard Fortran 77
13490002213bSLois Curfman McInnes    storage.  That is, the stored row and column indices can begin at
13500002213bSLois Curfman McInnes    either one (as in Fortran) or zero.  See the users manual for details.
1351ff756334SLois Curfman McInnes 
1352ff756334SLois Curfman McInnes    The user MUST specify either the local or global matrix dimensions
1353ff756334SLois Curfman McInnes    (possibly both).
1354ff756334SLois Curfman McInnes 
13555511cfe3SLois Curfman McInnes    By default, this format uses inodes (identical nodes) when possible.
13565511cfe3SLois Curfman McInnes    We search for consecutive rows with the same nonzero structure, thereby
13575511cfe3SLois Curfman McInnes    reusing matrix information to achieve increased efficiency.
13585511cfe3SLois Curfman McInnes 
13595511cfe3SLois Curfman McInnes    Options Database Keys:
13605511cfe3SLois Curfman McInnes $    -mat_aij_no_inode  - Do not use inodes
13616e62573dSLois Curfman McInnes $    -mat_aij_inode_limit <limit> - Set inode limit.
13626e62573dSLois Curfman McInnes $        (max limit=5)
13636e62573dSLois Curfman McInnes $    -mat_aij_oneindex - Internally use indexing starting at 1
13646e62573dSLois Curfman McInnes $        rather than 0.  Note: When calling MatSetValues(),
13656e62573dSLois Curfman McInnes $        the user still MUST index entries starting at 0!
13665511cfe3SLois Curfman McInnes 
1367e0245417SLois Curfman McInnes    Storage Information:
1368e0245417SLois Curfman McInnes    For a square global matrix we define each processor's diagonal portion
1369e0245417SLois Curfman McInnes    to be its local rows and the corresponding columns (a square submatrix);
1370e0245417SLois Curfman McInnes    each processor's off-diagonal portion encompasses the remainder of the
1371e0245417SLois Curfman McInnes    local matrix (a rectangular submatrix).
1372e0245417SLois Curfman McInnes 
1373e0245417SLois Curfman McInnes    The user can specify preallocated storage for the diagonal part of
13745ace5be8SLois Curfman McInnes    the local submatrix with either d_nz or d_nnz (not both).  Set
13755ace5be8SLois Curfman McInnes    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
13765ace5be8SLois Curfman McInnes    memory allocation.  Likewise, specify preallocated storage for the
13775ace5be8SLois Curfman McInnes    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1378ff756334SLois Curfman McInnes 
13795511cfe3SLois Curfman McInnes    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
13805511cfe3SLois Curfman McInnes    the figure below we depict these three local rows and all columns (0-11).
13812191d07cSBarry Smith 
1382b810aeb4SBarry Smith $          0 1 2 3 4 5 6 7 8 9 10 11
1383b810aeb4SBarry Smith $         -------------------
13845511cfe3SLois Curfman McInnes $  row 3  |  o o o d d d o o o o o o
13855511cfe3SLois Curfman McInnes $  row 4  |  o o o d d d o o o o o o
13865511cfe3SLois Curfman McInnes $  row 5  |  o o o d d d o o o o o o
13875511cfe3SLois Curfman McInnes $         -------------------
1388b810aeb4SBarry Smith $
1389b810aeb4SBarry Smith 
13905511cfe3SLois Curfman McInnes    Thus, any entries in the d locations are stored in the d (diagonal)
13915511cfe3SLois Curfman McInnes    submatrix, and any entries in the o locations are stored in the
13925511cfe3SLois Curfman McInnes    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
13935511cfe3SLois Curfman McInnes    stored simply in the MATSEQAIJ format for compressed row storage.
13945511cfe3SLois Curfman McInnes 
13955511cfe3SLois Curfman McInnes    Now d_nz should indicate the number of nonzeros per row in the d matrix,
13965511cfe3SLois Curfman McInnes    and o_nz should indicate the number of nonzeros per row in the o matrix.
13975511cfe3SLois Curfman McInnes    In general, for PDE problems in which most nonzeros are near the diagonal,
13983a511b96SLois Curfman McInnes    one expects d_nz >> o_nz.   For additional details, see the users manual
13993a511b96SLois Curfman McInnes    chapter on matrices and the file $(PETSC_DIR)/Performance.
14003a511b96SLois Curfman McInnes 
1401dbd7a890SLois Curfman McInnes .keywords: matrix, aij, compressed row, sparse, parallel
1402ff756334SLois Curfman McInnes 
1403fafbff53SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
14048a729477SBarry Smith @*/
14051eb62cbbSBarry Smith int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
14061eb62cbbSBarry Smith                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *newmat)
14078a729477SBarry Smith {
14088a729477SBarry Smith   Mat          mat;
1409416022c9SBarry Smith   Mat_MPIAIJ   *a;
14106abc6512SBarry Smith   int          ierr, i,sum[2],work[2];
1411416022c9SBarry Smith 
14128a729477SBarry Smith   *newmat         = 0;
14130452661fSBarry Smith   PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,comm);
1414a5a9c739SBarry Smith   PLogObjectCreate(mat);
14150452661fSBarry Smith   mat->data       = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
1416416022c9SBarry Smith   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
141744a69424SLois Curfman McInnes   mat->destroy    = MatDestroy_MPIAIJ;
141844a69424SLois Curfman McInnes   mat->view       = MatView_MPIAIJ;
14198a729477SBarry Smith   mat->factor     = 0;
1420c456f294SBarry Smith   mat->assembled  = PETSC_FALSE;
1421d6dfbf8fSBarry Smith 
142264119d58SLois Curfman McInnes   a->insertmode = NOT_SET_VALUES;
142317699dbbSLois Curfman McInnes   MPI_Comm_rank(comm,&a->rank);
142417699dbbSLois Curfman McInnes   MPI_Comm_size(comm,&a->size);
14251eb62cbbSBarry Smith 
1426b4fd4287SBarry Smith   if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
14271987afe7SBarry Smith     SETERRQ(1,"MatCreateMPIAIJ:Cannot have PETSc decide rows but set d_nnz or o_nnz");
14281987afe7SBarry Smith 
1429dbd7a890SLois Curfman McInnes   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
14301eb62cbbSBarry Smith     work[0] = m; work[1] = n;
1431d65a2f8fSBarry Smith     MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
1432dbd7a890SLois Curfman McInnes     if (M == PETSC_DECIDE) M = sum[0];
1433dbd7a890SLois Curfman McInnes     if (N == PETSC_DECIDE) N = sum[1];
14341eb62cbbSBarry Smith   }
143517699dbbSLois Curfman McInnes   if (m == PETSC_DECIDE) {m = M/a->size + ((M % a->size) > a->rank);}
143617699dbbSLois Curfman McInnes   if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);}
1437416022c9SBarry Smith   a->m = m;
1438416022c9SBarry Smith   a->n = n;
1439416022c9SBarry Smith   a->N = N;
1440416022c9SBarry Smith   a->M = M;
14411eb62cbbSBarry Smith 
14421eb62cbbSBarry Smith   /* build local table of row and column ownerships */
14430452661fSBarry Smith   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
1444579c6b6fSBarry Smith   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
144517699dbbSLois Curfman McInnes   a->cowners = a->rowners + a->size + 1;
1446416022c9SBarry Smith   MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);
1447416022c9SBarry Smith   a->rowners[0] = 0;
144817699dbbSLois Curfman McInnes   for ( i=2; i<=a->size; i++ ) {
1449416022c9SBarry Smith     a->rowners[i] += a->rowners[i-1];
14508a729477SBarry Smith   }
145117699dbbSLois Curfman McInnes   a->rstart = a->rowners[a->rank];
145217699dbbSLois Curfman McInnes   a->rend   = a->rowners[a->rank+1];
1453416022c9SBarry Smith   MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);
1454416022c9SBarry Smith   a->cowners[0] = 0;
145517699dbbSLois Curfman McInnes   for ( i=2; i<=a->size; i++ ) {
1456416022c9SBarry Smith     a->cowners[i] += a->cowners[i-1];
14578a729477SBarry Smith   }
145817699dbbSLois Curfman McInnes   a->cstart = a->cowners[a->rank];
145917699dbbSLois Curfman McInnes   a->cend   = a->cowners[a->rank+1];
14608a729477SBarry Smith 
14615ace5be8SLois Curfman McInnes   if (d_nz == PETSC_DEFAULT) d_nz = 5;
1462416022c9SBarry Smith   ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&a->A); CHKERRQ(ierr);
1463416022c9SBarry Smith   PLogObjectParent(mat,a->A);
14647b8455f0SLois Curfman McInnes   if (o_nz == PETSC_DEFAULT) o_nz = 0;
1465416022c9SBarry Smith   ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&a->B); CHKERRQ(ierr);
1466416022c9SBarry Smith   PLogObjectParent(mat,a->B);
14678a729477SBarry Smith 
14681eb62cbbSBarry Smith   /* build cache for off array entries formed */
1469416022c9SBarry Smith   ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr);
1470416022c9SBarry Smith   a->colmap      = 0;
1471416022c9SBarry Smith   a->garray      = 0;
14724b0e389bSBarry Smith   a->roworiented = 1;
14738a729477SBarry Smith 
14741eb62cbbSBarry Smith   /* stuff used for matrix vector multiply */
1475416022c9SBarry Smith   a->lvec      = 0;
1476416022c9SBarry Smith   a->Mvctx     = 0;
14778a729477SBarry Smith 
1478d6dfbf8fSBarry Smith   *newmat = mat;
1479d6dfbf8fSBarry Smith   return 0;
1480d6dfbf8fSBarry Smith }
1481c74985f6SBarry Smith 
14823d1612f7SBarry Smith static int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues)
1483d6dfbf8fSBarry Smith {
1484d6dfbf8fSBarry Smith   Mat        mat;
1485416022c9SBarry Smith   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
148625cdf11fSBarry Smith   int        ierr, len,flg;
1487d6dfbf8fSBarry Smith 
1488416022c9SBarry Smith   *newmat       = 0;
14890452661fSBarry Smith   PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm);
1490a5a9c739SBarry Smith   PLogObjectCreate(mat);
14910452661fSBarry Smith   mat->data       = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
1492416022c9SBarry Smith   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
149344a69424SLois Curfman McInnes   mat->destroy    = MatDestroy_MPIAIJ;
149444a69424SLois Curfman McInnes   mat->view       = MatView_MPIAIJ;
1495d6dfbf8fSBarry Smith   mat->factor     = matin->factor;
1496c456f294SBarry Smith   mat->assembled  = PETSC_TRUE;
1497d6dfbf8fSBarry Smith 
1498416022c9SBarry Smith   a->m          = oldmat->m;
1499416022c9SBarry Smith   a->n          = oldmat->n;
1500416022c9SBarry Smith   a->M          = oldmat->M;
1501416022c9SBarry Smith   a->N          = oldmat->N;
1502d6dfbf8fSBarry Smith 
1503416022c9SBarry Smith   a->rstart     = oldmat->rstart;
1504416022c9SBarry Smith   a->rend       = oldmat->rend;
1505416022c9SBarry Smith   a->cstart     = oldmat->cstart;
1506416022c9SBarry Smith   a->cend       = oldmat->cend;
150717699dbbSLois Curfman McInnes   a->size       = oldmat->size;
150817699dbbSLois Curfman McInnes   a->rank       = oldmat->rank;
150964119d58SLois Curfman McInnes   a->insertmode = NOT_SET_VALUES;
1510d6dfbf8fSBarry Smith 
15110452661fSBarry Smith   a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners);
151217699dbbSLois Curfman McInnes   PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
151317699dbbSLois Curfman McInnes   PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));
1514416022c9SBarry Smith   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
15152ee70a88SLois Curfman McInnes   if (oldmat->colmap) {
15160452661fSBarry Smith     a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
1517416022c9SBarry Smith     PLogObjectMemory(mat,(a->N)*sizeof(int));
1518416022c9SBarry Smith     PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
1519416022c9SBarry Smith   } else a->colmap = 0;
1520ec8511deSBarry Smith   if (oldmat->garray && (len = ((Mat_SeqAIJ *) (oldmat->B->data))->n)) {
15210452661fSBarry Smith     a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray);
1522464493b3SBarry Smith     PLogObjectMemory(mat,len*sizeof(int));
1523416022c9SBarry Smith     PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
1524416022c9SBarry Smith   } else a->garray = 0;
1525d6dfbf8fSBarry Smith 
1526416022c9SBarry Smith   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
1527416022c9SBarry Smith   PLogObjectParent(mat,a->lvec);
1528a56f8943SBarry Smith   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
1529416022c9SBarry Smith   PLogObjectParent(mat,a->Mvctx);
1530416022c9SBarry Smith   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
1531416022c9SBarry Smith   PLogObjectParent(mat,a->A);
1532416022c9SBarry Smith   ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
1533416022c9SBarry Smith   PLogObjectParent(mat,a->B);
15345dd7a6c7SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
153525cdf11fSBarry Smith   if (flg) {
1536682d7d0cSBarry Smith     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
1537682d7d0cSBarry Smith   }
15388a729477SBarry Smith   *newmat = mat;
15398a729477SBarry Smith   return 0;
15408a729477SBarry Smith }
1541416022c9SBarry Smith 
1542416022c9SBarry Smith #include "sysio.h"
1543416022c9SBarry Smith 
1544c456f294SBarry Smith int MatLoad_MPIAIJ(Viewer bview,MatType type,Mat *newmat)
1545416022c9SBarry Smith {
1546d65a2f8fSBarry Smith   Mat          A;
1547416022c9SBarry Smith   int          i, nz, ierr, j,rstart, rend, fd;
1548d65a2f8fSBarry Smith   Scalar       *vals,*svals;
1549416022c9SBarry Smith   PetscObject  vobj = (PetscObject) bview;
1550416022c9SBarry Smith   MPI_Comm     comm = vobj->comm;
1551416022c9SBarry Smith   MPI_Status   status;
155217699dbbSLois Curfman McInnes   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1553d65a2f8fSBarry Smith   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
1554d65a2f8fSBarry Smith   int          tag = ((PetscObject)bview)->tag;
1555416022c9SBarry Smith 
155617699dbbSLois Curfman McInnes   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
155717699dbbSLois Curfman McInnes   if (!rank) {
1558416022c9SBarry Smith     ierr = ViewerFileGetDescriptor_Private(bview,&fd); CHKERRQ(ierr);
1559416022c9SBarry Smith     ierr = SYRead(fd,(char *)header,4,SYINT); CHKERRQ(ierr);
1560c456f294SBarry Smith     if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIAIJ:not matrix object");
1561416022c9SBarry Smith   }
1562416022c9SBarry Smith 
1563416022c9SBarry Smith   MPI_Bcast(header+1,3,MPI_INT,0,comm);
1564416022c9SBarry Smith   M = header[1]; N = header[2];
1565416022c9SBarry Smith   /* determine ownership of all rows */
156617699dbbSLois Curfman McInnes   m = M/size + ((M % size) > rank);
15670452661fSBarry Smith   rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
1568416022c9SBarry Smith   MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1569416022c9SBarry Smith   rowners[0] = 0;
157017699dbbSLois Curfman McInnes   for ( i=2; i<=size; i++ ) {
1571416022c9SBarry Smith     rowners[i] += rowners[i-1];
1572416022c9SBarry Smith   }
157317699dbbSLois Curfman McInnes   rstart = rowners[rank];
157417699dbbSLois Curfman McInnes   rend   = rowners[rank+1];
1575416022c9SBarry Smith 
1576416022c9SBarry Smith   /* distribute row lengths to all processors */
15770452661fSBarry Smith   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
1578416022c9SBarry Smith   offlens = ourlens + (rend-rstart);
157917699dbbSLois Curfman McInnes   if (!rank) {
15800452661fSBarry Smith     rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
1581416022c9SBarry Smith     ierr = SYRead(fd,rowlengths,M,SYINT); CHKERRQ(ierr);
15820452661fSBarry Smith     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
158317699dbbSLois Curfman McInnes     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
1584d65a2f8fSBarry Smith     MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
15850452661fSBarry Smith     PetscFree(sndcounts);
1586416022c9SBarry Smith   }
1587416022c9SBarry Smith   else {
1588416022c9SBarry Smith     MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
1589416022c9SBarry Smith   }
1590416022c9SBarry Smith 
159117699dbbSLois Curfman McInnes   if (!rank) {
1592416022c9SBarry Smith     /* calculate the number of nonzeros on each processor */
15930452661fSBarry Smith     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
1594cddf8d76SBarry Smith     PetscMemzero(procsnz,size*sizeof(int));
159517699dbbSLois Curfman McInnes     for ( i=0; i<size; i++ ) {
1596416022c9SBarry Smith       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
1597416022c9SBarry Smith         procsnz[i] += rowlengths[j];
1598416022c9SBarry Smith       }
1599416022c9SBarry Smith     }
16000452661fSBarry Smith     PetscFree(rowlengths);
1601416022c9SBarry Smith 
1602416022c9SBarry Smith     /* determine max buffer needed and allocate it */
1603416022c9SBarry Smith     maxnz = 0;
160417699dbbSLois Curfman McInnes     for ( i=0; i<size; i++ ) {
16050452661fSBarry Smith       maxnz = PetscMax(maxnz,procsnz[i]);
1606416022c9SBarry Smith     }
16070452661fSBarry Smith     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
1608416022c9SBarry Smith 
1609416022c9SBarry Smith     /* read in my part of the matrix column indices  */
1610416022c9SBarry Smith     nz = procsnz[0];
16110452661fSBarry Smith     mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1612d65a2f8fSBarry Smith     ierr = SYRead(fd,mycols,nz,SYINT); CHKERRQ(ierr);
1613d65a2f8fSBarry Smith 
1614d65a2f8fSBarry Smith     /* read in every one elses and ship off */
161517699dbbSLois Curfman McInnes     for ( i=1; i<size; i++ ) {
1616d65a2f8fSBarry Smith       nz = procsnz[i];
1617416022c9SBarry Smith       ierr = SYRead(fd,cols,nz,SYINT); CHKERRQ(ierr);
1618d65a2f8fSBarry Smith       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
1619d65a2f8fSBarry Smith     }
16200452661fSBarry Smith     PetscFree(cols);
1621416022c9SBarry Smith   }
1622416022c9SBarry Smith   else {
1623416022c9SBarry Smith     /* determine buffer space needed for message */
1624416022c9SBarry Smith     nz = 0;
1625416022c9SBarry Smith     for ( i=0; i<m; i++ ) {
1626416022c9SBarry Smith       nz += ourlens[i];
1627416022c9SBarry Smith     }
16280452661fSBarry Smith     mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1629416022c9SBarry Smith 
1630416022c9SBarry Smith     /* receive message of column indices*/
1631d65a2f8fSBarry Smith     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
1632416022c9SBarry Smith     MPI_Get_count(&status,MPI_INT,&maxnz);
1633c456f294SBarry Smith     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file");
1634416022c9SBarry Smith   }
1635416022c9SBarry Smith 
1636416022c9SBarry Smith   /* loop over local rows, determining number of off diagonal entries */
1637cddf8d76SBarry Smith   PetscMemzero(offlens,m*sizeof(int));
1638416022c9SBarry Smith   jj = 0;
1639416022c9SBarry Smith   for ( i=0; i<m; i++ ) {
1640416022c9SBarry Smith     for ( j=0; j<ourlens[i]; j++ ) {
1641d65a2f8fSBarry Smith       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1642416022c9SBarry Smith       jj++;
1643416022c9SBarry Smith     }
1644416022c9SBarry Smith   }
1645d65a2f8fSBarry Smith 
1646d65a2f8fSBarry Smith   /* create our matrix */
1647416022c9SBarry Smith   for ( i=0; i<m; i++ ) {
1648416022c9SBarry Smith     ourlens[i] -= offlens[i];
1649416022c9SBarry Smith   }
1650d65a2f8fSBarry Smith   ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1651d65a2f8fSBarry Smith   A = *newmat;
1652d65a2f8fSBarry Smith   MatSetOption(A,COLUMNS_SORTED);
1653d65a2f8fSBarry Smith   for ( i=0; i<m; i++ ) {
1654d65a2f8fSBarry Smith     ourlens[i] += offlens[i];
1655d65a2f8fSBarry Smith   }
1656416022c9SBarry Smith 
165717699dbbSLois Curfman McInnes   if (!rank) {
16580452661fSBarry Smith     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);
1659416022c9SBarry Smith 
1660416022c9SBarry Smith     /* read in my part of the matrix numerical values  */
1661416022c9SBarry Smith     nz = procsnz[0];
1662416022c9SBarry Smith     ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr);
1663d65a2f8fSBarry Smith 
1664d65a2f8fSBarry Smith     /* insert into matrix */
1665d65a2f8fSBarry Smith     jj      = rstart;
1666d65a2f8fSBarry Smith     smycols = mycols;
1667d65a2f8fSBarry Smith     svals   = vals;
1668d65a2f8fSBarry Smith     for ( i=0; i<m; i++ ) {
1669d65a2f8fSBarry Smith       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1670d65a2f8fSBarry Smith       smycols += ourlens[i];
1671d65a2f8fSBarry Smith       svals   += ourlens[i];
1672d65a2f8fSBarry Smith       jj++;
1673416022c9SBarry Smith     }
1674416022c9SBarry Smith 
1675d65a2f8fSBarry Smith     /* read in other processors and ship out */
167617699dbbSLois Curfman McInnes     for ( i=1; i<size; i++ ) {
1677416022c9SBarry Smith       nz = procsnz[i];
1678416022c9SBarry Smith       ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr);
1679d65a2f8fSBarry Smith       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
1680416022c9SBarry Smith     }
16810452661fSBarry Smith     PetscFree(procsnz);
1682416022c9SBarry Smith   }
1683d65a2f8fSBarry Smith   else {
1684d65a2f8fSBarry Smith     /* receive numeric values */
16850452661fSBarry Smith     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);
1686416022c9SBarry Smith 
1687d65a2f8fSBarry Smith     /* receive message of values*/
1688d65a2f8fSBarry Smith     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
1689d65a2f8fSBarry Smith     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1690c456f294SBarry Smith     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file");
1691d65a2f8fSBarry Smith 
1692d65a2f8fSBarry Smith     /* insert into matrix */
1693d65a2f8fSBarry Smith     jj      = rstart;
1694d65a2f8fSBarry Smith     smycols = mycols;
1695d65a2f8fSBarry Smith     svals   = vals;
1696d65a2f8fSBarry Smith     for ( i=0; i<m; i++ ) {
1697d65a2f8fSBarry Smith       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1698d65a2f8fSBarry Smith       smycols += ourlens[i];
1699d65a2f8fSBarry Smith       svals   += ourlens[i];
1700d65a2f8fSBarry Smith       jj++;
1701d65a2f8fSBarry Smith     }
1702d65a2f8fSBarry Smith   }
17030452661fSBarry Smith   PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);
1704d65a2f8fSBarry Smith 
1705d65a2f8fSBarry Smith   ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
1706d65a2f8fSBarry Smith   ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
1707416022c9SBarry Smith   return 0;
1708416022c9SBarry Smith }
1709