xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 3a511b965e4e3ce816f0ab4de2dcfdf24075fa37)
1cb512458SBarry Smith #ifndef lint
2*3a511b96SLois Curfman McInnes static char vcid[] = "$Id: mpiaij.c,v 1.123 1996/02/07 23:13:36 balay Exp curfman $";
3cb512458SBarry Smith #endif
48a729477SBarry Smith 
51eb62cbbSBarry Smith #include "mpiaij.h"
68a729477SBarry Smith #include "vec/vecimpl.h"
7d6dfbf8fSBarry Smith #include "inline/spops.h"
88a729477SBarry Smith 
99e25ed09SBarry Smith /* local utility routine that creates a mapping from the global column
109e25ed09SBarry Smith number to the local number in the off-diagonal part of the local
119e25ed09SBarry Smith storage of the matrix.  This is done in a non scable way since the
129e25ed09SBarry Smith length of colmap equals the global matrix length.
139e25ed09SBarry Smith */
14b7c46309SBarry Smith static int CreateColmap_Private(Mat mat)
159e25ed09SBarry Smith {
1644a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
17ec8511deSBarry Smith   Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
18416022c9SBarry Smith   int        n = B->n,i,shift = B->indexshift;
19dbb450caSBarry Smith 
200452661fSBarry Smith   aij->colmap = (int *) PetscMalloc(aij->N*sizeof(int));CHKPTRQ(aij->colmap);
21464493b3SBarry Smith   PLogObjectMemory(mat,aij->N*sizeof(int));
22cddf8d76SBarry Smith   PetscMemzero(aij->colmap,aij->N*sizeof(int));
23dbb450caSBarry Smith   for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i-shift;
249e25ed09SBarry Smith   return 0;
259e25ed09SBarry Smith }
269e25ed09SBarry Smith 
272493cbb0SBarry Smith extern int DisAssemble_MPIAIJ(Mat);
282493cbb0SBarry Smith 
2951c98ccdSLois Curfman McInnes static int MatGetReordering_MPIAIJ(Mat mat,MatOrdering type,IS *rperm,IS *cperm)
30299609e3SLois Curfman McInnes {
31299609e3SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
32299609e3SLois Curfman McInnes   int        ierr;
3317699dbbSLois Curfman McInnes   if (aij->size == 1) {
3451c98ccdSLois Curfman McInnes     ierr = MatGetReordering(aij->A,type,rperm,cperm); CHKERRQ(ierr);
3548d91487SBarry Smith   } else SETERRQ(1,"MatGetReordering_MPIAIJ:not supported in parallel");
36299609e3SLois Curfman McInnes   return 0;
37299609e3SLois Curfman McInnes }
38299609e3SLois Curfman McInnes 
394b0e389bSBarry Smith static int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
408a729477SBarry Smith {
4144a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
42dbb450caSBarry Smith   Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data;
434b0e389bSBarry Smith   Scalar     value;
441eb62cbbSBarry Smith   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
451eb62cbbSBarry Smith   int        cstart = aij->cstart, cend = aij->cend,row,col;
464b0e389bSBarry Smith   int        shift = C->indexshift,roworiented = aij->roworiented;
478a729477SBarry Smith 
4864119d58SLois Curfman McInnes   if (aij->insertmode != NOT_SET_VALUES && aij->insertmode != addv) {
4948d91487SBarry Smith     SETERRQ(1,"MatSetValues_MPIAIJ:Cannot mix inserts and adds");
508a729477SBarry Smith   }
511eb62cbbSBarry Smith   aij->insertmode = addv;
528a729477SBarry Smith   for ( i=0; i<m; i++ ) {
534b0e389bSBarry Smith     if (im[i] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative row");
544b0e389bSBarry Smith     if (im[i] >= aij->M) SETERRQ(1,"MatSetValues_MPIAIJ:Row too large");
554b0e389bSBarry Smith     if (im[i] >= rstart && im[i] < rend) {
564b0e389bSBarry Smith       row = im[i] - rstart;
571eb62cbbSBarry Smith       for ( j=0; j<n; j++ ) {
584b0e389bSBarry Smith         if (in[j] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative column");
594b0e389bSBarry Smith         if (in[j] >= aij->N) SETERRQ(1,"MatSetValues_MPIAIJ:Col too large");
604b0e389bSBarry Smith         if (in[j] >= cstart && in[j] < cend){
614b0e389bSBarry Smith           col = in[j] - cstart;
624b0e389bSBarry Smith           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
634b0e389bSBarry Smith           ierr = MatSetValues(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
641eb62cbbSBarry Smith         }
651eb62cbbSBarry Smith         else {
66227d817aSBarry Smith           if (mat->was_assembled) {
67b7c46309SBarry Smith             if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
684b0e389bSBarry Smith             col = aij->colmap[in[j]] + shift;
69ec8511deSBarry Smith             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
702493cbb0SBarry Smith               ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
714b0e389bSBarry Smith               col =  in[j];
72d6dfbf8fSBarry Smith             }
739e25ed09SBarry Smith           }
744b0e389bSBarry Smith           else col = in[j];
754b0e389bSBarry Smith           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
764b0e389bSBarry Smith           ierr = MatSetValues(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
771eb62cbbSBarry Smith         }
781eb62cbbSBarry Smith       }
791eb62cbbSBarry Smith     }
801eb62cbbSBarry Smith     else {
814b0e389bSBarry Smith       if (roworiented) {
824b0e389bSBarry Smith         ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
834b0e389bSBarry Smith       }
844b0e389bSBarry Smith       else {
854b0e389bSBarry Smith         row = im[i];
864b0e389bSBarry Smith         for ( j=0; j<n; j++ ) {
874b0e389bSBarry Smith           ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
884b0e389bSBarry Smith         }
894b0e389bSBarry Smith       }
901eb62cbbSBarry Smith     }
918a729477SBarry Smith   }
928a729477SBarry Smith   return 0;
938a729477SBarry Smith }
948a729477SBarry Smith 
95b49de8d1SLois Curfman McInnes static int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
96b49de8d1SLois Curfman McInnes {
97b49de8d1SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
98b49de8d1SLois Curfman McInnes   Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data;
99b49de8d1SLois Curfman McInnes   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
100b49de8d1SLois Curfman McInnes   int        cstart = aij->cstart, cend = aij->cend,row,col;
101b49de8d1SLois Curfman McInnes   int        shift = C->indexshift;
102b49de8d1SLois Curfman McInnes 
103b49de8d1SLois Curfman McInnes   for ( i=0; i<m; i++ ) {
104b49de8d1SLois Curfman McInnes     if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative row");
105b49de8d1SLois Curfman McInnes     if (idxm[i] >= aij->M) SETERRQ(1,"MatGetValues_MPIAIJ:Row too large");
106b49de8d1SLois Curfman McInnes     if (idxm[i] >= rstart && idxm[i] < rend) {
107b49de8d1SLois Curfman McInnes       row = idxm[i] - rstart;
108b49de8d1SLois Curfman McInnes       for ( j=0; j<n; j++ ) {
109b49de8d1SLois Curfman McInnes         if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative column");
110b49de8d1SLois Curfman McInnes         if (idxn[j] >= aij->N) SETERRQ(1,"MatGetValues_MPIAIJ:Col too large");
111b49de8d1SLois Curfman McInnes         if (idxn[j] >= cstart && idxn[j] < cend){
112b49de8d1SLois Curfman McInnes           col = idxn[j] - cstart;
113b49de8d1SLois Curfman McInnes           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
114b49de8d1SLois Curfman McInnes         }
115b49de8d1SLois Curfman McInnes         else {
116b49de8d1SLois Curfman McInnes           col = aij->colmap[idxn[j]] + shift;
117b49de8d1SLois Curfman McInnes           ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
118b49de8d1SLois Curfman McInnes         }
119b49de8d1SLois Curfman McInnes       }
120b49de8d1SLois Curfman McInnes     }
121b49de8d1SLois Curfman McInnes     else {
122b49de8d1SLois Curfman McInnes       SETERRQ(1,"MatGetValues_MPIAIJ:Only local values currently supported");
123b49de8d1SLois Curfman McInnes     }
124b49de8d1SLois Curfman McInnes   }
125b49de8d1SLois Curfman McInnes   return 0;
126b49de8d1SLois Curfman McInnes }
127b49de8d1SLois Curfman McInnes 
128fc76ce87SLois Curfman McInnes static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
1298a729477SBarry Smith {
13044a69424SLois Curfman McInnes   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
131d6dfbf8fSBarry Smith   MPI_Comm    comm = mat->comm;
13217699dbbSLois Curfman McInnes   int         size = aij->size, *owners = aij->rowners;
13317699dbbSLois Curfman McInnes   int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
1341eb62cbbSBarry Smith   MPI_Request *send_waits,*recv_waits;
1356abc6512SBarry Smith   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
1361eb62cbbSBarry Smith   InsertMode  addv;
1371eb62cbbSBarry Smith   Scalar      *rvalues,*svalues;
1381eb62cbbSBarry Smith 
1391eb62cbbSBarry Smith   /* make sure all processors are either in INSERTMODE or ADDMODE */
140d65a2f8fSBarry Smith   MPI_Allreduce(&aij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
141dbb450caSBarry Smith   if (addv == (ADD_VALUES|INSERT_VALUES)) {
142bbb6d6a8SBarry Smith     SETERRQ(1,"MatAssemblyBegin_MPIAIJ:Some processors inserted others added");
1431eb62cbbSBarry Smith   }
1441eb62cbbSBarry Smith   aij->insertmode = addv; /* in case this processor had no cache */
1451eb62cbbSBarry Smith 
1461eb62cbbSBarry Smith   /*  first count number of contributors to each processor */
1470452661fSBarry Smith   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
148cddf8d76SBarry Smith   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
1490452661fSBarry Smith   owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
1501eb62cbbSBarry Smith   for ( i=0; i<aij->stash.n; i++ ) {
1511eb62cbbSBarry Smith     idx = aij->stash.idx[i];
15217699dbbSLois Curfman McInnes     for ( j=0; j<size; j++ ) {
1531eb62cbbSBarry Smith       if (idx >= owners[j] && idx < owners[j+1]) {
1541eb62cbbSBarry Smith         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
1558a729477SBarry Smith       }
1568a729477SBarry Smith     }
1578a729477SBarry Smith   }
15817699dbbSLois Curfman McInnes   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
1591eb62cbbSBarry Smith 
1601eb62cbbSBarry Smith   /* inform other processors of number of messages and max length*/
1610452661fSBarry Smith   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
16217699dbbSLois Curfman McInnes   MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);
16317699dbbSLois Curfman McInnes   nreceives = work[rank];
16417699dbbSLois Curfman McInnes   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
16517699dbbSLois Curfman McInnes   nmax = work[rank];
1660452661fSBarry Smith   PetscFree(work);
1671eb62cbbSBarry Smith 
1681eb62cbbSBarry Smith   /* post receives:
1691eb62cbbSBarry Smith        1) each message will consist of ordered pairs
1701eb62cbbSBarry Smith      (global index,value) we store the global index as a double
171d6dfbf8fSBarry Smith      to simplify the message passing.
1721eb62cbbSBarry Smith        2) since we don't know how long each individual message is we
1731eb62cbbSBarry Smith      allocate the largest needed buffer for each receive. Potentially
1741eb62cbbSBarry Smith      this is a lot of wasted space.
1751eb62cbbSBarry Smith 
1761eb62cbbSBarry Smith 
1771eb62cbbSBarry Smith        This could be done better.
1781eb62cbbSBarry Smith   */
1790452661fSBarry Smith   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
18078b31e54SBarry Smith   CHKPTRQ(rvalues);
1810452661fSBarry Smith   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
18278b31e54SBarry Smith   CHKPTRQ(recv_waits);
1831eb62cbbSBarry Smith   for ( i=0; i<nreceives; i++ ) {
184416022c9SBarry Smith     MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
1851eb62cbbSBarry Smith               comm,recv_waits+i);
1861eb62cbbSBarry Smith   }
1871eb62cbbSBarry Smith 
1881eb62cbbSBarry Smith   /* do sends:
1891eb62cbbSBarry Smith       1) starts[i] gives the starting index in svalues for stuff going to
1901eb62cbbSBarry Smith          the ith processor
1911eb62cbbSBarry Smith   */
1920452661fSBarry Smith   svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
1930452661fSBarry Smith   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
19478b31e54SBarry Smith   CHKPTRQ(send_waits);
1950452661fSBarry Smith   starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
1961eb62cbbSBarry Smith   starts[0] = 0;
19717699dbbSLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
1981eb62cbbSBarry Smith   for ( i=0; i<aij->stash.n; i++ ) {
1991eb62cbbSBarry Smith     svalues[3*starts[owner[i]]]       = (Scalar)  aij->stash.idx[i];
2001eb62cbbSBarry Smith     svalues[3*starts[owner[i]]+1]     = (Scalar)  aij->stash.idy[i];
2011eb62cbbSBarry Smith     svalues[3*(starts[owner[i]]++)+2] =  aij->stash.array[i];
2021eb62cbbSBarry Smith   }
2030452661fSBarry Smith   PetscFree(owner);
2041eb62cbbSBarry Smith   starts[0] = 0;
20517699dbbSLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
2061eb62cbbSBarry Smith   count = 0;
20717699dbbSLois Curfman McInnes   for ( i=0; i<size; i++ ) {
2081eb62cbbSBarry Smith     if (procs[i]) {
209416022c9SBarry Smith       MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
2101eb62cbbSBarry Smith                 comm,send_waits+count++);
2111eb62cbbSBarry Smith     }
2121eb62cbbSBarry Smith   }
2130452661fSBarry Smith   PetscFree(starts); PetscFree(nprocs);
2141eb62cbbSBarry Smith 
2151eb62cbbSBarry Smith   /* Free cache space */
2162191d07cSBarry Smith   PLogInfo(0,"[%d]MatAssemblyBegin_MPIAIJ:Number of off processor values %d\n",rank,aij->stash.n);
21778b31e54SBarry Smith   ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
2181eb62cbbSBarry Smith 
2191eb62cbbSBarry Smith   aij->svalues    = svalues;    aij->rvalues    = rvalues;
2201eb62cbbSBarry Smith   aij->nsends     = nsends;     aij->nrecvs     = nreceives;
2211eb62cbbSBarry Smith   aij->send_waits = send_waits; aij->recv_waits = recv_waits;
2221eb62cbbSBarry Smith   aij->rmax       = nmax;
2231eb62cbbSBarry Smith 
2241eb62cbbSBarry Smith   return 0;
2251eb62cbbSBarry Smith }
22644a69424SLois Curfman McInnes extern int MatSetUpMultiply_MPIAIJ(Mat);
2271eb62cbbSBarry Smith 
228fc76ce87SLois Curfman McInnes static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
2291eb62cbbSBarry Smith {
23044a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
231dbb450caSBarry Smith   Mat_SeqAIJ *C = (Mat_SeqAIJ *) aij->A->data;
2321eb62cbbSBarry Smith   MPI_Status  *send_status,recv_status;
233416022c9SBarry Smith   int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
234416022c9SBarry Smith   int         row,col,other_disassembled,shift = C->indexshift;
2351eb62cbbSBarry Smith   Scalar      *values,val;
2361eb62cbbSBarry Smith   InsertMode  addv = aij->insertmode;
2371eb62cbbSBarry Smith 
2381eb62cbbSBarry Smith   /*  wait on receives */
2391eb62cbbSBarry Smith   while (count) {
240d6dfbf8fSBarry Smith     MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
2411eb62cbbSBarry Smith     /* unpack receives into our local space */
242d6dfbf8fSBarry Smith     values = aij->rvalues + 3*imdex*aij->rmax;
243c60448a5SBarry Smith     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
2441eb62cbbSBarry Smith     n = n/3;
2451eb62cbbSBarry Smith     for ( i=0; i<n; i++ ) {
246227d817aSBarry Smith       row = (int) PetscReal(values[3*i]) - aij->rstart;
247227d817aSBarry Smith       col = (int) PetscReal(values[3*i+1]);
2481eb62cbbSBarry Smith       val = values[3*i+2];
2491eb62cbbSBarry Smith       if (col >= aij->cstart && col < aij->cend) {
2501eb62cbbSBarry Smith         col -= aij->cstart;
2511eb62cbbSBarry Smith         MatSetValues(aij->A,1,&row,1,&col,&val,addv);
2521eb62cbbSBarry Smith       }
2531eb62cbbSBarry Smith       else {
254227d817aSBarry Smith         if (mat->was_assembled) {
255b7c46309SBarry Smith           if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
256dbb450caSBarry Smith           col = aij->colmap[col] + shift;
257ec8511deSBarry Smith           if (col < 0  && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
2582493cbb0SBarry Smith             ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
259227d817aSBarry Smith             col = (int) PetscReal(values[3*i+1]);
260d6dfbf8fSBarry Smith           }
2619e25ed09SBarry Smith         }
2621eb62cbbSBarry Smith         MatSetValues(aij->B,1,&row,1,&col,&val,addv);
2631eb62cbbSBarry Smith       }
2641eb62cbbSBarry Smith     }
2651eb62cbbSBarry Smith     count--;
2661eb62cbbSBarry Smith   }
2670452661fSBarry Smith   PetscFree(aij->recv_waits); PetscFree(aij->rvalues);
2681eb62cbbSBarry Smith 
2691eb62cbbSBarry Smith   /* wait on sends */
2701eb62cbbSBarry Smith   if (aij->nsends) {
2710452661fSBarry Smith     send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));
27278b31e54SBarry Smith     CHKPTRQ(send_status);
2731eb62cbbSBarry Smith     MPI_Waitall(aij->nsends,aij->send_waits,send_status);
2740452661fSBarry Smith     PetscFree(send_status);
2751eb62cbbSBarry Smith   }
2760452661fSBarry Smith   PetscFree(aij->send_waits); PetscFree(aij->svalues);
2771eb62cbbSBarry Smith 
27864119d58SLois Curfman McInnes   aij->insertmode = NOT_SET_VALUES;
27978b31e54SBarry Smith   ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
28078b31e54SBarry Smith   ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);
2811eb62cbbSBarry Smith 
2822493cbb0SBarry Smith   /* determine if any processor has disassembled, if so we must
2832493cbb0SBarry Smith      also disassemble ourselfs, in order that we may reassemble. */
284227d817aSBarry Smith   MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
285227d817aSBarry Smith   if (mat->was_assembled && !other_disassembled) {
2862493cbb0SBarry Smith     ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
2872493cbb0SBarry Smith   }
2882493cbb0SBarry Smith 
289227d817aSBarry Smith   if (!mat->was_assembled && mode == FINAL_ASSEMBLY) {
29078b31e54SBarry Smith     ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
2915e42470aSBarry Smith   }
29278b31e54SBarry Smith   ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
29378b31e54SBarry Smith   ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);
2945e42470aSBarry Smith 
2958a729477SBarry Smith   return 0;
2968a729477SBarry Smith }
2978a729477SBarry Smith 
2982ee70a88SLois Curfman McInnes static int MatZeroEntries_MPIAIJ(Mat A)
2991eb62cbbSBarry Smith {
30044a69424SLois Curfman McInnes   Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
301dbd7a890SLois Curfman McInnes   int        ierr;
30278b31e54SBarry Smith   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
30378b31e54SBarry Smith   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
3041eb62cbbSBarry Smith   return 0;
3051eb62cbbSBarry Smith }
3061eb62cbbSBarry Smith 
3071eb62cbbSBarry Smith /* the code does not do the diagonal entries correctly unless the
3081eb62cbbSBarry Smith    matrix is square and the column and row owerships are identical.
3091eb62cbbSBarry Smith    This is a BUG. The only way to fix it seems to be to access
3101eb62cbbSBarry Smith    aij->A and aij->B directly and not through the MatZeroRows()
3111eb62cbbSBarry Smith    routine.
3121eb62cbbSBarry Smith */
31344a69424SLois Curfman McInnes static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
3141eb62cbbSBarry Smith {
31544a69424SLois Curfman McInnes   Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
31617699dbbSLois Curfman McInnes   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
3176abc6512SBarry Smith   int            *procs,*nprocs,j,found,idx,nsends,*work;
31817699dbbSLois Curfman McInnes   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
3195392566eSBarry Smith   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
320d6dfbf8fSBarry Smith   int            *lens,imdex,*lrows,*values;
321d6dfbf8fSBarry Smith   MPI_Comm       comm = A->comm;
3221eb62cbbSBarry Smith   MPI_Request    *send_waits,*recv_waits;
3231eb62cbbSBarry Smith   MPI_Status     recv_status,*send_status;
3241eb62cbbSBarry Smith   IS             istmp;
3251eb62cbbSBarry Smith 
32678b31e54SBarry Smith   ierr = ISGetLocalSize(is,&N); CHKERRQ(ierr);
32778b31e54SBarry Smith   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
3281eb62cbbSBarry Smith 
3291eb62cbbSBarry Smith   /*  first count number of contributors to each processor */
3300452661fSBarry Smith   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
331cddf8d76SBarry Smith   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
3320452661fSBarry Smith   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
3331eb62cbbSBarry Smith   for ( i=0; i<N; i++ ) {
3341eb62cbbSBarry Smith     idx = rows[i];
3351eb62cbbSBarry Smith     found = 0;
33617699dbbSLois Curfman McInnes     for ( j=0; j<size; j++ ) {
3371eb62cbbSBarry Smith       if (idx >= owners[j] && idx < owners[j+1]) {
3381eb62cbbSBarry Smith         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
3391eb62cbbSBarry Smith       }
3401eb62cbbSBarry Smith     }
341bbb6d6a8SBarry Smith     if (!found) SETERRQ(1,"MatZeroRows_MPIAIJ:Index out of range");
3421eb62cbbSBarry Smith   }
34317699dbbSLois Curfman McInnes   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
3441eb62cbbSBarry Smith 
3451eb62cbbSBarry Smith   /* inform other processors of number of messages and max length*/
3460452661fSBarry Smith   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
34717699dbbSLois Curfman McInnes   MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
34817699dbbSLois Curfman McInnes   nrecvs = work[rank];
34917699dbbSLois Curfman McInnes   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
35017699dbbSLois Curfman McInnes   nmax = work[rank];
3510452661fSBarry Smith   PetscFree(work);
3521eb62cbbSBarry Smith 
3531eb62cbbSBarry Smith   /* post receives:   */
3540452661fSBarry Smith   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
35578b31e54SBarry Smith   CHKPTRQ(rvalues);
3560452661fSBarry Smith   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
35778b31e54SBarry Smith   CHKPTRQ(recv_waits);
3581eb62cbbSBarry Smith   for ( i=0; i<nrecvs; i++ ) {
359416022c9SBarry Smith     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
3601eb62cbbSBarry Smith   }
3611eb62cbbSBarry Smith 
3621eb62cbbSBarry Smith   /* do sends:
3631eb62cbbSBarry Smith       1) starts[i] gives the starting index in svalues for stuff going to
3641eb62cbbSBarry Smith          the ith processor
3651eb62cbbSBarry Smith   */
3660452661fSBarry Smith   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
3670452661fSBarry Smith   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
36878b31e54SBarry Smith   CHKPTRQ(send_waits);
3690452661fSBarry Smith   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
3701eb62cbbSBarry Smith   starts[0] = 0;
37117699dbbSLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
3721eb62cbbSBarry Smith   for ( i=0; i<N; i++ ) {
3731eb62cbbSBarry Smith     svalues[starts[owner[i]]++] = rows[i];
3741eb62cbbSBarry Smith   }
3751eb62cbbSBarry Smith   ISRestoreIndices(is,&rows);
3761eb62cbbSBarry Smith 
3771eb62cbbSBarry Smith   starts[0] = 0;
37817699dbbSLois Curfman McInnes   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
3791eb62cbbSBarry Smith   count = 0;
38017699dbbSLois Curfman McInnes   for ( i=0; i<size; i++ ) {
3811eb62cbbSBarry Smith     if (procs[i]) {
382416022c9SBarry Smith       MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
3831eb62cbbSBarry Smith     }
3841eb62cbbSBarry Smith   }
3850452661fSBarry Smith   PetscFree(starts);
3861eb62cbbSBarry Smith 
38717699dbbSLois Curfman McInnes   base = owners[rank];
3881eb62cbbSBarry Smith 
3891eb62cbbSBarry Smith   /*  wait on receives */
3900452661fSBarry Smith   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
3911eb62cbbSBarry Smith   source = lens + nrecvs;
3921eb62cbbSBarry Smith   count  = nrecvs; slen = 0;
3931eb62cbbSBarry Smith   while (count) {
394d6dfbf8fSBarry Smith     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
3951eb62cbbSBarry Smith     /* unpack receives into our local space */
3961eb62cbbSBarry Smith     MPI_Get_count(&recv_status,MPI_INT,&n);
397d6dfbf8fSBarry Smith     source[imdex]  = recv_status.MPI_SOURCE;
398d6dfbf8fSBarry Smith     lens[imdex]  = n;
3991eb62cbbSBarry Smith     slen += n;
4001eb62cbbSBarry Smith     count--;
4011eb62cbbSBarry Smith   }
4020452661fSBarry Smith   PetscFree(recv_waits);
4031eb62cbbSBarry Smith 
4041eb62cbbSBarry Smith   /* move the data into the send scatter */
4050452661fSBarry Smith   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
4061eb62cbbSBarry Smith   count = 0;
4071eb62cbbSBarry Smith   for ( i=0; i<nrecvs; i++ ) {
4081eb62cbbSBarry Smith     values = rvalues + i*nmax;
4091eb62cbbSBarry Smith     for ( j=0; j<lens[i]; j++ ) {
4101eb62cbbSBarry Smith       lrows[count++] = values[j] - base;
4111eb62cbbSBarry Smith     }
4121eb62cbbSBarry Smith   }
4130452661fSBarry Smith   PetscFree(rvalues); PetscFree(lens);
4140452661fSBarry Smith   PetscFree(owner); PetscFree(nprocs);
4151eb62cbbSBarry Smith 
4161eb62cbbSBarry Smith   /* actually zap the local rows */
417416022c9SBarry Smith   ierr = ISCreateSeq(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
418464493b3SBarry Smith   PLogObjectParent(A,istmp);
4190452661fSBarry Smith   PetscFree(lrows);
42078b31e54SBarry Smith   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
42178b31e54SBarry Smith   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
42278b31e54SBarry Smith   ierr = ISDestroy(istmp); CHKERRQ(ierr);
4231eb62cbbSBarry Smith 
4241eb62cbbSBarry Smith   /* wait on sends */
4251eb62cbbSBarry Smith   if (nsends) {
4260452661fSBarry Smith     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
42778b31e54SBarry Smith     CHKPTRQ(send_status);
4281eb62cbbSBarry Smith     MPI_Waitall(nsends,send_waits,send_status);
4290452661fSBarry Smith     PetscFree(send_status);
4301eb62cbbSBarry Smith   }
4310452661fSBarry Smith   PetscFree(send_waits); PetscFree(svalues);
4321eb62cbbSBarry Smith 
4331eb62cbbSBarry Smith   return 0;
4341eb62cbbSBarry Smith }
4351eb62cbbSBarry Smith 
436416022c9SBarry Smith static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
4371eb62cbbSBarry Smith {
438416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
4391eb62cbbSBarry Smith   int        ierr;
440416022c9SBarry Smith 
44164119d58SLois Curfman McInnes   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
442416022c9SBarry Smith   ierr = MatMult(a->A,xx,yy); CHKERRQ(ierr);
44364119d58SLois Curfman McInnes   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
444416022c9SBarry Smith   ierr = MatMultAdd(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
4451eb62cbbSBarry Smith   return 0;
4461eb62cbbSBarry Smith }
4471eb62cbbSBarry Smith 
448416022c9SBarry Smith static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
449da3a660dSBarry Smith {
450416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
451da3a660dSBarry Smith   int        ierr;
45264119d58SLois Curfman McInnes   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
453416022c9SBarry Smith   ierr = MatMultAdd(a->A,xx,yy,zz); CHKERRQ(ierr);
45464119d58SLois Curfman McInnes   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
455416022c9SBarry Smith   ierr = MatMultAdd(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
456da3a660dSBarry Smith   return 0;
457da3a660dSBarry Smith }
458da3a660dSBarry Smith 
459416022c9SBarry Smith static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
460da3a660dSBarry Smith {
461416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
462da3a660dSBarry Smith   int        ierr;
463da3a660dSBarry Smith 
464da3a660dSBarry Smith   /* do nondiagonal part */
465416022c9SBarry Smith   ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr);
466da3a660dSBarry Smith   /* send it on its way */
467416022c9SBarry Smith   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,
46864119d58SLois Curfman McInnes                 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
469da3a660dSBarry Smith   /* do local part */
470416022c9SBarry Smith   ierr = MatMultTrans(a->A,xx,yy); CHKERRQ(ierr);
471da3a660dSBarry Smith   /* receive remote parts: note this assumes the values are not actually */
472da3a660dSBarry Smith   /* inserted in yy until the next line, which is true for my implementation*/
473da3a660dSBarry Smith   /* but is not perhaps always true. */
474416022c9SBarry Smith   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,
47564119d58SLois Curfman McInnes                   (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
476da3a660dSBarry Smith   return 0;
477da3a660dSBarry Smith }
478da3a660dSBarry Smith 
479416022c9SBarry Smith static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
480da3a660dSBarry Smith {
481416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
482da3a660dSBarry Smith   int        ierr;
483da3a660dSBarry Smith 
484da3a660dSBarry Smith   /* do nondiagonal part */
485416022c9SBarry Smith   ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr);
486da3a660dSBarry Smith   /* send it on its way */
487416022c9SBarry Smith   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,
48864119d58SLois Curfman McInnes                  (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
489da3a660dSBarry Smith   /* do local part */
490416022c9SBarry Smith   ierr = MatMultTransAdd(a->A,xx,yy,zz); CHKERRQ(ierr);
491da3a660dSBarry Smith   /* receive remote parts: note this assumes the values are not actually */
492da3a660dSBarry Smith   /* inserted in yy until the next line, which is true for my implementation*/
493da3a660dSBarry Smith   /* but is not perhaps always true. */
494416022c9SBarry Smith   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,
49564119d58SLois Curfman McInnes                   (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
496da3a660dSBarry Smith   return 0;
497da3a660dSBarry Smith }
498da3a660dSBarry Smith 
4991eb62cbbSBarry Smith /*
5001eb62cbbSBarry Smith   This only works correctly for square matrices where the subblock A->A is the
5011eb62cbbSBarry Smith    diagonal block
5021eb62cbbSBarry Smith */
503416022c9SBarry Smith static int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
5041eb62cbbSBarry Smith {
505416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
506416022c9SBarry Smith   return MatGetDiagonal(a->A,v);
5071eb62cbbSBarry Smith }
5081eb62cbbSBarry Smith 
509052efed2SBarry Smith static int MatScale_MPIAIJ(Scalar *aa,Mat A)
510052efed2SBarry Smith {
511052efed2SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
512052efed2SBarry Smith   int        ierr;
513052efed2SBarry Smith   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
514052efed2SBarry Smith   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
515052efed2SBarry Smith   return 0;
516052efed2SBarry Smith }
517052efed2SBarry Smith 
51844a69424SLois Curfman McInnes static int MatDestroy_MPIAIJ(PetscObject obj)
5191eb62cbbSBarry Smith {
5201eb62cbbSBarry Smith   Mat        mat = (Mat) obj;
52144a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
5221eb62cbbSBarry Smith   int        ierr;
523a5a9c739SBarry Smith #if defined(PETSC_LOG)
5246e35fa57SLois Curfman McInnes   PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N);
525a5a9c739SBarry Smith #endif
5260452661fSBarry Smith   PetscFree(aij->rowners);
52778b31e54SBarry Smith   ierr = MatDestroy(aij->A); CHKERRQ(ierr);
52878b31e54SBarry Smith   ierr = MatDestroy(aij->B); CHKERRQ(ierr);
5290452661fSBarry Smith   if (aij->colmap) PetscFree(aij->colmap);
5300452661fSBarry Smith   if (aij->garray) PetscFree(aij->garray);
5311eb62cbbSBarry Smith   if (aij->lvec)   VecDestroy(aij->lvec);
532a56f8943SBarry Smith   if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
5330452661fSBarry Smith   PetscFree(aij);
534a5a9c739SBarry Smith   PLogObjectDestroy(mat);
5350452661fSBarry Smith   PetscHeaderDestroy(mat);
5361eb62cbbSBarry Smith   return 0;
5371eb62cbbSBarry Smith }
5387857610eSBarry Smith #include "draw.h"
539b16a3bb1SBarry Smith #include "pinclude/pviewer.h"
540ee50ffe9SBarry Smith 
541416022c9SBarry Smith static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
5421eb62cbbSBarry Smith {
543416022c9SBarry Smith   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
544416022c9SBarry Smith   int         ierr;
545416022c9SBarry Smith 
54617699dbbSLois Curfman McInnes   if (aij->size == 1) {
547416022c9SBarry Smith     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
548416022c9SBarry Smith   }
549a523beb4SLois Curfman McInnes   else SETERRQ(1,"MatView_MPIAIJ_Binary:Only uniprocessor output supported");
550416022c9SBarry Smith   return 0;
551416022c9SBarry Smith }
552416022c9SBarry Smith 
553d7e8b826SBarry Smith static int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
554416022c9SBarry Smith {
55544a69424SLois Curfman McInnes   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
556dbb450caSBarry Smith   Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
557a56f8943SBarry Smith   int         ierr, format,shift = C->indexshift,rank;
558d13ab20cSBarry Smith   PetscObject vobj = (PetscObject) viewer;
559d636dbe3SBarry Smith   FILE        *fd;
560416022c9SBarry Smith 
561416022c9SBarry Smith   if (vobj->type == ASCII_FILE_VIEWER || vobj->type == ASCII_FILES_VIEWER) {
562416022c9SBarry Smith     ierr = ViewerFileGetFormat_Private(viewer,&format);
56308480c60SBarry Smith     if (format == FILE_FORMAT_INFO_DETAILED) {
564a56f8943SBarry Smith       int nz,nzalloc,mem;
565a56f8943SBarry Smith       MPI_Comm_rank(mat->comm,&rank);
566227d817aSBarry Smith       ierr = ViewerFileGetPointer(viewer,&fd); CHKERRQ(ierr);
567a56f8943SBarry Smith       ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem);
568a56f8943SBarry Smith       MPIU_Seq_begin(mat->comm,1);
569a56f8943SBarry Smith       fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d \n",rank,aij->m,nz,
570a56f8943SBarry Smith                 nzalloc,mem);
57108480c60SBarry Smith       ierr = MatGetInfo(aij->A,MAT_LOCAL,&nz,&nzalloc,&mem);
57208480c60SBarry Smith       fprintf(fd,"[%d] on diagonal nz %d \n",rank,nz);
57308480c60SBarry Smith       ierr = MatGetInfo(aij->B,MAT_LOCAL,&nz,&nzalloc,&mem);
57408480c60SBarry Smith       fprintf(fd,"[%d] off diagonal nz %d \n",rank,nz);
575a56f8943SBarry Smith       fflush(fd);
576a56f8943SBarry Smith       MPIU_Seq_end(mat->comm,1);
577a40aa06bSLois Curfman McInnes       ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr);
578416022c9SBarry Smith       return 0;
579416022c9SBarry Smith     }
58008480c60SBarry Smith     else if (format == FILE_FORMAT_INFO) {
58108480c60SBarry Smith       return 0;
58208480c60SBarry Smith     }
583416022c9SBarry Smith   }
584416022c9SBarry Smith 
585416022c9SBarry Smith   if (vobj->type == ASCII_FILE_VIEWER) {
586227d817aSBarry Smith     ierr = ViewerFileGetPointer(viewer,&fd); CHKERRQ(ierr);
5877f813858SBarry Smith     MPIU_Seq_begin(mat->comm,1);
588d13ab20cSBarry Smith     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
58917699dbbSLois Curfman McInnes            aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
5901eb62cbbSBarry Smith            aij->cend);
59178b31e54SBarry Smith     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
59278b31e54SBarry Smith     ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
593d13ab20cSBarry Smith     fflush(fd);
5947f813858SBarry Smith     MPIU_Seq_end(mat->comm,1);
595d13ab20cSBarry Smith   }
596416022c9SBarry Smith   else {
597a56f8943SBarry Smith     int size = aij->size;
598a56f8943SBarry Smith     rank = aij->rank;
59917699dbbSLois Curfman McInnes     if (size == 1) {
60078b31e54SBarry Smith       ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
60195373324SBarry Smith     }
60295373324SBarry Smith     else {
60395373324SBarry Smith       /* assemble the entire matrix onto first processor. */
60495373324SBarry Smith       Mat         A;
605ec8511deSBarry Smith       Mat_SeqAIJ *Aloc;
6062eb8c8abSBarry Smith       int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
60795373324SBarry Smith       Scalar      *a;
6082ee70a88SLois Curfman McInnes 
60917699dbbSLois Curfman McInnes       if (!rank) {
610b4fd4287SBarry Smith         ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
611c451ab25SLois Curfman McInnes                CHKERRQ(ierr);
61295373324SBarry Smith       }
61395373324SBarry Smith       else {
614b4fd4287SBarry Smith         ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
615c451ab25SLois Curfman McInnes                CHKERRQ(ierr);
61695373324SBarry Smith       }
617464493b3SBarry Smith       PLogObjectParent(mat,A);
618416022c9SBarry Smith 
61995373324SBarry Smith       /* copy over the A part */
620ec8511deSBarry Smith       Aloc = (Mat_SeqAIJ*) aij->A->data;
6212ee70a88SLois Curfman McInnes       m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
62295373324SBarry Smith       row = aij->rstart;
623dbb450caSBarry Smith       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
62495373324SBarry Smith       for ( i=0; i<m; i++ ) {
625416022c9SBarry Smith         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
62695373324SBarry Smith         row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
62795373324SBarry Smith       }
6282ee70a88SLois Curfman McInnes       aj = Aloc->j;
629dbb450caSBarry Smith       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}
63095373324SBarry Smith 
63195373324SBarry Smith       /* copy over the B part */
632ec8511deSBarry Smith       Aloc = (Mat_SeqAIJ*) aij->B->data;
6332ee70a88SLois Curfman McInnes       m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
63495373324SBarry Smith       row = aij->rstart;
6350452661fSBarry Smith       ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
636dbb450caSBarry Smith       for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
63795373324SBarry Smith       for ( i=0; i<m; i++ ) {
638416022c9SBarry Smith         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
63995373324SBarry Smith         row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
64095373324SBarry Smith       }
6410452661fSBarry Smith       PetscFree(ct);
64278b31e54SBarry Smith       ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
64378b31e54SBarry Smith       ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
64417699dbbSLois Curfman McInnes       if (!rank) {
64578b31e54SBarry Smith         ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
64695373324SBarry Smith       }
64778b31e54SBarry Smith       ierr = MatDestroy(A); CHKERRQ(ierr);
64895373324SBarry Smith     }
64995373324SBarry Smith   }
6501eb62cbbSBarry Smith   return 0;
6511eb62cbbSBarry Smith }
6521eb62cbbSBarry Smith 
653416022c9SBarry Smith static int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
654416022c9SBarry Smith {
655416022c9SBarry Smith   Mat         mat = (Mat) obj;
656416022c9SBarry Smith   int         ierr;
657416022c9SBarry Smith   PetscObject vobj = (PetscObject) viewer;
658416022c9SBarry Smith 
659416022c9SBarry Smith   if (!viewer) {
660416022c9SBarry Smith     viewer = STDOUT_VIEWER_SELF; vobj = (PetscObject) viewer;
661416022c9SBarry Smith   }
662416022c9SBarry Smith   if (vobj->cookie == DRAW_COOKIE && vobj->type == NULLWINDOW) return 0;
663416022c9SBarry Smith   else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILE_VIEWER) {
664d7e8b826SBarry Smith     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
665416022c9SBarry Smith   }
666416022c9SBarry Smith   else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILES_VIEWER) {
667d7e8b826SBarry Smith     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
668d7e8b826SBarry Smith   }
669d7e8b826SBarry Smith   else if (vobj->cookie == VIEWER_COOKIE && vobj->type == MATLAB_VIEWER) {
670d7e8b826SBarry Smith     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
671416022c9SBarry Smith   }
672416022c9SBarry Smith   else if (vobj->cookie == DRAW_COOKIE) {
673d7e8b826SBarry Smith     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
674416022c9SBarry Smith   }
675416022c9SBarry Smith   else if (vobj->type == BINARY_FILE_VIEWER) {
676416022c9SBarry Smith     return MatView_MPIAIJ_Binary(mat,viewer);
677416022c9SBarry Smith   }
678416022c9SBarry Smith   return 0;
679416022c9SBarry Smith }
680416022c9SBarry Smith 
681ec8511deSBarry Smith extern int MatMarkDiag_SeqAIJ(Mat);
6821eb62cbbSBarry Smith /*
6831eb62cbbSBarry Smith     This has to provide several versions.
6841eb62cbbSBarry Smith 
6851eb62cbbSBarry Smith      1) per sequential
6861eb62cbbSBarry Smith      2) a) use only local smoothing updating outer values only once.
6871eb62cbbSBarry Smith         b) local smoothing updating outer values each inner iteration
688d6dfbf8fSBarry Smith      3) color updating out values betwen colors.
6891eb62cbbSBarry Smith */
690fc76ce87SLois Curfman McInnes static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
691dbb450caSBarry Smith                            double fshift,int its,Vec xx)
6928a729477SBarry Smith {
69344a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
694d6dfbf8fSBarry Smith   Mat        AA = mat->A, BB = mat->B;
695ec8511deSBarry Smith   Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
6966abc6512SBarry Smith   Scalar     zero = 0.0,*b,*x,*xs,*ls,d,*v,sum,scale,*t,*ts;
6976abc6512SBarry Smith   int        ierr,*idx, *diag;
698416022c9SBarry Smith   int        n = mat->n, m = mat->m, i,shift = A->indexshift;
699da3a660dSBarry Smith   Vec        tt;
7008a729477SBarry Smith 
701d6dfbf8fSBarry Smith   VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
702dbb450caSBarry Smith   xs = x + shift; /* shift by one for index start of 1 */
703dbb450caSBarry Smith   ls = ls + shift;
704ec8511deSBarry Smith   if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;}
705d6dfbf8fSBarry Smith   diag = A->diag;
706acb40c82SBarry Smith   if (flag == SOR_APPLY_UPPER || flag == SOR_APPLY_LOWER) {
70748d91487SBarry Smith     SETERRQ(1,"MatRelax_MPIAIJ:Option not supported");
708acb40c82SBarry Smith   }
709da3a660dSBarry Smith   if (flag & SOR_EISENSTAT) {
710da3a660dSBarry Smith     /* Let  A = L + U + D; where L is lower trianglar,
711da3a660dSBarry Smith     U is upper triangular, E is diagonal; This routine applies
712da3a660dSBarry Smith 
713da3a660dSBarry Smith             (L + E)^{-1} A (U + E)^{-1}
714da3a660dSBarry Smith 
715da3a660dSBarry Smith     to a vector efficiently using Eisenstat's trick. This is for
716da3a660dSBarry Smith     the case of SSOR preconditioner, so E is D/omega where omega
717da3a660dSBarry Smith     is the relaxation factor.
718da3a660dSBarry Smith     */
71978b31e54SBarry Smith     ierr = VecDuplicate(xx,&tt); CHKERRQ(ierr);
720464493b3SBarry Smith     PLogObjectParent(matin,tt);
721da3a660dSBarry Smith     VecGetArray(tt,&t);
722da3a660dSBarry Smith     scale = (2.0/omega) - 1.0;
723da3a660dSBarry Smith     /*  x = (E + U)^{-1} b */
724da3a660dSBarry Smith     VecSet(&zero,mat->lvec);
72564119d58SLois Curfman McInnes     ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
72678b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
727da3a660dSBarry Smith     for ( i=m-1; i>-1; i-- ) {
728da3a660dSBarry Smith       n    = A->i[i+1] - diag[i] - 1;
729dbb450caSBarry Smith       idx  = A->j + diag[i] + !shift;
730dbb450caSBarry Smith       v    = A->a + diag[i] + !shift;
731da3a660dSBarry Smith       sum  = b[i];
732da3a660dSBarry Smith       SPARSEDENSEMDOT(sum,xs,v,idx,n);
733dbb450caSBarry Smith       d    = fshift + A->a[diag[i]+shift];
734da3a660dSBarry Smith       n    = B->i[i+1] - B->i[i];
735dbb450caSBarry Smith       idx  = B->j + B->i[i] + shift;
736dbb450caSBarry Smith       v    = B->a + B->i[i] + shift;
737da3a660dSBarry Smith       SPARSEDENSEMDOT(sum,ls,v,idx,n);
738da3a660dSBarry Smith       x[i] = omega*(sum/d);
739da3a660dSBarry Smith     }
74064119d58SLois Curfman McInnes     ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
74178b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
742da3a660dSBarry Smith 
743da3a660dSBarry Smith     /*  t = b - (2*E - D)x */
744da3a660dSBarry Smith     v = A->a;
745dbb450caSBarry Smith     for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ + shift])*x[i]; }
746da3a660dSBarry Smith 
747da3a660dSBarry Smith     /*  t = (E + L)^{-1}t */
748dbb450caSBarry Smith     ts = t + shift; /* shifted by one for index start of a or mat->j*/
749da3a660dSBarry Smith     diag = A->diag;
750da3a660dSBarry Smith     VecSet(&zero,mat->lvec);
75164119d58SLois Curfman McInnes     ierr = VecPipelineBegin(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
75278b31e54SBarry Smith                                                  mat->Mvctx); CHKERRQ(ierr);
753da3a660dSBarry Smith     for ( i=0; i<m; i++ ) {
754da3a660dSBarry Smith       n    = diag[i] - A->i[i];
755dbb450caSBarry Smith       idx  = A->j + A->i[i] + shift;
756dbb450caSBarry Smith       v    = A->a + A->i[i] + shift;
757da3a660dSBarry Smith       sum  = t[i];
758da3a660dSBarry Smith       SPARSEDENSEMDOT(sum,ts,v,idx,n);
759dbb450caSBarry Smith       d    = fshift + A->a[diag[i]+shift];
760da3a660dSBarry Smith       n    = B->i[i+1] - B->i[i];
761dbb450caSBarry Smith       idx  = B->j + B->i[i] + shift;
762dbb450caSBarry Smith       v    = B->a + B->i[i] + shift;
763da3a660dSBarry Smith       SPARSEDENSEMDOT(sum,ls,v,idx,n);
764da3a660dSBarry Smith       t[i] = omega*(sum/d);
765da3a660dSBarry Smith     }
76664119d58SLois Curfman McInnes     ierr = VecPipelineEnd(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
76778b31e54SBarry Smith                                                     mat->Mvctx); CHKERRQ(ierr);
768da3a660dSBarry Smith     /*  x = x + t */
769da3a660dSBarry Smith     for ( i=0; i<m; i++ ) { x[i] += t[i]; }
770da3a660dSBarry Smith     VecDestroy(tt);
771da3a660dSBarry Smith     return 0;
772da3a660dSBarry Smith   }
773da3a660dSBarry Smith 
7741eb62cbbSBarry Smith 
775d6dfbf8fSBarry Smith   if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){
776da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
777da3a660dSBarry Smith       VecSet(&zero,mat->lvec); VecSet(&zero,xx);
778da3a660dSBarry Smith     }
779da3a660dSBarry Smith     else {
78064119d58SLois Curfman McInnes       ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
78178b31e54SBarry Smith       CHKERRQ(ierr);
78264119d58SLois Curfman McInnes       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
78378b31e54SBarry Smith       CHKERRQ(ierr);
784da3a660dSBarry Smith     }
785d6dfbf8fSBarry Smith     while (its--) {
786d6dfbf8fSBarry Smith       /* go down through the rows */
78764119d58SLois Curfman McInnes       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
78878b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
789d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
790d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
791dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
792dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
793d6dfbf8fSBarry Smith         sum  = b[i];
794d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
795dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
796d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
797dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
798dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
799d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
800d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
801d6dfbf8fSBarry Smith       }
80264119d58SLois Curfman McInnes       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
80378b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
804d6dfbf8fSBarry Smith       /* come up through the rows */
80564119d58SLois Curfman McInnes       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
80678b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
807d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
808d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
809dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
810dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
811d6dfbf8fSBarry Smith         sum  = b[i];
812d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
813dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
814d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
815dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
816dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
817d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
818d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
819d6dfbf8fSBarry Smith       }
82064119d58SLois Curfman McInnes       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
82178b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
822d6dfbf8fSBarry Smith     }
823d6dfbf8fSBarry Smith   }
824d6dfbf8fSBarry Smith   else if (flag & SOR_FORWARD_SWEEP){
825da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
826da3a660dSBarry Smith       VecSet(&zero,mat->lvec);
82764119d58SLois Curfman McInnes       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
82878b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
829da3a660dSBarry Smith       for ( i=0; i<m; i++ ) {
830da3a660dSBarry Smith         n    = diag[i] - A->i[i];
831dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
832dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
833da3a660dSBarry Smith         sum  = b[i];
834da3a660dSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
835dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
836da3a660dSBarry Smith         n    = B->i[i+1] - B->i[i];
837dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
838dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
839da3a660dSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
840da3a660dSBarry Smith         x[i] = omega*(sum/d);
841da3a660dSBarry Smith       }
84264119d58SLois Curfman McInnes       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
84378b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
844da3a660dSBarry Smith       its--;
845da3a660dSBarry Smith     }
846d6dfbf8fSBarry Smith     while (its--) {
84764119d58SLois Curfman McInnes       ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
84878b31e54SBarry Smith       CHKERRQ(ierr);
84964119d58SLois Curfman McInnes       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
85078b31e54SBarry Smith       CHKERRQ(ierr);
85164119d58SLois Curfman McInnes       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
85278b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
853d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
854d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
855dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
856dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
857d6dfbf8fSBarry Smith         sum  = b[i];
858d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
859dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
860d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
861dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
862dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
863d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
864d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
865d6dfbf8fSBarry Smith       }
86664119d58SLois Curfman McInnes       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
86778b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
868d6dfbf8fSBarry Smith     }
869d6dfbf8fSBarry Smith   }
870d6dfbf8fSBarry Smith   else if (flag & SOR_BACKWARD_SWEEP){
871da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
872da3a660dSBarry Smith       VecSet(&zero,mat->lvec);
87364119d58SLois Curfman McInnes       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
87478b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
875da3a660dSBarry Smith       for ( i=m-1; i>-1; i-- ) {
876da3a660dSBarry Smith         n    = A->i[i+1] - diag[i] - 1;
877dbb450caSBarry Smith         idx  = A->j + diag[i] + !shift;
878dbb450caSBarry Smith         v    = A->a + diag[i] + !shift;
879da3a660dSBarry Smith         sum  = b[i];
880da3a660dSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
881dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
882da3a660dSBarry Smith         n    = B->i[i+1] - B->i[i];
883dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
884dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
885da3a660dSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
886da3a660dSBarry Smith         x[i] = omega*(sum/d);
887da3a660dSBarry Smith       }
88864119d58SLois Curfman McInnes       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
88978b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
890da3a660dSBarry Smith       its--;
891da3a660dSBarry Smith     }
892d6dfbf8fSBarry Smith     while (its--) {
89364119d58SLois Curfman McInnes       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN,
89478b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
89564119d58SLois Curfman McInnes       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN,
89678b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
89764119d58SLois Curfman McInnes       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
89878b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
899d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
900d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
901dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
902dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
903d6dfbf8fSBarry Smith         sum  = b[i];
904d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
905dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
906d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
907dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
908dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
909d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
910d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
911d6dfbf8fSBarry Smith       }
91264119d58SLois Curfman McInnes       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
91378b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
914d6dfbf8fSBarry Smith     }
915d6dfbf8fSBarry Smith   }
916d6dfbf8fSBarry Smith   else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
917da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
918dbb450caSBarry Smith       return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
919da3a660dSBarry Smith     }
92064119d58SLois Curfman McInnes     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
92178b31e54SBarry Smith     CHKERRQ(ierr);
92264119d58SLois Curfman McInnes     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
92378b31e54SBarry Smith     CHKERRQ(ierr);
924d6dfbf8fSBarry Smith     while (its--) {
925d6dfbf8fSBarry Smith       /* go down through the rows */
926d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
927d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
928dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
929dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
930d6dfbf8fSBarry Smith         sum  = b[i];
931d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
932dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
933d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
934dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
935dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
936d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
937d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
938d6dfbf8fSBarry Smith       }
939d6dfbf8fSBarry Smith       /* come up through the rows */
940d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
941d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
942dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
943dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
944d6dfbf8fSBarry Smith         sum  = b[i];
945d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
946dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
947d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
948dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
949dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
950d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
951d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
952d6dfbf8fSBarry Smith       }
953d6dfbf8fSBarry Smith     }
954d6dfbf8fSBarry Smith   }
955d6dfbf8fSBarry Smith   else if (flag & SOR_LOCAL_FORWARD_SWEEP){
956da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
957dbb450caSBarry Smith       return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
958da3a660dSBarry Smith     }
95964119d58SLois Curfman McInnes     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
96078b31e54SBarry Smith     CHKERRQ(ierr);
96164119d58SLois Curfman McInnes     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
96278b31e54SBarry Smith     CHKERRQ(ierr);
963d6dfbf8fSBarry Smith     while (its--) {
964d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
965d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
966dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
967dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
968d6dfbf8fSBarry Smith         sum  = b[i];
969d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
970dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
971d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
972dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
973dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
974d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
975d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
976d6dfbf8fSBarry Smith       }
977d6dfbf8fSBarry Smith     }
978d6dfbf8fSBarry Smith   }
979d6dfbf8fSBarry Smith   else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
980da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
981dbb450caSBarry Smith       return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
982da3a660dSBarry Smith     }
98364119d58SLois Curfman McInnes     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,
98478b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
98564119d58SLois Curfman McInnes     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,
98678b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
987d6dfbf8fSBarry Smith     while (its--) {
988d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
989d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
990dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
991dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
992d6dfbf8fSBarry Smith         sum  = b[i];
993d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
994dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
995d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
996dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
997dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
998d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
999d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
1000d6dfbf8fSBarry Smith       }
1001d6dfbf8fSBarry Smith     }
1002d6dfbf8fSBarry Smith   }
10038a729477SBarry Smith   return 0;
10048a729477SBarry Smith }
1005a66be287SLois Curfman McInnes 
1006fc76ce87SLois Curfman McInnes static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,int *nz,
1007fc76ce87SLois Curfman McInnes                              int *nzalloc,int *mem)
1008a66be287SLois Curfman McInnes {
1009a66be287SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1010a66be287SLois Curfman McInnes   Mat        A = mat->A, B = mat->B;
1011a66be287SLois Curfman McInnes   int        ierr, isend[3], irecv[3], nzA, nzallocA, memA;
1012a66be287SLois Curfman McInnes 
101378b31e54SBarry Smith   ierr = MatGetInfo(A,MAT_LOCAL,&nzA,&nzallocA,&memA); CHKERRQ(ierr);
101478b31e54SBarry Smith   ierr = MatGetInfo(B,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr);
1015a66be287SLois Curfman McInnes   isend[0] += nzA; isend[1] += nzallocA; isend[2] += memA;
1016a66be287SLois Curfman McInnes   if (flag == MAT_LOCAL) {
1017a66be287SLois Curfman McInnes     *nz = isend[0]; *nzalloc = isend[1]; *mem = isend[2];
1018a66be287SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_MAX) {
1019d65a2f8fSBarry Smith     MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_MAX,matin->comm);
1020a66be287SLois Curfman McInnes     *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
1021a66be287SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_SUM) {
1022d65a2f8fSBarry Smith     MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_SUM,matin->comm);
1023a66be287SLois Curfman McInnes     *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
1024a66be287SLois Curfman McInnes   }
1025a66be287SLois Curfman McInnes   return 0;
1026a66be287SLois Curfman McInnes }
1027a66be287SLois Curfman McInnes 
1028299609e3SLois Curfman McInnes extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*);
1029299609e3SLois Curfman McInnes extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*);
1030299609e3SLois Curfman McInnes extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double);
1031299609e3SLois Curfman McInnes extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *);
1032299609e3SLois Curfman McInnes extern int MatSolve_MPIAIJ(Mat,Vec,Vec);
1033299609e3SLois Curfman McInnes extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec);
1034299609e3SLois Curfman McInnes extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec);
1035299609e3SLois Curfman McInnes extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec);
1036299609e3SLois Curfman McInnes 
1037416022c9SBarry Smith static int MatSetOption_MPIAIJ(Mat A,MatOption op)
1038c74985f6SBarry Smith {
1039c0bbcb79SLois Curfman McInnes   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1040c74985f6SBarry Smith 
1041c0bbcb79SLois Curfman McInnes   if (op == NO_NEW_NONZERO_LOCATIONS ||
1042c0bbcb79SLois Curfman McInnes       op == YES_NEW_NONZERO_LOCATIONS ||
1043c0bbcb79SLois Curfman McInnes       op == COLUMNS_SORTED ||
1044c0bbcb79SLois Curfman McInnes       op == ROW_ORIENTED) {
1045c0bbcb79SLois Curfman McInnes         MatSetOption(a->A,op);
1046c0bbcb79SLois Curfman McInnes         MatSetOption(a->B,op);
1047c74985f6SBarry Smith   }
1048c0bbcb79SLois Curfman McInnes   else if (op == ROWS_SORTED ||
1049c0bbcb79SLois Curfman McInnes            op == SYMMETRIC_MATRIX ||
1050c0bbcb79SLois Curfman McInnes            op == STRUCTURALLY_SYMMETRIC_MATRIX ||
1051c0bbcb79SLois Curfman McInnes            op == YES_NEW_DIAGONALS)
1052c0bbcb79SLois Curfman McInnes     PLogInfo((PetscObject)A,"Info:MatSetOption_MPIAIJ:Option ignored\n");
10534b0e389bSBarry Smith   else if (op == COLUMN_ORIENTED) {
10544b0e389bSBarry Smith     a->roworiented = 0;
10554b0e389bSBarry Smith     MatSetOption(a->A,op);
10564b0e389bSBarry Smith     MatSetOption(a->B,op);
10574b0e389bSBarry Smith   }
1058c0bbcb79SLois Curfman McInnes   else if (op == NO_NEW_DIAGONALS)
10594d39ef84SLois Curfman McInnes     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:NO_NEW_DIAGONALS");}
1060c0bbcb79SLois Curfman McInnes   else
10614d39ef84SLois Curfman McInnes     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:unknown option");}
1062c74985f6SBarry Smith   return 0;
1063c74985f6SBarry Smith }
1064c74985f6SBarry Smith 
1065d1710a03SLois Curfman McInnes static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
1066c74985f6SBarry Smith {
106744a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1068c74985f6SBarry Smith   *m = mat->M; *n = mat->N;
1069c74985f6SBarry Smith   return 0;
1070c74985f6SBarry Smith }
1071c74985f6SBarry Smith 
1072d1710a03SLois Curfman McInnes static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
1073c74985f6SBarry Smith {
107444a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1075b7c46309SBarry Smith   *m = mat->m; *n = mat->N;
1076c74985f6SBarry Smith   return 0;
1077c74985f6SBarry Smith }
1078c74985f6SBarry Smith 
1079d1710a03SLois Curfman McInnes static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
1080c74985f6SBarry Smith {
108144a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1082c74985f6SBarry Smith   *m = mat->rstart; *n = mat->rend;
1083c74985f6SBarry Smith   return 0;
1084c74985f6SBarry Smith }
1085c74985f6SBarry Smith 
108639e00950SLois Curfman McInnes static int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
108739e00950SLois Curfman McInnes {
1088154123eaSLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1089154123eaSLois Curfman McInnes   Scalar     *vworkA, *vworkB, **pvA, **pvB;
1090154123eaSLois Curfman McInnes   int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
1091154123eaSLois Curfman McInnes   int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
109239e00950SLois Curfman McInnes 
1093b49de8d1SLois Curfman McInnes   if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIAIJ:Only local rows")
1094abc0e9e4SLois Curfman McInnes   lrow = row - rstart;
109539e00950SLois Curfman McInnes 
1096154123eaSLois Curfman McInnes   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1097154123eaSLois Curfman McInnes   if (!v)   {pvA = 0; pvB = 0;}
1098154123eaSLois Curfman McInnes   if (!idx) {pcA = 0; if (!v) pcB = 0;}
109978b31e54SBarry Smith   ierr = MatGetRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
110078b31e54SBarry Smith   ierr = MatGetRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1101154123eaSLois Curfman McInnes   nztot = nzA + nzB;
1102154123eaSLois Curfman McInnes 
1103154123eaSLois Curfman McInnes   if (v  || idx) {
1104154123eaSLois Curfman McInnes     if (nztot) {
1105154123eaSLois Curfman McInnes       /* Sort by increasing column numbers, assuming A and B already sorted */
1106299609e3SLois Curfman McInnes       int imark;
1107154123eaSLois Curfman McInnes       for (i=0; i<nzB; i++) cworkB[i] = mat->garray[cworkB[i]];
1108154123eaSLois Curfman McInnes       if (v) {
11090452661fSBarry Smith         *v = (Scalar *) PetscMalloc( (nztot)*sizeof(Scalar) ); CHKPTRQ(*v);
111039e00950SLois Curfman McInnes         for ( i=0; i<nzB; i++ ) {
1111154123eaSLois Curfman McInnes           if (cworkB[i] < cstart)   (*v)[i] = vworkB[i];
1112154123eaSLois Curfman McInnes           else break;
1113154123eaSLois Curfman McInnes         }
1114154123eaSLois Curfman McInnes         imark = i;
1115154123eaSLois Curfman McInnes         for ( i=0; i<nzA; i++ )     (*v)[imark+i] = vworkA[i];
1116299609e3SLois Curfman McInnes         for ( i=imark; i<nzB; i++ ) (*v)[nzA+i] = vworkB[i];
1117154123eaSLois Curfman McInnes       }
1118154123eaSLois Curfman McInnes       if (idx) {
11190452661fSBarry Smith         *idx = (int *) PetscMalloc( (nztot)*sizeof(int) ); CHKPTRQ(*idx);
1120154123eaSLois Curfman McInnes         for (i=0; i<nzA; i++) cworkA[i] += cstart;
1121154123eaSLois Curfman McInnes         for ( i=0; i<nzB; i++ ) {
1122154123eaSLois Curfman McInnes           if (cworkB[i] < cstart)   (*idx)[i] = cworkB[i];
1123154123eaSLois Curfman McInnes           else break;
1124154123eaSLois Curfman McInnes         }
1125154123eaSLois Curfman McInnes         imark = i;
1126154123eaSLois Curfman McInnes         for ( i=0; i<nzA; i++ )     (*idx)[imark+i] = cworkA[i];
1127299609e3SLois Curfman McInnes         for ( i=imark; i<nzB; i++ ) (*idx)[nzA+i] = cworkB[i];
112839e00950SLois Curfman McInnes       }
112939e00950SLois Curfman McInnes     }
113039e00950SLois Curfman McInnes     else {*idx = 0; *v=0;}
1131154123eaSLois Curfman McInnes   }
113239e00950SLois Curfman McInnes   *nz = nztot;
113378b31e54SBarry Smith   ierr = MatRestoreRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
113478b31e54SBarry Smith   ierr = MatRestoreRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
113539e00950SLois Curfman McInnes   return 0;
113639e00950SLois Curfman McInnes }
113739e00950SLois Curfman McInnes 
113839e00950SLois Curfman McInnes static int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
113939e00950SLois Curfman McInnes {
11400452661fSBarry Smith   if (idx) PetscFree(*idx);
11410452661fSBarry Smith   if (v) PetscFree(*v);
114239e00950SLois Curfman McInnes   return 0;
114339e00950SLois Curfman McInnes }
114439e00950SLois Curfman McInnes 
1145cddf8d76SBarry Smith static int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
1146855ac2c5SLois Curfman McInnes {
1147855ac2c5SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1148ec8511deSBarry Smith   Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
1149416022c9SBarry Smith   int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
1150416022c9SBarry Smith   double     sum = 0.0;
115104ca555eSLois Curfman McInnes   Scalar     *v;
115204ca555eSLois Curfman McInnes 
115317699dbbSLois Curfman McInnes   if (aij->size == 1) {
115414183eadSLois Curfman McInnes     ierr =  MatNorm(aij->A,type,norm); CHKERRQ(ierr);
115537fa93a5SLois Curfman McInnes   } else {
115604ca555eSLois Curfman McInnes     if (type == NORM_FROBENIUS) {
115704ca555eSLois Curfman McInnes       v = amat->a;
115804ca555eSLois Curfman McInnes       for (i=0; i<amat->nz; i++ ) {
115904ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX)
116004ca555eSLois Curfman McInnes         sum += real(conj(*v)*(*v)); v++;
116104ca555eSLois Curfman McInnes #else
116204ca555eSLois Curfman McInnes         sum += (*v)*(*v); v++;
116304ca555eSLois Curfman McInnes #endif
116404ca555eSLois Curfman McInnes       }
116504ca555eSLois Curfman McInnes       v = bmat->a;
116604ca555eSLois Curfman McInnes       for (i=0; i<bmat->nz; i++ ) {
116704ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX)
116804ca555eSLois Curfman McInnes         sum += real(conj(*v)*(*v)); v++;
116904ca555eSLois Curfman McInnes #else
117004ca555eSLois Curfman McInnes         sum += (*v)*(*v); v++;
117104ca555eSLois Curfman McInnes #endif
117204ca555eSLois Curfman McInnes       }
117304ca555eSLois Curfman McInnes       MPI_Allreduce((void*)&sum,(void*)norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
117404ca555eSLois Curfman McInnes       *norm = sqrt(*norm);
117504ca555eSLois Curfman McInnes     }
117604ca555eSLois Curfman McInnes     else if (type == NORM_1) { /* max column norm */
117704ca555eSLois Curfman McInnes       double *tmp, *tmp2;
117804ca555eSLois Curfman McInnes       int    *jj, *garray = aij->garray;
11790452661fSBarry Smith       tmp  = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp);
11800452661fSBarry Smith       tmp2 = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp2);
1181cddf8d76SBarry Smith       PetscMemzero(tmp,aij->N*sizeof(double));
118204ca555eSLois Curfman McInnes       *norm = 0.0;
118304ca555eSLois Curfman McInnes       v = amat->a; jj = amat->j;
118404ca555eSLois Curfman McInnes       for ( j=0; j<amat->nz; j++ ) {
1185579c6b6fSBarry Smith         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
118604ca555eSLois Curfman McInnes       }
118704ca555eSLois Curfman McInnes       v = bmat->a; jj = bmat->j;
118804ca555eSLois Curfman McInnes       for ( j=0; j<bmat->nz; j++ ) {
1189579c6b6fSBarry Smith         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
119004ca555eSLois Curfman McInnes       }
119104ca555eSLois Curfman McInnes       MPI_Allreduce((void*)tmp,(void*)tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);
119204ca555eSLois Curfman McInnes       for ( j=0; j<aij->N; j++ ) {
119304ca555eSLois Curfman McInnes         if (tmp2[j] > *norm) *norm = tmp2[j];
119404ca555eSLois Curfman McInnes       }
11950452661fSBarry Smith       PetscFree(tmp); PetscFree(tmp2);
119604ca555eSLois Curfman McInnes     }
119704ca555eSLois Curfman McInnes     else if (type == NORM_INFINITY) { /* max row norm */
1198515d9167SLois Curfman McInnes       double ntemp = 0.0;
119904ca555eSLois Curfman McInnes       for ( j=0; j<amat->m; j++ ) {
1200dbb450caSBarry Smith         v = amat->a + amat->i[j] + shift;
120104ca555eSLois Curfman McInnes         sum = 0.0;
120204ca555eSLois Curfman McInnes         for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
1203cddf8d76SBarry Smith           sum += PetscAbsScalar(*v); v++;
120404ca555eSLois Curfman McInnes         }
1205dbb450caSBarry Smith         v = bmat->a + bmat->i[j] + shift;
120604ca555eSLois Curfman McInnes         for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
1207cddf8d76SBarry Smith           sum += PetscAbsScalar(*v); v++;
120804ca555eSLois Curfman McInnes         }
1209515d9167SLois Curfman McInnes         if (sum > ntemp) ntemp = sum;
121004ca555eSLois Curfman McInnes       }
1211515d9167SLois Curfman McInnes       MPI_Allreduce((void*)&ntemp,(void*)norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);
121204ca555eSLois Curfman McInnes     }
121304ca555eSLois Curfman McInnes     else {
1214515d9167SLois Curfman McInnes       SETERRQ(1,"MatNorm_MPIAIJ:No support for two norm");
121504ca555eSLois Curfman McInnes     }
121637fa93a5SLois Curfman McInnes   }
1217855ac2c5SLois Curfman McInnes   return 0;
1218855ac2c5SLois Curfman McInnes }
1219855ac2c5SLois Curfman McInnes 
12200de55854SLois Curfman McInnes static int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1221b7c46309SBarry Smith {
1222b7c46309SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1223dbb450caSBarry Smith   Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
1224416022c9SBarry Smith   int        ierr,shift = Aloc->indexshift;
1225416022c9SBarry Smith   Mat        B;
1226b7c46309SBarry Smith   int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
1227b7c46309SBarry Smith   Scalar     *array;
1228b7c46309SBarry Smith 
12293638b69dSLois Curfman McInnes   if (matout == PETSC_NULL && M != N)
12303638b69dSLois Curfman McInnes     SETERRQ(1,"MatTranspose_MPIAIJ:Square matrix only for in-place");
1231b4fd4287SBarry Smith   ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,
1232b4fd4287SBarry Smith          PETSC_NULL,&B); CHKERRQ(ierr);
1233b7c46309SBarry Smith 
1234b7c46309SBarry Smith   /* copy over the A part */
1235ec8511deSBarry Smith   Aloc = (Mat_SeqAIJ*) a->A->data;
1236b7c46309SBarry Smith   m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1237b7c46309SBarry Smith   row = a->rstart;
1238dbb450caSBarry Smith   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
1239b7c46309SBarry Smith   for ( i=0; i<m; i++ ) {
1240416022c9SBarry Smith     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1241b7c46309SBarry Smith     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1242b7c46309SBarry Smith   }
1243b7c46309SBarry Smith   aj = Aloc->j;
12444af08d9eSBarry Smith   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}
1245b7c46309SBarry Smith 
1246b7c46309SBarry Smith   /* copy over the B part */
1247ec8511deSBarry Smith   Aloc = (Mat_SeqAIJ*) a->B->data;
1248b7c46309SBarry Smith   m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1249b7c46309SBarry Smith   row = a->rstart;
12500452661fSBarry Smith   ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
1251dbb450caSBarry Smith   for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
1252b7c46309SBarry Smith   for ( i=0; i<m; i++ ) {
1253416022c9SBarry Smith     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1254b7c46309SBarry Smith     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1255b7c46309SBarry Smith   }
12560452661fSBarry Smith   PetscFree(ct);
1257b7c46309SBarry Smith   ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
1258b7c46309SBarry Smith   ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
12593638b69dSLois Curfman McInnes   if (matout != PETSC_NULL) {
12600de55854SLois Curfman McInnes     *matout = B;
12610de55854SLois Curfman McInnes   } else {
12620de55854SLois Curfman McInnes     /* This isn't really an in-place transpose .... but free data structures from a */
12630452661fSBarry Smith     PetscFree(a->rowners);
12640de55854SLois Curfman McInnes     ierr = MatDestroy(a->A); CHKERRQ(ierr);
12650de55854SLois Curfman McInnes     ierr = MatDestroy(a->B); CHKERRQ(ierr);
12660452661fSBarry Smith     if (a->colmap) PetscFree(a->colmap);
12670452661fSBarry Smith     if (a->garray) PetscFree(a->garray);
12680de55854SLois Curfman McInnes     if (a->lvec) VecDestroy(a->lvec);
1269a56f8943SBarry Smith     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
12700452661fSBarry Smith     PetscFree(a);
1271416022c9SBarry Smith     PetscMemcpy(A,B,sizeof(struct _Mat));
12720452661fSBarry Smith     PetscHeaderDestroy(B);
12730de55854SLois Curfman McInnes   }
1274b7c46309SBarry Smith   return 0;
1275b7c46309SBarry Smith }
1276b7c46309SBarry Smith 
1277682d7d0cSBarry Smith extern int MatPrintHelp_SeqAIJ(Mat);
1278682d7d0cSBarry Smith static int MatPrintHelp_MPIAIJ(Mat A)
1279682d7d0cSBarry Smith {
1280682d7d0cSBarry Smith   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1281682d7d0cSBarry Smith 
1282682d7d0cSBarry Smith   if (!a->rank) return MatPrintHelp_SeqAIJ(a->A);
1283682d7d0cSBarry Smith   else return 0;
1284682d7d0cSBarry Smith }
1285682d7d0cSBarry Smith 
1286fc76ce87SLois Curfman McInnes extern int MatConvert_MPIAIJ(Mat,MatType,Mat *);
12873d1612f7SBarry Smith static int MatConvertSameType_MPIAIJ(Mat,Mat *,int);
12882f86bd48SSatish Balay extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int);
1289598137ffSSatish Balay int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **);
12908a729477SBarry Smith /* -------------------------------------------------------------------*/
12912ee70a88SLois Curfman McInnes static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
129239e00950SLois Curfman McInnes        MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
129344a69424SLois Curfman McInnes        MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
129444a69424SLois Curfman McInnes        MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
1295299609e3SLois Curfman McInnes        MatSolve_MPIAIJ,MatSolveAdd_MPIAIJ,
1296299609e3SLois Curfman McInnes        MatSolveTrans_MPIAIJ,MatSolveTransAdd_MPIAIJ,
1297299609e3SLois Curfman McInnes        MatLUFactor_MPIAIJ,0,
129844a69424SLois Curfman McInnes        MatRelax_MPIAIJ,
1299b7c46309SBarry Smith        MatTranspose_MPIAIJ,
1300a66be287SLois Curfman McInnes        MatGetInfo_MPIAIJ,0,
1301855ac2c5SLois Curfman McInnes        MatGetDiagonal_MPIAIJ,0,MatNorm_MPIAIJ,
1302ee50ffe9SBarry Smith        MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
13031eb62cbbSBarry Smith        0,
1304299609e3SLois Curfman McInnes        MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
1305299609e3SLois Curfman McInnes        MatGetReordering_MPIAIJ,
1306299609e3SLois Curfman McInnes        MatLUFactorSymbolic_MPIAIJ,MatLUFactorNumeric_MPIAIJ,0,0,
1307d1710a03SLois Curfman McInnes        MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
1308299609e3SLois Curfman McInnes        MatILUFactorSymbolic_MPIAIJ,0,
13093d1612f7SBarry Smith        0,0,MatConvert_MPIAIJ,0,0,MatConvertSameType_MPIAIJ,0,0,
1310b49de8d1SLois Curfman McInnes        0,0,0,
1311598137ffSSatish Balay        MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0,
1312052efed2SBarry Smith        MatPrintHelp_MPIAIJ,
1313052efed2SBarry Smith        MatScale_MPIAIJ};
13148a729477SBarry Smith 
13151987afe7SBarry Smith /*@C
1316ff756334SLois Curfman McInnes    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
1317*3a511b96SLois Curfman McInnes    (the default parallel PETSc format).  For good matrix assembly performance
1318*3a511b96SLois Curfman McInnes    the user should preallocate the matrix storage by setting the parameters
1319*3a511b96SLois Curfman McInnes    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1320*3a511b96SLois Curfman McInnes    performance can be increased by more than a factor of 50.
13218a729477SBarry Smith 
13228a729477SBarry Smith    Input Parameters:
13231eb62cbbSBarry Smith .  comm - MPI communicator
13247d3e4905SLois Curfman McInnes .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
13257d3e4905SLois Curfman McInnes .  n - number of local columns (or PETSC_DECIDE to have calculated
13267d3e4905SLois Curfman McInnes            if N is given)
13277d3e4905SLois Curfman McInnes .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
13287d3e4905SLois Curfman McInnes .  N - number of global columns (or PETSC_DECIDE to have calculated
13297d3e4905SLois Curfman McInnes            if n is given)
1330ab693e5aSLois Curfman McInnes .  d_nz - number of nonzeros per row in diagonal portion of local submatrix
1331ff756334SLois Curfman McInnes            (same for all local rows)
1332ab693e5aSLois Curfman McInnes .  d_nzz - number of nonzeros per row in diagonal portion of local submatrix
1333ab693e5aSLois Curfman McInnes            or null (possibly different for each row).  You must leave room
1334ab693e5aSLois Curfman McInnes            for the diagonal entry even if it is zero.
1335ab693e5aSLois Curfman McInnes .  o_nz - number of nonzeros per row in off-diagonal portion of local
1336ab693e5aSLois Curfman McInnes            submatrix (same for all local rows).
1337ab693e5aSLois Curfman McInnes .  o_nzz - number of nonzeros per row in off-diagonal portion of local
1338ab693e5aSLois Curfman McInnes            submatrix or null (possibly different for each row).
13398a729477SBarry Smith 
1340ff756334SLois Curfman McInnes    Output Parameter:
13418a729477SBarry Smith .  newmat - the matrix
13428a729477SBarry Smith 
1343ff756334SLois Curfman McInnes    Notes:
1344ff756334SLois Curfman McInnes    The AIJ format (also called the Yale sparse matrix format or
1345ff756334SLois Curfman McInnes    compressed row storage), is fully compatible with standard Fortran 77
13460002213bSLois Curfman McInnes    storage.  That is, the stored row and column indices can begin at
13470002213bSLois Curfman McInnes    either one (as in Fortran) or zero.  See the users manual for details.
1348ff756334SLois Curfman McInnes 
1349ff756334SLois Curfman McInnes    The user MUST specify either the local or global matrix dimensions
1350ff756334SLois Curfman McInnes    (possibly both).
1351ff756334SLois Curfman McInnes 
13525511cfe3SLois Curfman McInnes    By default, this format uses inodes (identical nodes) when possible.
13535511cfe3SLois Curfman McInnes    We search for consecutive rows with the same nonzero structure, thereby
13545511cfe3SLois Curfman McInnes    reusing matrix information to achieve increased efficiency.
13555511cfe3SLois Curfman McInnes 
13565511cfe3SLois Curfman McInnes    Options Database Keys:
13575511cfe3SLois Curfman McInnes $    -mat_aij_no_inode  - Do not use inodes
13585511cfe3SLois Curfman McInnes $    -mat_aij_inode_limit <limit> - Set inode limit (max limit=5)
13595511cfe3SLois Curfman McInnes 
1360e0245417SLois Curfman McInnes    Storage Information:
1361e0245417SLois Curfman McInnes    For a square global matrix we define each processor's diagonal portion
1362e0245417SLois Curfman McInnes    to be its local rows and the corresponding columns (a square submatrix);
1363e0245417SLois Curfman McInnes    each processor's off-diagonal portion encompasses the remainder of the
1364e0245417SLois Curfman McInnes    local matrix (a rectangular submatrix).
1365e0245417SLois Curfman McInnes 
1366e0245417SLois Curfman McInnes    The user can specify preallocated storage for the diagonal part of
13675ace5be8SLois Curfman McInnes    the local submatrix with either d_nz or d_nnz (not both).  Set
13685ace5be8SLois Curfman McInnes    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
13695ace5be8SLois Curfman McInnes    memory allocation.  Likewise, specify preallocated storage for the
13705ace5be8SLois Curfman McInnes    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1371ff756334SLois Curfman McInnes 
13725511cfe3SLois Curfman McInnes    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
13735511cfe3SLois Curfman McInnes    the figure below we depict these three local rows and all columns (0-11).
13742191d07cSBarry Smith 
1375b810aeb4SBarry Smith $          0 1 2 3 4 5 6 7 8 9 10 11
1376b810aeb4SBarry Smith $         -------------------
13775511cfe3SLois Curfman McInnes $  row 3  |  o o o d d d o o o o o o
13785511cfe3SLois Curfman McInnes $  row 4  |  o o o d d d o o o o o o
13795511cfe3SLois Curfman McInnes $  row 5  |  o o o d d d o o o o o o
13805511cfe3SLois Curfman McInnes $         -------------------
1381b810aeb4SBarry Smith $
1382b810aeb4SBarry Smith 
13835511cfe3SLois Curfman McInnes    Thus, any entries in the d locations are stored in the d (diagonal)
13845511cfe3SLois Curfman McInnes    submatrix, and any entries in the o locations are stored in the
13855511cfe3SLois Curfman McInnes    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
13865511cfe3SLois Curfman McInnes    stored simply in the MATSEQAIJ format for compressed row storage.
13875511cfe3SLois Curfman McInnes 
13885511cfe3SLois Curfman McInnes    Now d_nz should indicate the number of nonzeros per row in the d matrix,
13895511cfe3SLois Curfman McInnes    and o_nz should indicate the number of nonzeros per row in the o matrix.
13905511cfe3SLois Curfman McInnes    In general, for PDE problems in which most nonzeros are near the diagonal,
1391*3a511b96SLois Curfman McInnes    one expects d_nz >> o_nz.   For additional details, see the users manual
1392*3a511b96SLois Curfman McInnes    chapter on matrices and the file $(PETSC_DIR)/Performance.
1393*3a511b96SLois Curfman McInnes 
13942191d07cSBarry Smith 
1395dbd7a890SLois Curfman McInnes .keywords: matrix, aij, compressed row, sparse, parallel
1396ff756334SLois Curfman McInnes 
1397fafbff53SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
13988a729477SBarry Smith @*/
13991eb62cbbSBarry Smith int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
14001eb62cbbSBarry Smith                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *newmat)
14018a729477SBarry Smith {
14028a729477SBarry Smith   Mat          mat;
1403416022c9SBarry Smith   Mat_MPIAIJ   *a;
14046abc6512SBarry Smith   int          ierr, i,sum[2],work[2];
1405416022c9SBarry Smith 
14068a729477SBarry Smith   *newmat         = 0;
14070452661fSBarry Smith   PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,comm);
1408a5a9c739SBarry Smith   PLogObjectCreate(mat);
14090452661fSBarry Smith   mat->data       = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
1410416022c9SBarry Smith   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
141144a69424SLois Curfman McInnes   mat->destroy    = MatDestroy_MPIAIJ;
141244a69424SLois Curfman McInnes   mat->view       = MatView_MPIAIJ;
14138a729477SBarry Smith   mat->factor     = 0;
1414c456f294SBarry Smith   mat->assembled  = PETSC_FALSE;
1415d6dfbf8fSBarry Smith 
141664119d58SLois Curfman McInnes   a->insertmode = NOT_SET_VALUES;
141717699dbbSLois Curfman McInnes   MPI_Comm_rank(comm,&a->rank);
141817699dbbSLois Curfman McInnes   MPI_Comm_size(comm,&a->size);
14191eb62cbbSBarry Smith 
1420b4fd4287SBarry Smith   if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
14211987afe7SBarry Smith     SETERRQ(1,"MatCreateMPIAIJ:Cannot have PETSc decide rows but set d_nnz or o_nnz");
14221987afe7SBarry Smith 
1423dbd7a890SLois Curfman McInnes   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
14241eb62cbbSBarry Smith     work[0] = m; work[1] = n;
1425d65a2f8fSBarry Smith     MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
1426dbd7a890SLois Curfman McInnes     if (M == PETSC_DECIDE) M = sum[0];
1427dbd7a890SLois Curfman McInnes     if (N == PETSC_DECIDE) N = sum[1];
14281eb62cbbSBarry Smith   }
142917699dbbSLois Curfman McInnes   if (m == PETSC_DECIDE) {m = M/a->size + ((M % a->size) > a->rank);}
143017699dbbSLois Curfman McInnes   if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);}
1431416022c9SBarry Smith   a->m = m;
1432416022c9SBarry Smith   a->n = n;
1433416022c9SBarry Smith   a->N = N;
1434416022c9SBarry Smith   a->M = M;
14351eb62cbbSBarry Smith 
14361eb62cbbSBarry Smith   /* build local table of row and column ownerships */
14370452661fSBarry Smith   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
1438579c6b6fSBarry Smith   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
143917699dbbSLois Curfman McInnes   a->cowners = a->rowners + a->size + 1;
1440416022c9SBarry Smith   MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);
1441416022c9SBarry Smith   a->rowners[0] = 0;
144217699dbbSLois Curfman McInnes   for ( i=2; i<=a->size; i++ ) {
1443416022c9SBarry Smith     a->rowners[i] += a->rowners[i-1];
14448a729477SBarry Smith   }
144517699dbbSLois Curfman McInnes   a->rstart = a->rowners[a->rank];
144617699dbbSLois Curfman McInnes   a->rend   = a->rowners[a->rank+1];
1447416022c9SBarry Smith   MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);
1448416022c9SBarry Smith   a->cowners[0] = 0;
144917699dbbSLois Curfman McInnes   for ( i=2; i<=a->size; i++ ) {
1450416022c9SBarry Smith     a->cowners[i] += a->cowners[i-1];
14518a729477SBarry Smith   }
145217699dbbSLois Curfman McInnes   a->cstart = a->cowners[a->rank];
145317699dbbSLois Curfman McInnes   a->cend   = a->cowners[a->rank+1];
14548a729477SBarry Smith 
14555ace5be8SLois Curfman McInnes   if (d_nz == PETSC_DEFAULT) d_nz = 5;
1456416022c9SBarry Smith   ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&a->A); CHKERRQ(ierr);
1457416022c9SBarry Smith   PLogObjectParent(mat,a->A);
14587b8455f0SLois Curfman McInnes   if (o_nz == PETSC_DEFAULT) o_nz = 0;
1459416022c9SBarry Smith   ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&a->B); CHKERRQ(ierr);
1460416022c9SBarry Smith   PLogObjectParent(mat,a->B);
14618a729477SBarry Smith 
14621eb62cbbSBarry Smith   /* build cache for off array entries formed */
1463416022c9SBarry Smith   ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr);
1464416022c9SBarry Smith   a->colmap      = 0;
1465416022c9SBarry Smith   a->garray      = 0;
14664b0e389bSBarry Smith   a->roworiented = 1;
14678a729477SBarry Smith 
14681eb62cbbSBarry Smith   /* stuff used for matrix vector multiply */
1469416022c9SBarry Smith   a->lvec      = 0;
1470416022c9SBarry Smith   a->Mvctx     = 0;
14718a729477SBarry Smith 
1472d6dfbf8fSBarry Smith   *newmat = mat;
1473d6dfbf8fSBarry Smith   return 0;
1474d6dfbf8fSBarry Smith }
1475c74985f6SBarry Smith 
14763d1612f7SBarry Smith static int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues)
1477d6dfbf8fSBarry Smith {
1478d6dfbf8fSBarry Smith   Mat        mat;
1479416022c9SBarry Smith   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
148025cdf11fSBarry Smith   int        ierr, len,flg;
1481d6dfbf8fSBarry Smith 
1482416022c9SBarry Smith   *newmat       = 0;
14830452661fSBarry Smith   PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm);
1484a5a9c739SBarry Smith   PLogObjectCreate(mat);
14850452661fSBarry Smith   mat->data       = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
1486416022c9SBarry Smith   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
148744a69424SLois Curfman McInnes   mat->destroy    = MatDestroy_MPIAIJ;
148844a69424SLois Curfman McInnes   mat->view       = MatView_MPIAIJ;
1489d6dfbf8fSBarry Smith   mat->factor     = matin->factor;
1490c456f294SBarry Smith   mat->assembled  = PETSC_TRUE;
1491d6dfbf8fSBarry Smith 
1492416022c9SBarry Smith   a->m          = oldmat->m;
1493416022c9SBarry Smith   a->n          = oldmat->n;
1494416022c9SBarry Smith   a->M          = oldmat->M;
1495416022c9SBarry Smith   a->N          = oldmat->N;
1496d6dfbf8fSBarry Smith 
1497416022c9SBarry Smith   a->rstart     = oldmat->rstart;
1498416022c9SBarry Smith   a->rend       = oldmat->rend;
1499416022c9SBarry Smith   a->cstart     = oldmat->cstart;
1500416022c9SBarry Smith   a->cend       = oldmat->cend;
150117699dbbSLois Curfman McInnes   a->size       = oldmat->size;
150217699dbbSLois Curfman McInnes   a->rank       = oldmat->rank;
150364119d58SLois Curfman McInnes   a->insertmode = NOT_SET_VALUES;
1504d6dfbf8fSBarry Smith 
15050452661fSBarry Smith   a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners);
150617699dbbSLois Curfman McInnes   PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
150717699dbbSLois Curfman McInnes   PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));
1508416022c9SBarry Smith   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
15092ee70a88SLois Curfman McInnes   if (oldmat->colmap) {
15100452661fSBarry Smith     a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
1511416022c9SBarry Smith     PLogObjectMemory(mat,(a->N)*sizeof(int));
1512416022c9SBarry Smith     PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
1513416022c9SBarry Smith   } else a->colmap = 0;
1514ec8511deSBarry Smith   if (oldmat->garray && (len = ((Mat_SeqAIJ *) (oldmat->B->data))->n)) {
15150452661fSBarry Smith     a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray);
1516464493b3SBarry Smith     PLogObjectMemory(mat,len*sizeof(int));
1517416022c9SBarry Smith     PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
1518416022c9SBarry Smith   } else a->garray = 0;
1519d6dfbf8fSBarry Smith 
1520416022c9SBarry Smith   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
1521416022c9SBarry Smith   PLogObjectParent(mat,a->lvec);
1522a56f8943SBarry Smith   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
1523416022c9SBarry Smith   PLogObjectParent(mat,a->Mvctx);
1524416022c9SBarry Smith   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
1525416022c9SBarry Smith   PLogObjectParent(mat,a->A);
1526416022c9SBarry Smith   ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
1527416022c9SBarry Smith   PLogObjectParent(mat,a->B);
15285dd7a6c7SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
152925cdf11fSBarry Smith   if (flg) {
1530682d7d0cSBarry Smith     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
1531682d7d0cSBarry Smith   }
15328a729477SBarry Smith   *newmat = mat;
15338a729477SBarry Smith   return 0;
15348a729477SBarry Smith }
1535416022c9SBarry Smith 
1536416022c9SBarry Smith #include "sysio.h"
1537416022c9SBarry Smith 
1538c456f294SBarry Smith int MatLoad_MPIAIJ(Viewer bview,MatType type,Mat *newmat)
1539416022c9SBarry Smith {
1540d65a2f8fSBarry Smith   Mat          A;
1541416022c9SBarry Smith   int          i, nz, ierr, j,rstart, rend, fd;
1542d65a2f8fSBarry Smith   Scalar       *vals,*svals;
1543416022c9SBarry Smith   PetscObject  vobj = (PetscObject) bview;
1544416022c9SBarry Smith   MPI_Comm     comm = vobj->comm;
1545416022c9SBarry Smith   MPI_Status   status;
154617699dbbSLois Curfman McInnes   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1547d65a2f8fSBarry Smith   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
1548d65a2f8fSBarry Smith   int          tag = ((PetscObject)bview)->tag;
1549416022c9SBarry Smith 
155017699dbbSLois Curfman McInnes   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
155117699dbbSLois Curfman McInnes   if (!rank) {
1552416022c9SBarry Smith     ierr = ViewerFileGetDescriptor_Private(bview,&fd); CHKERRQ(ierr);
1553416022c9SBarry Smith     ierr = SYRead(fd,(char *)header,4,SYINT); CHKERRQ(ierr);
1554c456f294SBarry Smith     if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIAIJ:not matrix object");
1555416022c9SBarry Smith   }
1556416022c9SBarry Smith 
1557416022c9SBarry Smith   MPI_Bcast(header+1,3,MPI_INT,0,comm);
1558416022c9SBarry Smith   M = header[1]; N = header[2];
1559416022c9SBarry Smith   /* determine ownership of all rows */
156017699dbbSLois Curfman McInnes   m = M/size + ((M % size) > rank);
15610452661fSBarry Smith   rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
1562416022c9SBarry Smith   MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1563416022c9SBarry Smith   rowners[0] = 0;
156417699dbbSLois Curfman McInnes   for ( i=2; i<=size; i++ ) {
1565416022c9SBarry Smith     rowners[i] += rowners[i-1];
1566416022c9SBarry Smith   }
156717699dbbSLois Curfman McInnes   rstart = rowners[rank];
156817699dbbSLois Curfman McInnes   rend   = rowners[rank+1];
1569416022c9SBarry Smith 
1570416022c9SBarry Smith   /* distribute row lengths to all processors */
15710452661fSBarry Smith   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
1572416022c9SBarry Smith   offlens = ourlens + (rend-rstart);
157317699dbbSLois Curfman McInnes   if (!rank) {
15740452661fSBarry Smith     rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
1575416022c9SBarry Smith     ierr = SYRead(fd,rowlengths,M,SYINT); CHKERRQ(ierr);
15760452661fSBarry Smith     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
157717699dbbSLois Curfman McInnes     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
1578d65a2f8fSBarry Smith     MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
15790452661fSBarry Smith     PetscFree(sndcounts);
1580416022c9SBarry Smith   }
1581416022c9SBarry Smith   else {
1582416022c9SBarry Smith     MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
1583416022c9SBarry Smith   }
1584416022c9SBarry Smith 
158517699dbbSLois Curfman McInnes   if (!rank) {
1586416022c9SBarry Smith     /* calculate the number of nonzeros on each processor */
15870452661fSBarry Smith     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
1588cddf8d76SBarry Smith     PetscMemzero(procsnz,size*sizeof(int));
158917699dbbSLois Curfman McInnes     for ( i=0; i<size; i++ ) {
1590416022c9SBarry Smith       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
1591416022c9SBarry Smith         procsnz[i] += rowlengths[j];
1592416022c9SBarry Smith       }
1593416022c9SBarry Smith     }
15940452661fSBarry Smith     PetscFree(rowlengths);
1595416022c9SBarry Smith 
1596416022c9SBarry Smith     /* determine max buffer needed and allocate it */
1597416022c9SBarry Smith     maxnz = 0;
159817699dbbSLois Curfman McInnes     for ( i=0; i<size; i++ ) {
15990452661fSBarry Smith       maxnz = PetscMax(maxnz,procsnz[i]);
1600416022c9SBarry Smith     }
16010452661fSBarry Smith     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
1602416022c9SBarry Smith 
1603416022c9SBarry Smith     /* read in my part of the matrix column indices  */
1604416022c9SBarry Smith     nz = procsnz[0];
16050452661fSBarry Smith     mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1606d65a2f8fSBarry Smith     ierr = SYRead(fd,mycols,nz,SYINT); CHKERRQ(ierr);
1607d65a2f8fSBarry Smith 
1608d65a2f8fSBarry Smith     /* read in every one elses and ship off */
160917699dbbSLois Curfman McInnes     for ( i=1; i<size; i++ ) {
1610d65a2f8fSBarry Smith       nz = procsnz[i];
1611416022c9SBarry Smith       ierr = SYRead(fd,cols,nz,SYINT); CHKERRQ(ierr);
1612d65a2f8fSBarry Smith       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
1613d65a2f8fSBarry Smith     }
16140452661fSBarry Smith     PetscFree(cols);
1615416022c9SBarry Smith   }
1616416022c9SBarry Smith   else {
1617416022c9SBarry Smith     /* determine buffer space needed for message */
1618416022c9SBarry Smith     nz = 0;
1619416022c9SBarry Smith     for ( i=0; i<m; i++ ) {
1620416022c9SBarry Smith       nz += ourlens[i];
1621416022c9SBarry Smith     }
16220452661fSBarry Smith     mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1623416022c9SBarry Smith 
1624416022c9SBarry Smith     /* receive message of column indices*/
1625d65a2f8fSBarry Smith     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
1626416022c9SBarry Smith     MPI_Get_count(&status,MPI_INT,&maxnz);
1627c456f294SBarry Smith     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file");
1628416022c9SBarry Smith   }
1629416022c9SBarry Smith 
1630416022c9SBarry Smith   /* loop over local rows, determining number of off diagonal entries */
1631cddf8d76SBarry Smith   PetscMemzero(offlens,m*sizeof(int));
1632416022c9SBarry Smith   jj = 0;
1633416022c9SBarry Smith   for ( i=0; i<m; i++ ) {
1634416022c9SBarry Smith     for ( j=0; j<ourlens[i]; j++ ) {
1635d65a2f8fSBarry Smith       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1636416022c9SBarry Smith       jj++;
1637416022c9SBarry Smith     }
1638416022c9SBarry Smith   }
1639d65a2f8fSBarry Smith 
1640d65a2f8fSBarry Smith   /* create our matrix */
1641416022c9SBarry Smith   for ( i=0; i<m; i++ ) {
1642416022c9SBarry Smith     ourlens[i] -= offlens[i];
1643416022c9SBarry Smith   }
1644d65a2f8fSBarry Smith   ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1645d65a2f8fSBarry Smith   A = *newmat;
1646d65a2f8fSBarry Smith   MatSetOption(A,COLUMNS_SORTED);
1647d65a2f8fSBarry Smith   for ( i=0; i<m; i++ ) {
1648d65a2f8fSBarry Smith     ourlens[i] += offlens[i];
1649d65a2f8fSBarry Smith   }
1650416022c9SBarry Smith 
165117699dbbSLois Curfman McInnes   if (!rank) {
16520452661fSBarry Smith     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);
1653416022c9SBarry Smith 
1654416022c9SBarry Smith     /* read in my part of the matrix numerical values  */
1655416022c9SBarry Smith     nz = procsnz[0];
1656416022c9SBarry Smith     ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr);
1657d65a2f8fSBarry Smith 
1658d65a2f8fSBarry Smith     /* insert into matrix */
1659d65a2f8fSBarry Smith     jj      = rstart;
1660d65a2f8fSBarry Smith     smycols = mycols;
1661d65a2f8fSBarry Smith     svals   = vals;
1662d65a2f8fSBarry Smith     for ( i=0; i<m; i++ ) {
1663d65a2f8fSBarry Smith       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1664d65a2f8fSBarry Smith       smycols += ourlens[i];
1665d65a2f8fSBarry Smith       svals   += ourlens[i];
1666d65a2f8fSBarry Smith       jj++;
1667416022c9SBarry Smith     }
1668416022c9SBarry Smith 
1669d65a2f8fSBarry Smith     /* read in other processors and ship out */
167017699dbbSLois Curfman McInnes     for ( i=1; i<size; i++ ) {
1671416022c9SBarry Smith       nz = procsnz[i];
1672416022c9SBarry Smith       ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr);
1673d65a2f8fSBarry Smith       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
1674416022c9SBarry Smith     }
16750452661fSBarry Smith     PetscFree(procsnz);
1676416022c9SBarry Smith   }
1677d65a2f8fSBarry Smith   else {
1678d65a2f8fSBarry Smith     /* receive numeric values */
16790452661fSBarry Smith     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);
1680416022c9SBarry Smith 
1681d65a2f8fSBarry Smith     /* receive message of values*/
1682d65a2f8fSBarry Smith     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
1683d65a2f8fSBarry Smith     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1684c456f294SBarry Smith     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file");
1685d65a2f8fSBarry Smith 
1686d65a2f8fSBarry Smith     /* insert into matrix */
1687d65a2f8fSBarry Smith     jj      = rstart;
1688d65a2f8fSBarry Smith     smycols = mycols;
1689d65a2f8fSBarry Smith     svals   = vals;
1690d65a2f8fSBarry Smith     for ( i=0; i<m; i++ ) {
1691d65a2f8fSBarry Smith       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1692d65a2f8fSBarry Smith       smycols += ourlens[i];
1693d65a2f8fSBarry Smith       svals   += ourlens[i];
1694d65a2f8fSBarry Smith       jj++;
1695d65a2f8fSBarry Smith     }
1696d65a2f8fSBarry Smith   }
16970452661fSBarry Smith   PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);
1698d65a2f8fSBarry Smith 
1699d65a2f8fSBarry Smith   ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
1700d65a2f8fSBarry Smith   ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
1701416022c9SBarry Smith   return 0;
1702416022c9SBarry Smith }
1703