xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 3638b69dbd64ba4780c119c4ea508ad72b54579c)
1cb512458SBarry Smith #ifndef lint
2*3638b69dSLois Curfman McInnes static char vcid[] = "$Id: mpiaij.c,v 1.119 1996/02/01 17:29:20 bsmith Exp curfman $";
3cb512458SBarry Smith #endif
48a729477SBarry Smith 
51eb62cbbSBarry Smith #include "mpiaij.h"
68a729477SBarry Smith #include "vec/vecimpl.h"
7d6dfbf8fSBarry Smith #include "inline/spops.h"
88a729477SBarry Smith 
99e25ed09SBarry Smith /* local utility routine that creates a mapping from the global column
109e25ed09SBarry Smith number to the local number in the off-diagonal part of the local
119e25ed09SBarry Smith storage of the matrix.  This is done in a non scable way since the
129e25ed09SBarry Smith length of colmap equals the global matrix length.
139e25ed09SBarry Smith */
14b7c46309SBarry Smith static int CreateColmap_Private(Mat mat)
159e25ed09SBarry Smith {
1644a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
17ec8511deSBarry Smith   Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
18416022c9SBarry Smith   int        n = B->n,i,shift = B->indexshift;
19dbb450caSBarry Smith 
200452661fSBarry Smith   aij->colmap = (int *) PetscMalloc(aij->N*sizeof(int));CHKPTRQ(aij->colmap);
21464493b3SBarry Smith   PLogObjectMemory(mat,aij->N*sizeof(int));
22cddf8d76SBarry Smith   PetscMemzero(aij->colmap,aij->N*sizeof(int));
23dbb450caSBarry Smith   for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i-shift;
249e25ed09SBarry Smith   return 0;
259e25ed09SBarry Smith }
269e25ed09SBarry Smith 
272493cbb0SBarry Smith extern int DisAssemble_MPIAIJ(Mat);
282493cbb0SBarry Smith 
2951c98ccdSLois Curfman McInnes static int MatGetReordering_MPIAIJ(Mat mat,MatOrdering type,IS *rperm,IS *cperm)
30299609e3SLois Curfman McInnes {
31299609e3SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
32299609e3SLois Curfman McInnes   int        ierr;
3317699dbbSLois Curfman McInnes   if (aij->size == 1) {
3451c98ccdSLois Curfman McInnes     ierr = MatGetReordering(aij->A,type,rperm,cperm); CHKERRQ(ierr);
3548d91487SBarry Smith   } else SETERRQ(1,"MatGetReordering_MPIAIJ:not supported in parallel");
36299609e3SLois Curfman McInnes   return 0;
37299609e3SLois Curfman McInnes }
38299609e3SLois Curfman McInnes 
394b0e389bSBarry Smith static int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
408a729477SBarry Smith {
4144a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
42dbb450caSBarry Smith   Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data;
434b0e389bSBarry Smith   Scalar     value;
441eb62cbbSBarry Smith   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
451eb62cbbSBarry Smith   int        cstart = aij->cstart, cend = aij->cend,row,col;
464b0e389bSBarry Smith   int        shift = C->indexshift,roworiented = aij->roworiented;
478a729477SBarry Smith 
4864119d58SLois Curfman McInnes   if (aij->insertmode != NOT_SET_VALUES && aij->insertmode != addv) {
4948d91487SBarry Smith     SETERRQ(1,"MatSetValues_MPIAIJ:Cannot mix inserts and adds");
508a729477SBarry Smith   }
511eb62cbbSBarry Smith   aij->insertmode = addv;
528a729477SBarry Smith   for ( i=0; i<m; i++ ) {
534b0e389bSBarry Smith     if (im[i] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative row");
544b0e389bSBarry Smith     if (im[i] >= aij->M) SETERRQ(1,"MatSetValues_MPIAIJ:Row too large");
554b0e389bSBarry Smith     if (im[i] >= rstart && im[i] < rend) {
564b0e389bSBarry Smith       row = im[i] - rstart;
571eb62cbbSBarry Smith       for ( j=0; j<n; j++ ) {
584b0e389bSBarry Smith         if (in[j] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative column");
594b0e389bSBarry Smith         if (in[j] >= aij->N) SETERRQ(1,"MatSetValues_MPIAIJ:Col too large");
604b0e389bSBarry Smith         if (in[j] >= cstart && in[j] < cend){
614b0e389bSBarry Smith           col = in[j] - cstart;
624b0e389bSBarry Smith           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
634b0e389bSBarry Smith           ierr = MatSetValues(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
641eb62cbbSBarry Smith         }
651eb62cbbSBarry Smith         else {
66227d817aSBarry Smith           if (mat->was_assembled) {
67b7c46309SBarry Smith             if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
684b0e389bSBarry Smith             col = aij->colmap[in[j]] + shift;
69ec8511deSBarry Smith             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
702493cbb0SBarry Smith               ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
714b0e389bSBarry Smith               col =  in[j];
72d6dfbf8fSBarry Smith             }
739e25ed09SBarry Smith           }
744b0e389bSBarry Smith           else col = in[j];
754b0e389bSBarry Smith           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
764b0e389bSBarry Smith           ierr = MatSetValues(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
771eb62cbbSBarry Smith         }
781eb62cbbSBarry Smith       }
791eb62cbbSBarry Smith     }
801eb62cbbSBarry Smith     else {
814b0e389bSBarry Smith       if (roworiented) {
824b0e389bSBarry Smith         ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
834b0e389bSBarry Smith       }
844b0e389bSBarry Smith       else {
854b0e389bSBarry Smith         row = im[i];
864b0e389bSBarry Smith         for ( j=0; j<n; j++ ) {
874b0e389bSBarry Smith           ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
884b0e389bSBarry Smith         }
894b0e389bSBarry Smith       }
901eb62cbbSBarry Smith     }
918a729477SBarry Smith   }
928a729477SBarry Smith   return 0;
938a729477SBarry Smith }
948a729477SBarry Smith 
95b49de8d1SLois Curfman McInnes static int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
96b49de8d1SLois Curfman McInnes {
97b49de8d1SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
98b49de8d1SLois Curfman McInnes   Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data;
99b49de8d1SLois Curfman McInnes   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
100b49de8d1SLois Curfman McInnes   int        cstart = aij->cstart, cend = aij->cend,row,col;
101b49de8d1SLois Curfman McInnes   int        shift = C->indexshift;
102b49de8d1SLois Curfman McInnes 
103b49de8d1SLois Curfman McInnes   for ( i=0; i<m; i++ ) {
104b49de8d1SLois Curfman McInnes     if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative row");
105b49de8d1SLois Curfman McInnes     if (idxm[i] >= aij->M) SETERRQ(1,"MatGetValues_MPIAIJ:Row too large");
106b49de8d1SLois Curfman McInnes     if (idxm[i] >= rstart && idxm[i] < rend) {
107b49de8d1SLois Curfman McInnes       row = idxm[i] - rstart;
108b49de8d1SLois Curfman McInnes       for ( j=0; j<n; j++ ) {
109b49de8d1SLois Curfman McInnes         if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative column");
110b49de8d1SLois Curfman McInnes         if (idxn[j] >= aij->N) SETERRQ(1,"MatGetValues_MPIAIJ:Col too large");
111b49de8d1SLois Curfman McInnes         if (idxn[j] >= cstart && idxn[j] < cend){
112b49de8d1SLois Curfman McInnes           col = idxn[j] - cstart;
113b49de8d1SLois Curfman McInnes           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
114b49de8d1SLois Curfman McInnes         }
115b49de8d1SLois Curfman McInnes         else {
116b49de8d1SLois Curfman McInnes           col = aij->colmap[idxn[j]] + shift;
117b49de8d1SLois Curfman McInnes           ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
118b49de8d1SLois Curfman McInnes         }
119b49de8d1SLois Curfman McInnes       }
120b49de8d1SLois Curfman McInnes     }
121b49de8d1SLois Curfman McInnes     else {
122b49de8d1SLois Curfman McInnes       SETERRQ(1,"MatGetValues_MPIAIJ:Only local values currently supported");
123b49de8d1SLois Curfman McInnes     }
124b49de8d1SLois Curfman McInnes   }
125b49de8d1SLois Curfman McInnes   return 0;
126b49de8d1SLois Curfman McInnes }
127b49de8d1SLois Curfman McInnes 
128fc76ce87SLois Curfman McInnes static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
1298a729477SBarry Smith {
13044a69424SLois Curfman McInnes   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
131d6dfbf8fSBarry Smith   MPI_Comm    comm = mat->comm;
13217699dbbSLois Curfman McInnes   int         size = aij->size, *owners = aij->rowners;
13317699dbbSLois Curfman McInnes   int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
1341eb62cbbSBarry Smith   MPI_Request *send_waits,*recv_waits;
1356abc6512SBarry Smith   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
1361eb62cbbSBarry Smith   InsertMode  addv;
1371eb62cbbSBarry Smith   Scalar      *rvalues,*svalues;
1381eb62cbbSBarry Smith 
1391eb62cbbSBarry Smith   /* make sure all processors are either in INSERTMODE or ADDMODE */
140d65a2f8fSBarry Smith   MPI_Allreduce(&aij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
141dbb450caSBarry Smith   if (addv == (ADD_VALUES|INSERT_VALUES)) {
142bbb6d6a8SBarry Smith     SETERRQ(1,"MatAssemblyBegin_MPIAIJ:Some processors inserted others added");
1431eb62cbbSBarry Smith   }
1441eb62cbbSBarry Smith   aij->insertmode = addv; /* in case this processor had no cache */
1451eb62cbbSBarry Smith 
1461eb62cbbSBarry Smith   /*  first count number of contributors to each processor */
1470452661fSBarry Smith   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
148cddf8d76SBarry Smith   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
1490452661fSBarry Smith   owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
1501eb62cbbSBarry Smith   for ( i=0; i<aij->stash.n; i++ ) {
1511eb62cbbSBarry Smith     idx = aij->stash.idx[i];
15217699dbbSLois Curfman McInnes     for ( j=0; j<size; j++ ) {
1531eb62cbbSBarry Smith       if (idx >= owners[j] && idx < owners[j+1]) {
1541eb62cbbSBarry Smith         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
1558a729477SBarry Smith       }
1568a729477SBarry Smith     }
1578a729477SBarry Smith   }
15817699dbbSLois Curfman McInnes   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
1591eb62cbbSBarry Smith 
1601eb62cbbSBarry Smith   /* inform other processors of number of messages and max length*/
1610452661fSBarry Smith   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
16217699dbbSLois Curfman McInnes   MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);
16317699dbbSLois Curfman McInnes   nreceives = work[rank];
16417699dbbSLois Curfman McInnes   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
16517699dbbSLois Curfman McInnes   nmax = work[rank];
1660452661fSBarry Smith   PetscFree(work);
1671eb62cbbSBarry Smith 
1681eb62cbbSBarry Smith   /* post receives:
1691eb62cbbSBarry Smith        1) each message will consist of ordered pairs
1701eb62cbbSBarry Smith      (global index,value) we store the global index as a double
171d6dfbf8fSBarry Smith      to simplify the message passing.
1721eb62cbbSBarry Smith        2) since we don't know how long each individual message is we
1731eb62cbbSBarry Smith      allocate the largest needed buffer for each receive. Potentially
1741eb62cbbSBarry Smith      this is a lot of wasted space.
1751eb62cbbSBarry Smith 
1761eb62cbbSBarry Smith 
1771eb62cbbSBarry Smith        This could be done better.
1781eb62cbbSBarry Smith   */
1790452661fSBarry Smith   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
18078b31e54SBarry Smith   CHKPTRQ(rvalues);
1810452661fSBarry Smith   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
18278b31e54SBarry Smith   CHKPTRQ(recv_waits);
1831eb62cbbSBarry Smith   for ( i=0; i<nreceives; i++ ) {
184416022c9SBarry Smith     MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
1851eb62cbbSBarry Smith               comm,recv_waits+i);
1861eb62cbbSBarry Smith   }
1871eb62cbbSBarry Smith 
1881eb62cbbSBarry Smith   /* do sends:
1891eb62cbbSBarry Smith       1) starts[i] gives the starting index in svalues for stuff going to
1901eb62cbbSBarry Smith          the ith processor
1911eb62cbbSBarry Smith   */
1920452661fSBarry Smith   svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
1930452661fSBarry Smith   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
19478b31e54SBarry Smith   CHKPTRQ(send_waits);
1950452661fSBarry Smith   starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
1961eb62cbbSBarry Smith   starts[0] = 0;
19717699dbbSLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
1981eb62cbbSBarry Smith   for ( i=0; i<aij->stash.n; i++ ) {
1991eb62cbbSBarry Smith     svalues[3*starts[owner[i]]]       = (Scalar)  aij->stash.idx[i];
2001eb62cbbSBarry Smith     svalues[3*starts[owner[i]]+1]     = (Scalar)  aij->stash.idy[i];
2011eb62cbbSBarry Smith     svalues[3*(starts[owner[i]]++)+2] =  aij->stash.array[i];
2021eb62cbbSBarry Smith   }
2030452661fSBarry Smith   PetscFree(owner);
2041eb62cbbSBarry Smith   starts[0] = 0;
20517699dbbSLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
2061eb62cbbSBarry Smith   count = 0;
20717699dbbSLois Curfman McInnes   for ( i=0; i<size; i++ ) {
2081eb62cbbSBarry Smith     if (procs[i]) {
209416022c9SBarry Smith       MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
2101eb62cbbSBarry Smith                 comm,send_waits+count++);
2111eb62cbbSBarry Smith     }
2121eb62cbbSBarry Smith   }
2130452661fSBarry Smith   PetscFree(starts); PetscFree(nprocs);
2141eb62cbbSBarry Smith 
2151eb62cbbSBarry Smith   /* Free cache space */
2162191d07cSBarry Smith   PLogInfo(0,"[%d]MatAssemblyBegin_MPIAIJ:Number of off processor values %d\n",rank,aij->stash.n);
21778b31e54SBarry Smith   ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
2181eb62cbbSBarry Smith 
2191eb62cbbSBarry Smith   aij->svalues    = svalues;    aij->rvalues    = rvalues;
2201eb62cbbSBarry Smith   aij->nsends     = nsends;     aij->nrecvs     = nreceives;
2211eb62cbbSBarry Smith   aij->send_waits = send_waits; aij->recv_waits = recv_waits;
2221eb62cbbSBarry Smith   aij->rmax       = nmax;
2231eb62cbbSBarry Smith 
2241eb62cbbSBarry Smith   return 0;
2251eb62cbbSBarry Smith }
22644a69424SLois Curfman McInnes extern int MatSetUpMultiply_MPIAIJ(Mat);
2271eb62cbbSBarry Smith 
228fc76ce87SLois Curfman McInnes static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
2291eb62cbbSBarry Smith {
23044a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
231dbb450caSBarry Smith   Mat_SeqAIJ *C = (Mat_SeqAIJ *) aij->A->data;
2321eb62cbbSBarry Smith   MPI_Status  *send_status,recv_status;
233416022c9SBarry Smith   int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
234416022c9SBarry Smith   int         row,col,other_disassembled,shift = C->indexshift;
2351eb62cbbSBarry Smith   Scalar      *values,val;
2361eb62cbbSBarry Smith   InsertMode  addv = aij->insertmode;
2371eb62cbbSBarry Smith 
2381eb62cbbSBarry Smith   /*  wait on receives */
2391eb62cbbSBarry Smith   while (count) {
240d6dfbf8fSBarry Smith     MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
2411eb62cbbSBarry Smith     /* unpack receives into our local space */
242d6dfbf8fSBarry Smith     values = aij->rvalues + 3*imdex*aij->rmax;
243c60448a5SBarry Smith     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
2441eb62cbbSBarry Smith     n = n/3;
2451eb62cbbSBarry Smith     for ( i=0; i<n; i++ ) {
246227d817aSBarry Smith       row = (int) PetscReal(values[3*i]) - aij->rstart;
247227d817aSBarry Smith       col = (int) PetscReal(values[3*i+1]);
2481eb62cbbSBarry Smith       val = values[3*i+2];
2491eb62cbbSBarry Smith       if (col >= aij->cstart && col < aij->cend) {
2501eb62cbbSBarry Smith         col -= aij->cstart;
2511eb62cbbSBarry Smith         MatSetValues(aij->A,1,&row,1,&col,&val,addv);
2521eb62cbbSBarry Smith       }
2531eb62cbbSBarry Smith       else {
254227d817aSBarry Smith         if (mat->was_assembled) {
255b7c46309SBarry Smith           if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
256dbb450caSBarry Smith           col = aij->colmap[col] + shift;
257ec8511deSBarry Smith           if (col < 0  && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
2582493cbb0SBarry Smith             ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
259227d817aSBarry Smith             col = (int) PetscReal(values[3*i+1]);
260d6dfbf8fSBarry Smith           }
2619e25ed09SBarry Smith         }
2621eb62cbbSBarry Smith         MatSetValues(aij->B,1,&row,1,&col,&val,addv);
2631eb62cbbSBarry Smith       }
2641eb62cbbSBarry Smith     }
2651eb62cbbSBarry Smith     count--;
2661eb62cbbSBarry Smith   }
2670452661fSBarry Smith   PetscFree(aij->recv_waits); PetscFree(aij->rvalues);
2681eb62cbbSBarry Smith 
2691eb62cbbSBarry Smith   /* wait on sends */
2701eb62cbbSBarry Smith   if (aij->nsends) {
2710452661fSBarry Smith     send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));
27278b31e54SBarry Smith     CHKPTRQ(send_status);
2731eb62cbbSBarry Smith     MPI_Waitall(aij->nsends,aij->send_waits,send_status);
2740452661fSBarry Smith     PetscFree(send_status);
2751eb62cbbSBarry Smith   }
2760452661fSBarry Smith   PetscFree(aij->send_waits); PetscFree(aij->svalues);
2771eb62cbbSBarry Smith 
27864119d58SLois Curfman McInnes   aij->insertmode = NOT_SET_VALUES;
27978b31e54SBarry Smith   ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
28078b31e54SBarry Smith   ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);
2811eb62cbbSBarry Smith 
2822493cbb0SBarry Smith   /* determine if any processor has disassembled, if so we must
2832493cbb0SBarry Smith      also disassemble ourselfs, in order that we may reassemble. */
284227d817aSBarry Smith   MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
285227d817aSBarry Smith   if (mat->was_assembled && !other_disassembled) {
2862493cbb0SBarry Smith     ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
2872493cbb0SBarry Smith   }
2882493cbb0SBarry Smith 
289227d817aSBarry Smith   if (!mat->was_assembled && mode == FINAL_ASSEMBLY) {
29078b31e54SBarry Smith     ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
2915e42470aSBarry Smith   }
29278b31e54SBarry Smith   ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
29378b31e54SBarry Smith   ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);
2945e42470aSBarry Smith 
2958a729477SBarry Smith   return 0;
2968a729477SBarry Smith }
2978a729477SBarry Smith 
2982ee70a88SLois Curfman McInnes static int MatZeroEntries_MPIAIJ(Mat A)
2991eb62cbbSBarry Smith {
30044a69424SLois Curfman McInnes   Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
301dbd7a890SLois Curfman McInnes   int        ierr;
30278b31e54SBarry Smith   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
30378b31e54SBarry Smith   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
3041eb62cbbSBarry Smith   return 0;
3051eb62cbbSBarry Smith }
3061eb62cbbSBarry Smith 
3071eb62cbbSBarry Smith /* the code does not do the diagonal entries correctly unless the
3081eb62cbbSBarry Smith    matrix is square and the column and row owerships are identical.
3091eb62cbbSBarry Smith    This is a BUG. The only way to fix it seems to be to access
3101eb62cbbSBarry Smith    aij->A and aij->B directly and not through the MatZeroRows()
3111eb62cbbSBarry Smith    routine.
3121eb62cbbSBarry Smith */
31344a69424SLois Curfman McInnes static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
3141eb62cbbSBarry Smith {
31544a69424SLois Curfman McInnes   Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
31617699dbbSLois Curfman McInnes   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
3176abc6512SBarry Smith   int            *procs,*nprocs,j,found,idx,nsends,*work;
31817699dbbSLois Curfman McInnes   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
3195392566eSBarry Smith   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
320d6dfbf8fSBarry Smith   int            *lens,imdex,*lrows,*values;
321d6dfbf8fSBarry Smith   MPI_Comm       comm = A->comm;
3221eb62cbbSBarry Smith   MPI_Request    *send_waits,*recv_waits;
3231eb62cbbSBarry Smith   MPI_Status     recv_status,*send_status;
3241eb62cbbSBarry Smith   IS             istmp;
3251eb62cbbSBarry Smith 
32678b31e54SBarry Smith   ierr = ISGetLocalSize(is,&N); CHKERRQ(ierr);
32778b31e54SBarry Smith   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
3281eb62cbbSBarry Smith 
3291eb62cbbSBarry Smith   /*  first count number of contributors to each processor */
3300452661fSBarry Smith   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
331cddf8d76SBarry Smith   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
3320452661fSBarry Smith   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
3331eb62cbbSBarry Smith   for ( i=0; i<N; i++ ) {
3341eb62cbbSBarry Smith     idx = rows[i];
3351eb62cbbSBarry Smith     found = 0;
33617699dbbSLois Curfman McInnes     for ( j=0; j<size; j++ ) {
3371eb62cbbSBarry Smith       if (idx >= owners[j] && idx < owners[j+1]) {
3381eb62cbbSBarry Smith         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
3391eb62cbbSBarry Smith       }
3401eb62cbbSBarry Smith     }
341bbb6d6a8SBarry Smith     if (!found) SETERRQ(1,"MatZeroRows_MPIAIJ:Index out of range");
3421eb62cbbSBarry Smith   }
34317699dbbSLois Curfman McInnes   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
3441eb62cbbSBarry Smith 
3451eb62cbbSBarry Smith   /* inform other processors of number of messages and max length*/
3460452661fSBarry Smith   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
34717699dbbSLois Curfman McInnes   MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
34817699dbbSLois Curfman McInnes   nrecvs = work[rank];
34917699dbbSLois Curfman McInnes   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
35017699dbbSLois Curfman McInnes   nmax = work[rank];
3510452661fSBarry Smith   PetscFree(work);
3521eb62cbbSBarry Smith 
3531eb62cbbSBarry Smith   /* post receives:   */
3540452661fSBarry Smith   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
35578b31e54SBarry Smith   CHKPTRQ(rvalues);
3560452661fSBarry Smith   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
35778b31e54SBarry Smith   CHKPTRQ(recv_waits);
3581eb62cbbSBarry Smith   for ( i=0; i<nrecvs; i++ ) {
359416022c9SBarry Smith     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
3601eb62cbbSBarry Smith   }
3611eb62cbbSBarry Smith 
3621eb62cbbSBarry Smith   /* do sends:
3631eb62cbbSBarry Smith       1) starts[i] gives the starting index in svalues for stuff going to
3641eb62cbbSBarry Smith          the ith processor
3651eb62cbbSBarry Smith   */
3660452661fSBarry Smith   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
3670452661fSBarry Smith   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
36878b31e54SBarry Smith   CHKPTRQ(send_waits);
3690452661fSBarry Smith   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
3701eb62cbbSBarry Smith   starts[0] = 0;
37117699dbbSLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
3721eb62cbbSBarry Smith   for ( i=0; i<N; i++ ) {
3731eb62cbbSBarry Smith     svalues[starts[owner[i]]++] = rows[i];
3741eb62cbbSBarry Smith   }
3751eb62cbbSBarry Smith   ISRestoreIndices(is,&rows);
3761eb62cbbSBarry Smith 
3771eb62cbbSBarry Smith   starts[0] = 0;
37817699dbbSLois Curfman McInnes   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
3791eb62cbbSBarry Smith   count = 0;
38017699dbbSLois Curfman McInnes   for ( i=0; i<size; i++ ) {
3811eb62cbbSBarry Smith     if (procs[i]) {
382416022c9SBarry Smith       MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
3831eb62cbbSBarry Smith     }
3841eb62cbbSBarry Smith   }
3850452661fSBarry Smith   PetscFree(starts);
3861eb62cbbSBarry Smith 
38717699dbbSLois Curfman McInnes   base = owners[rank];
3881eb62cbbSBarry Smith 
3891eb62cbbSBarry Smith   /*  wait on receives */
3900452661fSBarry Smith   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
3911eb62cbbSBarry Smith   source = lens + nrecvs;
3921eb62cbbSBarry Smith   count  = nrecvs; slen = 0;
3931eb62cbbSBarry Smith   while (count) {
394d6dfbf8fSBarry Smith     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
3951eb62cbbSBarry Smith     /* unpack receives into our local space */
3961eb62cbbSBarry Smith     MPI_Get_count(&recv_status,MPI_INT,&n);
397d6dfbf8fSBarry Smith     source[imdex]  = recv_status.MPI_SOURCE;
398d6dfbf8fSBarry Smith     lens[imdex]  = n;
3991eb62cbbSBarry Smith     slen += n;
4001eb62cbbSBarry Smith     count--;
4011eb62cbbSBarry Smith   }
4020452661fSBarry Smith   PetscFree(recv_waits);
4031eb62cbbSBarry Smith 
4041eb62cbbSBarry Smith   /* move the data into the send scatter */
4050452661fSBarry Smith   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
4061eb62cbbSBarry Smith   count = 0;
4071eb62cbbSBarry Smith   for ( i=0; i<nrecvs; i++ ) {
4081eb62cbbSBarry Smith     values = rvalues + i*nmax;
4091eb62cbbSBarry Smith     for ( j=0; j<lens[i]; j++ ) {
4101eb62cbbSBarry Smith       lrows[count++] = values[j] - base;
4111eb62cbbSBarry Smith     }
4121eb62cbbSBarry Smith   }
4130452661fSBarry Smith   PetscFree(rvalues); PetscFree(lens);
4140452661fSBarry Smith   PetscFree(owner); PetscFree(nprocs);
4151eb62cbbSBarry Smith 
4161eb62cbbSBarry Smith   /* actually zap the local rows */
417416022c9SBarry Smith   ierr = ISCreateSeq(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
418464493b3SBarry Smith   PLogObjectParent(A,istmp);
4190452661fSBarry Smith   PetscFree(lrows);
42078b31e54SBarry Smith   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
42178b31e54SBarry Smith   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
42278b31e54SBarry Smith   ierr = ISDestroy(istmp); CHKERRQ(ierr);
4231eb62cbbSBarry Smith 
4241eb62cbbSBarry Smith   /* wait on sends */
4251eb62cbbSBarry Smith   if (nsends) {
4260452661fSBarry Smith     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
42778b31e54SBarry Smith     CHKPTRQ(send_status);
4281eb62cbbSBarry Smith     MPI_Waitall(nsends,send_waits,send_status);
4290452661fSBarry Smith     PetscFree(send_status);
4301eb62cbbSBarry Smith   }
4310452661fSBarry Smith   PetscFree(send_waits); PetscFree(svalues);
4321eb62cbbSBarry Smith 
4331eb62cbbSBarry Smith   return 0;
4341eb62cbbSBarry Smith }
4351eb62cbbSBarry Smith 
436416022c9SBarry Smith static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
4371eb62cbbSBarry Smith {
438416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
4391eb62cbbSBarry Smith   int        ierr;
440416022c9SBarry Smith 
44164119d58SLois Curfman McInnes   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
442416022c9SBarry Smith   ierr = MatMult(a->A,xx,yy); CHKERRQ(ierr);
44364119d58SLois Curfman McInnes   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
444416022c9SBarry Smith   ierr = MatMultAdd(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
4451eb62cbbSBarry Smith   return 0;
4461eb62cbbSBarry Smith }
4471eb62cbbSBarry Smith 
448416022c9SBarry Smith static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
449da3a660dSBarry Smith {
450416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
451da3a660dSBarry Smith   int        ierr;
45264119d58SLois Curfman McInnes   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
453416022c9SBarry Smith   ierr = MatMultAdd(a->A,xx,yy,zz); CHKERRQ(ierr);
45464119d58SLois Curfman McInnes   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
455416022c9SBarry Smith   ierr = MatMultAdd(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
456da3a660dSBarry Smith   return 0;
457da3a660dSBarry Smith }
458da3a660dSBarry Smith 
459416022c9SBarry Smith static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
460da3a660dSBarry Smith {
461416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
462da3a660dSBarry Smith   int        ierr;
463da3a660dSBarry Smith 
464da3a660dSBarry Smith   /* do nondiagonal part */
465416022c9SBarry Smith   ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr);
466da3a660dSBarry Smith   /* send it on its way */
467416022c9SBarry Smith   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,
46864119d58SLois Curfman McInnes                 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
469da3a660dSBarry Smith   /* do local part */
470416022c9SBarry Smith   ierr = MatMultTrans(a->A,xx,yy); CHKERRQ(ierr);
471da3a660dSBarry Smith   /* receive remote parts: note this assumes the values are not actually */
472da3a660dSBarry Smith   /* inserted in yy until the next line, which is true for my implementation*/
473da3a660dSBarry Smith   /* but is not perhaps always true. */
474416022c9SBarry Smith   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,
47564119d58SLois Curfman McInnes                   (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
476da3a660dSBarry Smith   return 0;
477da3a660dSBarry Smith }
478da3a660dSBarry Smith 
479416022c9SBarry Smith static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
480da3a660dSBarry Smith {
481416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
482da3a660dSBarry Smith   int        ierr;
483da3a660dSBarry Smith 
484da3a660dSBarry Smith   /* do nondiagonal part */
485416022c9SBarry Smith   ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr);
486da3a660dSBarry Smith   /* send it on its way */
487416022c9SBarry Smith   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,
48864119d58SLois Curfman McInnes                  (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
489da3a660dSBarry Smith   /* do local part */
490416022c9SBarry Smith   ierr = MatMultTransAdd(a->A,xx,yy,zz); CHKERRQ(ierr);
491da3a660dSBarry Smith   /* receive remote parts: note this assumes the values are not actually */
492da3a660dSBarry Smith   /* inserted in yy until the next line, which is true for my implementation*/
493da3a660dSBarry Smith   /* but is not perhaps always true. */
494416022c9SBarry Smith   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,
49564119d58SLois Curfman McInnes                   (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
496da3a660dSBarry Smith   return 0;
497da3a660dSBarry Smith }
498da3a660dSBarry Smith 
4991eb62cbbSBarry Smith /*
5001eb62cbbSBarry Smith   This only works correctly for square matrices where the subblock A->A is the
5011eb62cbbSBarry Smith    diagonal block
5021eb62cbbSBarry Smith */
503416022c9SBarry Smith static int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
5041eb62cbbSBarry Smith {
505416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
506416022c9SBarry Smith   return MatGetDiagonal(a->A,v);
5071eb62cbbSBarry Smith }
5081eb62cbbSBarry Smith 
509052efed2SBarry Smith static int MatScale_MPIAIJ(Scalar *aa,Mat A)
510052efed2SBarry Smith {
511052efed2SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
512052efed2SBarry Smith   int        ierr;
513052efed2SBarry Smith   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
514052efed2SBarry Smith   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
515052efed2SBarry Smith   return 0;
516052efed2SBarry Smith }
517052efed2SBarry Smith 
51844a69424SLois Curfman McInnes static int MatDestroy_MPIAIJ(PetscObject obj)
5191eb62cbbSBarry Smith {
5201eb62cbbSBarry Smith   Mat        mat = (Mat) obj;
52144a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
5221eb62cbbSBarry Smith   int        ierr;
523a5a9c739SBarry Smith #if defined(PETSC_LOG)
5246e35fa57SLois Curfman McInnes   PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N);
525a5a9c739SBarry Smith #endif
5260452661fSBarry Smith   PetscFree(aij->rowners);
52778b31e54SBarry Smith   ierr = MatDestroy(aij->A); CHKERRQ(ierr);
52878b31e54SBarry Smith   ierr = MatDestroy(aij->B); CHKERRQ(ierr);
5290452661fSBarry Smith   if (aij->colmap) PetscFree(aij->colmap);
5300452661fSBarry Smith   if (aij->garray) PetscFree(aij->garray);
5311eb62cbbSBarry Smith   if (aij->lvec)   VecDestroy(aij->lvec);
532a56f8943SBarry Smith   if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
5330452661fSBarry Smith   PetscFree(aij);
534a5a9c739SBarry Smith   PLogObjectDestroy(mat);
5350452661fSBarry Smith   PetscHeaderDestroy(mat);
5361eb62cbbSBarry Smith   return 0;
5371eb62cbbSBarry Smith }
5387857610eSBarry Smith #include "draw.h"
539b16a3bb1SBarry Smith #include "pinclude/pviewer.h"
540ee50ffe9SBarry Smith 
541416022c9SBarry Smith static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
5421eb62cbbSBarry Smith {
543416022c9SBarry Smith   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
544416022c9SBarry Smith   int         ierr;
545416022c9SBarry Smith 
54617699dbbSLois Curfman McInnes   if (aij->size == 1) {
547416022c9SBarry Smith     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
548416022c9SBarry Smith   }
549a523beb4SLois Curfman McInnes   else SETERRQ(1,"MatView_MPIAIJ_Binary:Only uniprocessor output supported");
550416022c9SBarry Smith   return 0;
551416022c9SBarry Smith }
552416022c9SBarry Smith 
553d7e8b826SBarry Smith static int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
554416022c9SBarry Smith {
55544a69424SLois Curfman McInnes   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
556dbb450caSBarry Smith   Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
557a56f8943SBarry Smith   int         ierr, format,shift = C->indexshift,rank;
558d13ab20cSBarry Smith   PetscObject vobj = (PetscObject) viewer;
559d636dbe3SBarry Smith   FILE        *fd;
560416022c9SBarry Smith 
561416022c9SBarry Smith   if (vobj->type == ASCII_FILE_VIEWER || vobj->type == ASCII_FILES_VIEWER) {
562416022c9SBarry Smith     ierr = ViewerFileGetFormat_Private(viewer,&format);
56308480c60SBarry Smith     if (format == FILE_FORMAT_INFO_DETAILED) {
564a56f8943SBarry Smith       int nz,nzalloc,mem;
565a56f8943SBarry Smith       MPI_Comm_rank(mat->comm,&rank);
566227d817aSBarry Smith       ierr = ViewerFileGetPointer(viewer,&fd); CHKERRQ(ierr);
567a56f8943SBarry Smith       ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem);
568a56f8943SBarry Smith       MPIU_Seq_begin(mat->comm,1);
569a56f8943SBarry Smith       fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d \n",rank,aij->m,nz,
570a56f8943SBarry Smith                 nzalloc,mem);
57108480c60SBarry Smith       ierr = MatGetInfo(aij->A,MAT_LOCAL,&nz,&nzalloc,&mem);
57208480c60SBarry Smith       fprintf(fd,"[%d] on diagonal nz %d \n",rank,nz);
57308480c60SBarry Smith       ierr = MatGetInfo(aij->B,MAT_LOCAL,&nz,&nzalloc,&mem);
57408480c60SBarry Smith       fprintf(fd,"[%d] off diagonal nz %d \n",rank,nz);
575a56f8943SBarry Smith       fflush(fd);
576a56f8943SBarry Smith       MPIU_Seq_end(mat->comm,1);
577a40aa06bSLois Curfman McInnes       ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr);
578416022c9SBarry Smith       return 0;
579416022c9SBarry Smith     }
58008480c60SBarry Smith     else if (format == FILE_FORMAT_INFO) {
58108480c60SBarry Smith       return 0;
58208480c60SBarry Smith     }
583416022c9SBarry Smith   }
584416022c9SBarry Smith 
585416022c9SBarry Smith   if (vobj->type == ASCII_FILE_VIEWER) {
586227d817aSBarry Smith     ierr = ViewerFileGetPointer(viewer,&fd); CHKERRQ(ierr);
5877f813858SBarry Smith     MPIU_Seq_begin(mat->comm,1);
588d13ab20cSBarry Smith     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
58917699dbbSLois Curfman McInnes            aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
5901eb62cbbSBarry Smith            aij->cend);
59178b31e54SBarry Smith     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
59278b31e54SBarry Smith     ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
593d13ab20cSBarry Smith     fflush(fd);
5947f813858SBarry Smith     MPIU_Seq_end(mat->comm,1);
595d13ab20cSBarry Smith   }
596416022c9SBarry Smith   else {
597a56f8943SBarry Smith     int size = aij->size;
598a56f8943SBarry Smith     rank = aij->rank;
59917699dbbSLois Curfman McInnes     if (size == 1) {
60078b31e54SBarry Smith       ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
60195373324SBarry Smith     }
60295373324SBarry Smith     else {
60395373324SBarry Smith       /* assemble the entire matrix onto first processor. */
60495373324SBarry Smith       Mat         A;
605ec8511deSBarry Smith       Mat_SeqAIJ *Aloc;
6062eb8c8abSBarry Smith       int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
60795373324SBarry Smith       Scalar      *a;
6082ee70a88SLois Curfman McInnes 
60917699dbbSLois Curfman McInnes       if (!rank) {
610b4fd4287SBarry Smith         ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
611c451ab25SLois Curfman McInnes                CHKERRQ(ierr);
61295373324SBarry Smith       }
61395373324SBarry Smith       else {
614b4fd4287SBarry Smith         ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
615c451ab25SLois Curfman McInnes                CHKERRQ(ierr);
61695373324SBarry Smith       }
617464493b3SBarry Smith       PLogObjectParent(mat,A);
618416022c9SBarry Smith 
61995373324SBarry Smith       /* copy over the A part */
620ec8511deSBarry Smith       Aloc = (Mat_SeqAIJ*) aij->A->data;
6212ee70a88SLois Curfman McInnes       m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
62295373324SBarry Smith       row = aij->rstart;
623dbb450caSBarry Smith       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
62495373324SBarry Smith       for ( i=0; i<m; i++ ) {
625416022c9SBarry Smith         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
62695373324SBarry Smith         row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
62795373324SBarry Smith       }
6282ee70a88SLois Curfman McInnes       aj = Aloc->j;
629dbb450caSBarry Smith       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}
63095373324SBarry Smith 
63195373324SBarry Smith       /* copy over the B part */
632ec8511deSBarry Smith       Aloc = (Mat_SeqAIJ*) aij->B->data;
6332ee70a88SLois Curfman McInnes       m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
63495373324SBarry Smith       row = aij->rstart;
6350452661fSBarry Smith       ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
636dbb450caSBarry Smith       for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
63795373324SBarry Smith       for ( i=0; i<m; i++ ) {
638416022c9SBarry Smith         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
63995373324SBarry Smith         row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
64095373324SBarry Smith       }
6410452661fSBarry Smith       PetscFree(ct);
64278b31e54SBarry Smith       ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
64378b31e54SBarry Smith       ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
64417699dbbSLois Curfman McInnes       if (!rank) {
64578b31e54SBarry Smith         ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
64695373324SBarry Smith       }
64778b31e54SBarry Smith       ierr = MatDestroy(A); CHKERRQ(ierr);
64895373324SBarry Smith     }
64995373324SBarry Smith   }
6501eb62cbbSBarry Smith   return 0;
6511eb62cbbSBarry Smith }
6521eb62cbbSBarry Smith 
653416022c9SBarry Smith static int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
654416022c9SBarry Smith {
655416022c9SBarry Smith   Mat         mat = (Mat) obj;
656416022c9SBarry Smith   int         ierr;
657416022c9SBarry Smith   PetscObject vobj = (PetscObject) viewer;
658416022c9SBarry Smith 
659416022c9SBarry Smith   if (!viewer) {
660416022c9SBarry Smith     viewer = STDOUT_VIEWER_SELF; vobj = (PetscObject) viewer;
661416022c9SBarry Smith   }
662416022c9SBarry Smith   if (vobj->cookie == DRAW_COOKIE && vobj->type == NULLWINDOW) return 0;
663416022c9SBarry Smith   else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILE_VIEWER) {
664d7e8b826SBarry Smith     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
665416022c9SBarry Smith   }
666416022c9SBarry Smith   else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILES_VIEWER) {
667d7e8b826SBarry Smith     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
668d7e8b826SBarry Smith   }
669d7e8b826SBarry Smith   else if (vobj->cookie == VIEWER_COOKIE && vobj->type == MATLAB_VIEWER) {
670d7e8b826SBarry Smith     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
671416022c9SBarry Smith   }
672416022c9SBarry Smith   else if (vobj->cookie == DRAW_COOKIE) {
673d7e8b826SBarry Smith     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
674416022c9SBarry Smith   }
675416022c9SBarry Smith   else if (vobj->type == BINARY_FILE_VIEWER) {
676416022c9SBarry Smith     return MatView_MPIAIJ_Binary(mat,viewer);
677416022c9SBarry Smith   }
678416022c9SBarry Smith   return 0;
679416022c9SBarry Smith }
680416022c9SBarry Smith 
681ec8511deSBarry Smith extern int MatMarkDiag_SeqAIJ(Mat);
6821eb62cbbSBarry Smith /*
6831eb62cbbSBarry Smith     This has to provide several versions.
6841eb62cbbSBarry Smith 
6851eb62cbbSBarry Smith      1) per sequential
6861eb62cbbSBarry Smith      2) a) use only local smoothing updating outer values only once.
6871eb62cbbSBarry Smith         b) local smoothing updating outer values each inner iteration
688d6dfbf8fSBarry Smith      3) color updating out values betwen colors.
6891eb62cbbSBarry Smith */
690fc76ce87SLois Curfman McInnes static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
691dbb450caSBarry Smith                            double fshift,int its,Vec xx)
6928a729477SBarry Smith {
69344a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
694d6dfbf8fSBarry Smith   Mat        AA = mat->A, BB = mat->B;
695ec8511deSBarry Smith   Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
6966abc6512SBarry Smith   Scalar     zero = 0.0,*b,*x,*xs,*ls,d,*v,sum,scale,*t,*ts;
6976abc6512SBarry Smith   int        ierr,*idx, *diag;
698416022c9SBarry Smith   int        n = mat->n, m = mat->m, i,shift = A->indexshift;
699da3a660dSBarry Smith   Vec        tt;
7008a729477SBarry Smith 
701d6dfbf8fSBarry Smith   VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
702dbb450caSBarry Smith   xs = x + shift; /* shift by one for index start of 1 */
703dbb450caSBarry Smith   ls = ls + shift;
704ec8511deSBarry Smith   if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;}
705d6dfbf8fSBarry Smith   diag = A->diag;
706acb40c82SBarry Smith   if (flag == SOR_APPLY_UPPER || flag == SOR_APPLY_LOWER) {
70748d91487SBarry Smith     SETERRQ(1,"MatRelax_MPIAIJ:Option not supported");
708acb40c82SBarry Smith   }
709da3a660dSBarry Smith   if (flag & SOR_EISENSTAT) {
710da3a660dSBarry Smith     /* Let  A = L + U + D; where L is lower trianglar,
711da3a660dSBarry Smith     U is upper triangular, E is diagonal; This routine applies
712da3a660dSBarry Smith 
713da3a660dSBarry Smith             (L + E)^{-1} A (U + E)^{-1}
714da3a660dSBarry Smith 
715da3a660dSBarry Smith     to a vector efficiently using Eisenstat's trick. This is for
716da3a660dSBarry Smith     the case of SSOR preconditioner, so E is D/omega where omega
717da3a660dSBarry Smith     is the relaxation factor.
718da3a660dSBarry Smith     */
71978b31e54SBarry Smith     ierr = VecDuplicate(xx,&tt); CHKERRQ(ierr);
720464493b3SBarry Smith     PLogObjectParent(matin,tt);
721da3a660dSBarry Smith     VecGetArray(tt,&t);
722da3a660dSBarry Smith     scale = (2.0/omega) - 1.0;
723da3a660dSBarry Smith     /*  x = (E + U)^{-1} b */
724da3a660dSBarry Smith     VecSet(&zero,mat->lvec);
72564119d58SLois Curfman McInnes     ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
72678b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
727da3a660dSBarry Smith     for ( i=m-1; i>-1; i-- ) {
728da3a660dSBarry Smith       n    = A->i[i+1] - diag[i] - 1;
729dbb450caSBarry Smith       idx  = A->j + diag[i] + !shift;
730dbb450caSBarry Smith       v    = A->a + diag[i] + !shift;
731da3a660dSBarry Smith       sum  = b[i];
732da3a660dSBarry Smith       SPARSEDENSEMDOT(sum,xs,v,idx,n);
733dbb450caSBarry Smith       d    = fshift + A->a[diag[i]+shift];
734da3a660dSBarry Smith       n    = B->i[i+1] - B->i[i];
735dbb450caSBarry Smith       idx  = B->j + B->i[i] + shift;
736dbb450caSBarry Smith       v    = B->a + B->i[i] + shift;
737da3a660dSBarry Smith       SPARSEDENSEMDOT(sum,ls,v,idx,n);
738da3a660dSBarry Smith       x[i] = omega*(sum/d);
739da3a660dSBarry Smith     }
74064119d58SLois Curfman McInnes     ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
74178b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
742da3a660dSBarry Smith 
743da3a660dSBarry Smith     /*  t = b - (2*E - D)x */
744da3a660dSBarry Smith     v = A->a;
745dbb450caSBarry Smith     for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ + shift])*x[i]; }
746da3a660dSBarry Smith 
747da3a660dSBarry Smith     /*  t = (E + L)^{-1}t */
748dbb450caSBarry Smith     ts = t + shift; /* shifted by one for index start of a or mat->j*/
749da3a660dSBarry Smith     diag = A->diag;
750da3a660dSBarry Smith     VecSet(&zero,mat->lvec);
75164119d58SLois Curfman McInnes     ierr = VecPipelineBegin(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
75278b31e54SBarry Smith                                                  mat->Mvctx); CHKERRQ(ierr);
753da3a660dSBarry Smith     for ( i=0; i<m; i++ ) {
754da3a660dSBarry Smith       n    = diag[i] - A->i[i];
755dbb450caSBarry Smith       idx  = A->j + A->i[i] + shift;
756dbb450caSBarry Smith       v    = A->a + A->i[i] + shift;
757da3a660dSBarry Smith       sum  = t[i];
758da3a660dSBarry Smith       SPARSEDENSEMDOT(sum,ts,v,idx,n);
759dbb450caSBarry Smith       d    = fshift + A->a[diag[i]+shift];
760da3a660dSBarry Smith       n    = B->i[i+1] - B->i[i];
761dbb450caSBarry Smith       idx  = B->j + B->i[i] + shift;
762dbb450caSBarry Smith       v    = B->a + B->i[i] + shift;
763da3a660dSBarry Smith       SPARSEDENSEMDOT(sum,ls,v,idx,n);
764da3a660dSBarry Smith       t[i] = omega*(sum/d);
765da3a660dSBarry Smith     }
76664119d58SLois Curfman McInnes     ierr = VecPipelineEnd(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
76778b31e54SBarry Smith                                                     mat->Mvctx); CHKERRQ(ierr);
768da3a660dSBarry Smith     /*  x = x + t */
769da3a660dSBarry Smith     for ( i=0; i<m; i++ ) { x[i] += t[i]; }
770da3a660dSBarry Smith     VecDestroy(tt);
771da3a660dSBarry Smith     return 0;
772da3a660dSBarry Smith   }
773da3a660dSBarry Smith 
7741eb62cbbSBarry Smith 
775d6dfbf8fSBarry Smith   if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){
776da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
777da3a660dSBarry Smith       VecSet(&zero,mat->lvec); VecSet(&zero,xx);
778da3a660dSBarry Smith     }
779da3a660dSBarry Smith     else {
78064119d58SLois Curfman McInnes       ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
78178b31e54SBarry Smith       CHKERRQ(ierr);
78264119d58SLois Curfman McInnes       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
78378b31e54SBarry Smith       CHKERRQ(ierr);
784da3a660dSBarry Smith     }
785d6dfbf8fSBarry Smith     while (its--) {
786d6dfbf8fSBarry Smith       /* go down through the rows */
78764119d58SLois Curfman McInnes       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
78878b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
789d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
790d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
791dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
792dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
793d6dfbf8fSBarry Smith         sum  = b[i];
794d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
795dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
796d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
797dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
798dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
799d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
800d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
801d6dfbf8fSBarry Smith       }
80264119d58SLois Curfman McInnes       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
80378b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
804d6dfbf8fSBarry Smith       /* come up through the rows */
80564119d58SLois Curfman McInnes       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
80678b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
807d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
808d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
809dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
810dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
811d6dfbf8fSBarry Smith         sum  = b[i];
812d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
813dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
814d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
815dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
816dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
817d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
818d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
819d6dfbf8fSBarry Smith       }
82064119d58SLois Curfman McInnes       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
82178b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
822d6dfbf8fSBarry Smith     }
823d6dfbf8fSBarry Smith   }
824d6dfbf8fSBarry Smith   else if (flag & SOR_FORWARD_SWEEP){
825da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
826da3a660dSBarry Smith       VecSet(&zero,mat->lvec);
82764119d58SLois Curfman McInnes       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
82878b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
829da3a660dSBarry Smith       for ( i=0; i<m; i++ ) {
830da3a660dSBarry Smith         n    = diag[i] - A->i[i];
831dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
832dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
833da3a660dSBarry Smith         sum  = b[i];
834da3a660dSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
835dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
836da3a660dSBarry Smith         n    = B->i[i+1] - B->i[i];
837dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
838dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
839da3a660dSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
840da3a660dSBarry Smith         x[i] = omega*(sum/d);
841da3a660dSBarry Smith       }
84264119d58SLois Curfman McInnes       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
84378b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
844da3a660dSBarry Smith       its--;
845da3a660dSBarry Smith     }
846d6dfbf8fSBarry Smith     while (its--) {
84764119d58SLois Curfman McInnes       ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
84878b31e54SBarry Smith       CHKERRQ(ierr);
84964119d58SLois Curfman McInnes       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
85078b31e54SBarry Smith       CHKERRQ(ierr);
85164119d58SLois Curfman McInnes       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
85278b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
853d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
854d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
855dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
856dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
857d6dfbf8fSBarry Smith         sum  = b[i];
858d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
859dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
860d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
861dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
862dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
863d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
864d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
865d6dfbf8fSBarry Smith       }
86664119d58SLois Curfman McInnes       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
86778b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
868d6dfbf8fSBarry Smith     }
869d6dfbf8fSBarry Smith   }
870d6dfbf8fSBarry Smith   else if (flag & SOR_BACKWARD_SWEEP){
871da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
872da3a660dSBarry Smith       VecSet(&zero,mat->lvec);
87364119d58SLois Curfman McInnes       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
87478b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
875da3a660dSBarry Smith       for ( i=m-1; i>-1; i-- ) {
876da3a660dSBarry Smith         n    = A->i[i+1] - diag[i] - 1;
877dbb450caSBarry Smith         idx  = A->j + diag[i] + !shift;
878dbb450caSBarry Smith         v    = A->a + diag[i] + !shift;
879da3a660dSBarry Smith         sum  = b[i];
880da3a660dSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
881dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
882da3a660dSBarry Smith         n    = B->i[i+1] - B->i[i];
883dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
884dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
885da3a660dSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
886da3a660dSBarry Smith         x[i] = omega*(sum/d);
887da3a660dSBarry Smith       }
88864119d58SLois Curfman McInnes       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
88978b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
890da3a660dSBarry Smith       its--;
891da3a660dSBarry Smith     }
892d6dfbf8fSBarry Smith     while (its--) {
89364119d58SLois Curfman McInnes       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN,
89478b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
89564119d58SLois Curfman McInnes       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN,
89678b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
89764119d58SLois Curfman McInnes       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
89878b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
899d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
900d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
901dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
902dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
903d6dfbf8fSBarry Smith         sum  = b[i];
904d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
905dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
906d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
907dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
908dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
909d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
910d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
911d6dfbf8fSBarry Smith       }
91264119d58SLois Curfman McInnes       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
91378b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
914d6dfbf8fSBarry Smith     }
915d6dfbf8fSBarry Smith   }
916d6dfbf8fSBarry Smith   else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
917da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
918dbb450caSBarry Smith       return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
919da3a660dSBarry Smith     }
92064119d58SLois Curfman McInnes     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
92178b31e54SBarry Smith     CHKERRQ(ierr);
92264119d58SLois Curfman McInnes     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
92378b31e54SBarry Smith     CHKERRQ(ierr);
924d6dfbf8fSBarry Smith     while (its--) {
925d6dfbf8fSBarry Smith       /* go down through the rows */
926d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
927d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
928dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
929dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
930d6dfbf8fSBarry Smith         sum  = b[i];
931d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
932dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
933d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
934dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
935dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
936d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
937d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
938d6dfbf8fSBarry Smith       }
939d6dfbf8fSBarry Smith       /* come up through the rows */
940d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
941d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
942dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
943dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
944d6dfbf8fSBarry Smith         sum  = b[i];
945d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
946dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
947d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
948dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
949dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
950d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
951d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
952d6dfbf8fSBarry Smith       }
953d6dfbf8fSBarry Smith     }
954d6dfbf8fSBarry Smith   }
955d6dfbf8fSBarry Smith   else if (flag & SOR_LOCAL_FORWARD_SWEEP){
956da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
957dbb450caSBarry Smith       return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
958da3a660dSBarry Smith     }
95964119d58SLois Curfman McInnes     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
96078b31e54SBarry Smith     CHKERRQ(ierr);
96164119d58SLois Curfman McInnes     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
96278b31e54SBarry Smith     CHKERRQ(ierr);
963d6dfbf8fSBarry Smith     while (its--) {
964d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
965d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
966dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
967dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
968d6dfbf8fSBarry Smith         sum  = b[i];
969d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
970dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
971d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
972dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
973dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
974d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
975d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
976d6dfbf8fSBarry Smith       }
977d6dfbf8fSBarry Smith     }
978d6dfbf8fSBarry Smith   }
979d6dfbf8fSBarry Smith   else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
980da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
981dbb450caSBarry Smith       return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
982da3a660dSBarry Smith     }
98364119d58SLois Curfman McInnes     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,
98478b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
98564119d58SLois Curfman McInnes     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,
98678b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
987d6dfbf8fSBarry Smith     while (its--) {
988d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
989d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
990dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
991dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
992d6dfbf8fSBarry Smith         sum  = b[i];
993d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
994dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
995d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
996dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
997dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
998d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
999d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
1000d6dfbf8fSBarry Smith       }
1001d6dfbf8fSBarry Smith     }
1002d6dfbf8fSBarry Smith   }
10038a729477SBarry Smith   return 0;
10048a729477SBarry Smith }
1005a66be287SLois Curfman McInnes 
1006fc76ce87SLois Curfman McInnes static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,int *nz,
1007fc76ce87SLois Curfman McInnes                              int *nzalloc,int *mem)
1008a66be287SLois Curfman McInnes {
1009a66be287SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1010a66be287SLois Curfman McInnes   Mat        A = mat->A, B = mat->B;
1011a66be287SLois Curfman McInnes   int        ierr, isend[3], irecv[3], nzA, nzallocA, memA;
1012a66be287SLois Curfman McInnes 
101378b31e54SBarry Smith   ierr = MatGetInfo(A,MAT_LOCAL,&nzA,&nzallocA,&memA); CHKERRQ(ierr);
101478b31e54SBarry Smith   ierr = MatGetInfo(B,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr);
1015a66be287SLois Curfman McInnes   isend[0] += nzA; isend[1] += nzallocA; isend[2] += memA;
1016a66be287SLois Curfman McInnes   if (flag == MAT_LOCAL) {
1017a66be287SLois Curfman McInnes     *nz = isend[0]; *nzalloc = isend[1]; *mem = isend[2];
1018a66be287SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_MAX) {
1019d65a2f8fSBarry Smith     MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_MAX,matin->comm);
1020a66be287SLois Curfman McInnes     *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
1021a66be287SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_SUM) {
1022d65a2f8fSBarry Smith     MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_SUM,matin->comm);
1023a66be287SLois Curfman McInnes     *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
1024a66be287SLois Curfman McInnes   }
1025a66be287SLois Curfman McInnes   return 0;
1026a66be287SLois Curfman McInnes }
1027a66be287SLois Curfman McInnes 
1028299609e3SLois Curfman McInnes extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*);
1029299609e3SLois Curfman McInnes extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*);
1030299609e3SLois Curfman McInnes extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double);
1031299609e3SLois Curfman McInnes extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *);
1032299609e3SLois Curfman McInnes extern int MatSolve_MPIAIJ(Mat,Vec,Vec);
1033299609e3SLois Curfman McInnes extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec);
1034299609e3SLois Curfman McInnes extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec);
1035299609e3SLois Curfman McInnes extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec);
1036299609e3SLois Curfman McInnes 
1037416022c9SBarry Smith static int MatSetOption_MPIAIJ(Mat A,MatOption op)
1038c74985f6SBarry Smith {
1039c0bbcb79SLois Curfman McInnes   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1040c74985f6SBarry Smith 
1041c0bbcb79SLois Curfman McInnes   if (op == NO_NEW_NONZERO_LOCATIONS ||
1042c0bbcb79SLois Curfman McInnes       op == YES_NEW_NONZERO_LOCATIONS ||
1043c0bbcb79SLois Curfman McInnes       op == COLUMNS_SORTED ||
1044c0bbcb79SLois Curfman McInnes       op == ROW_ORIENTED) {
1045c0bbcb79SLois Curfman McInnes         MatSetOption(a->A,op);
1046c0bbcb79SLois Curfman McInnes         MatSetOption(a->B,op);
1047c74985f6SBarry Smith   }
1048c0bbcb79SLois Curfman McInnes   else if (op == ROWS_SORTED ||
1049c0bbcb79SLois Curfman McInnes            op == SYMMETRIC_MATRIX ||
1050c0bbcb79SLois Curfman McInnes            op == STRUCTURALLY_SYMMETRIC_MATRIX ||
1051c0bbcb79SLois Curfman McInnes            op == YES_NEW_DIAGONALS)
1052c0bbcb79SLois Curfman McInnes     PLogInfo((PetscObject)A,"Info:MatSetOption_MPIAIJ:Option ignored\n");
10534b0e389bSBarry Smith   else if (op == COLUMN_ORIENTED) {
10544b0e389bSBarry Smith     a->roworiented = 0;
10554b0e389bSBarry Smith     MatSetOption(a->A,op);
10564b0e389bSBarry Smith     MatSetOption(a->B,op);
10574b0e389bSBarry Smith   }
1058c0bbcb79SLois Curfman McInnes   else if (op == NO_NEW_DIAGONALS)
10594d39ef84SLois Curfman McInnes     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:NO_NEW_DIAGONALS");}
1060c0bbcb79SLois Curfman McInnes   else
10614d39ef84SLois Curfman McInnes     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:unknown option");}
1062c74985f6SBarry Smith   return 0;
1063c74985f6SBarry Smith }
1064c74985f6SBarry Smith 
1065d1710a03SLois Curfman McInnes static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
1066c74985f6SBarry Smith {
106744a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1068c74985f6SBarry Smith   *m = mat->M; *n = mat->N;
1069c74985f6SBarry Smith   return 0;
1070c74985f6SBarry Smith }
1071c74985f6SBarry Smith 
1072d1710a03SLois Curfman McInnes static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
1073c74985f6SBarry Smith {
107444a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1075b7c46309SBarry Smith   *m = mat->m; *n = mat->N;
1076c74985f6SBarry Smith   return 0;
1077c74985f6SBarry Smith }
1078c74985f6SBarry Smith 
1079d1710a03SLois Curfman McInnes static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
1080c74985f6SBarry Smith {
108144a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1082c74985f6SBarry Smith   *m = mat->rstart; *n = mat->rend;
1083c74985f6SBarry Smith   return 0;
1084c74985f6SBarry Smith }
1085c74985f6SBarry Smith 
108639e00950SLois Curfman McInnes static int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
108739e00950SLois Curfman McInnes {
1088154123eaSLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1089154123eaSLois Curfman McInnes   Scalar     *vworkA, *vworkB, **pvA, **pvB;
1090154123eaSLois Curfman McInnes   int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
1091154123eaSLois Curfman McInnes   int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
109239e00950SLois Curfman McInnes 
1093b49de8d1SLois Curfman McInnes   if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIAIJ:Only local rows")
1094abc0e9e4SLois Curfman McInnes   lrow = row - rstart;
109539e00950SLois Curfman McInnes 
1096154123eaSLois Curfman McInnes   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1097154123eaSLois Curfman McInnes   if (!v)   {pvA = 0; pvB = 0;}
1098154123eaSLois Curfman McInnes   if (!idx) {pcA = 0; if (!v) pcB = 0;}
109978b31e54SBarry Smith   ierr = MatGetRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
110078b31e54SBarry Smith   ierr = MatGetRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1101154123eaSLois Curfman McInnes   nztot = nzA + nzB;
1102154123eaSLois Curfman McInnes 
1103154123eaSLois Curfman McInnes   if (v  || idx) {
1104154123eaSLois Curfman McInnes     if (nztot) {
1105154123eaSLois Curfman McInnes       /* Sort by increasing column numbers, assuming A and B already sorted */
1106299609e3SLois Curfman McInnes       int imark;
1107154123eaSLois Curfman McInnes       for (i=0; i<nzB; i++) cworkB[i] = mat->garray[cworkB[i]];
1108154123eaSLois Curfman McInnes       if (v) {
11090452661fSBarry Smith         *v = (Scalar *) PetscMalloc( (nztot)*sizeof(Scalar) ); CHKPTRQ(*v);
111039e00950SLois Curfman McInnes         for ( i=0; i<nzB; i++ ) {
1111154123eaSLois Curfman McInnes           if (cworkB[i] < cstart)   (*v)[i] = vworkB[i];
1112154123eaSLois Curfman McInnes           else break;
1113154123eaSLois Curfman McInnes         }
1114154123eaSLois Curfman McInnes         imark = i;
1115154123eaSLois Curfman McInnes         for ( i=0; i<nzA; i++ )     (*v)[imark+i] = vworkA[i];
1116299609e3SLois Curfman McInnes         for ( i=imark; i<nzB; i++ ) (*v)[nzA+i] = vworkB[i];
1117154123eaSLois Curfman McInnes       }
1118154123eaSLois Curfman McInnes       if (idx) {
11190452661fSBarry Smith         *idx = (int *) PetscMalloc( (nztot)*sizeof(int) ); CHKPTRQ(*idx);
1120154123eaSLois Curfman McInnes         for (i=0; i<nzA; i++) cworkA[i] += cstart;
1121154123eaSLois Curfman McInnes         for ( i=0; i<nzB; i++ ) {
1122154123eaSLois Curfman McInnes           if (cworkB[i] < cstart)   (*idx)[i] = cworkB[i];
1123154123eaSLois Curfman McInnes           else break;
1124154123eaSLois Curfman McInnes         }
1125154123eaSLois Curfman McInnes         imark = i;
1126154123eaSLois Curfman McInnes         for ( i=0; i<nzA; i++ )     (*idx)[imark+i] = cworkA[i];
1127299609e3SLois Curfman McInnes         for ( i=imark; i<nzB; i++ ) (*idx)[nzA+i] = cworkB[i];
112839e00950SLois Curfman McInnes       }
112939e00950SLois Curfman McInnes     }
113039e00950SLois Curfman McInnes     else {*idx = 0; *v=0;}
1131154123eaSLois Curfman McInnes   }
113239e00950SLois Curfman McInnes   *nz = nztot;
113378b31e54SBarry Smith   ierr = MatRestoreRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
113478b31e54SBarry Smith   ierr = MatRestoreRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
113539e00950SLois Curfman McInnes   return 0;
113639e00950SLois Curfman McInnes }
113739e00950SLois Curfman McInnes 
113839e00950SLois Curfman McInnes static int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
113939e00950SLois Curfman McInnes {
11400452661fSBarry Smith   if (idx) PetscFree(*idx);
11410452661fSBarry Smith   if (v) PetscFree(*v);
114239e00950SLois Curfman McInnes   return 0;
114339e00950SLois Curfman McInnes }
114439e00950SLois Curfman McInnes 
1145cddf8d76SBarry Smith static int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
1146855ac2c5SLois Curfman McInnes {
1147855ac2c5SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1148ec8511deSBarry Smith   Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
1149416022c9SBarry Smith   int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
1150416022c9SBarry Smith   double     sum = 0.0;
115104ca555eSLois Curfman McInnes   Scalar     *v;
115204ca555eSLois Curfman McInnes 
115317699dbbSLois Curfman McInnes   if (aij->size == 1) {
115414183eadSLois Curfman McInnes     ierr =  MatNorm(aij->A,type,norm); CHKERRQ(ierr);
115537fa93a5SLois Curfman McInnes   } else {
115604ca555eSLois Curfman McInnes     if (type == NORM_FROBENIUS) {
115704ca555eSLois Curfman McInnes       v = amat->a;
115804ca555eSLois Curfman McInnes       for (i=0; i<amat->nz; i++ ) {
115904ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX)
116004ca555eSLois Curfman McInnes         sum += real(conj(*v)*(*v)); v++;
116104ca555eSLois Curfman McInnes #else
116204ca555eSLois Curfman McInnes         sum += (*v)*(*v); v++;
116304ca555eSLois Curfman McInnes #endif
116404ca555eSLois Curfman McInnes       }
116504ca555eSLois Curfman McInnes       v = bmat->a;
116604ca555eSLois Curfman McInnes       for (i=0; i<bmat->nz; i++ ) {
116704ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX)
116804ca555eSLois Curfman McInnes         sum += real(conj(*v)*(*v)); v++;
116904ca555eSLois Curfman McInnes #else
117004ca555eSLois Curfman McInnes         sum += (*v)*(*v); v++;
117104ca555eSLois Curfman McInnes #endif
117204ca555eSLois Curfman McInnes       }
117304ca555eSLois Curfman McInnes       MPI_Allreduce((void*)&sum,(void*)norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
117404ca555eSLois Curfman McInnes       *norm = sqrt(*norm);
117504ca555eSLois Curfman McInnes     }
117604ca555eSLois Curfman McInnes     else if (type == NORM_1) { /* max column norm */
117704ca555eSLois Curfman McInnes       double *tmp, *tmp2;
117804ca555eSLois Curfman McInnes       int    *jj, *garray = aij->garray;
11790452661fSBarry Smith       tmp  = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp);
11800452661fSBarry Smith       tmp2 = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp2);
1181cddf8d76SBarry Smith       PetscMemzero(tmp,aij->N*sizeof(double));
118204ca555eSLois Curfman McInnes       *norm = 0.0;
118304ca555eSLois Curfman McInnes       v = amat->a; jj = amat->j;
118404ca555eSLois Curfman McInnes       for ( j=0; j<amat->nz; j++ ) {
1185579c6b6fSBarry Smith         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
118604ca555eSLois Curfman McInnes       }
118704ca555eSLois Curfman McInnes       v = bmat->a; jj = bmat->j;
118804ca555eSLois Curfman McInnes       for ( j=0; j<bmat->nz; j++ ) {
1189579c6b6fSBarry Smith         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
119004ca555eSLois Curfman McInnes       }
119104ca555eSLois Curfman McInnes       MPI_Allreduce((void*)tmp,(void*)tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);
119204ca555eSLois Curfman McInnes       for ( j=0; j<aij->N; j++ ) {
119304ca555eSLois Curfman McInnes         if (tmp2[j] > *norm) *norm = tmp2[j];
119404ca555eSLois Curfman McInnes       }
11950452661fSBarry Smith       PetscFree(tmp); PetscFree(tmp2);
119604ca555eSLois Curfman McInnes     }
119704ca555eSLois Curfman McInnes     else if (type == NORM_INFINITY) { /* max row norm */
1198515d9167SLois Curfman McInnes       double ntemp = 0.0;
119904ca555eSLois Curfman McInnes       for ( j=0; j<amat->m; j++ ) {
1200dbb450caSBarry Smith         v = amat->a + amat->i[j] + shift;
120104ca555eSLois Curfman McInnes         sum = 0.0;
120204ca555eSLois Curfman McInnes         for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
1203cddf8d76SBarry Smith           sum += PetscAbsScalar(*v); v++;
120404ca555eSLois Curfman McInnes         }
1205dbb450caSBarry Smith         v = bmat->a + bmat->i[j] + shift;
120604ca555eSLois Curfman McInnes         for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
1207cddf8d76SBarry Smith           sum += PetscAbsScalar(*v); v++;
120804ca555eSLois Curfman McInnes         }
1209515d9167SLois Curfman McInnes         if (sum > ntemp) ntemp = sum;
121004ca555eSLois Curfman McInnes       }
1211515d9167SLois Curfman McInnes       MPI_Allreduce((void*)&ntemp,(void*)norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);
121204ca555eSLois Curfman McInnes     }
121304ca555eSLois Curfman McInnes     else {
1214515d9167SLois Curfman McInnes       SETERRQ(1,"MatNorm_MPIAIJ:No support for two norm");
121504ca555eSLois Curfman McInnes     }
121637fa93a5SLois Curfman McInnes   }
1217855ac2c5SLois Curfman McInnes   return 0;
1218855ac2c5SLois Curfman McInnes }
1219855ac2c5SLois Curfman McInnes 
12200de55854SLois Curfman McInnes static int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1221b7c46309SBarry Smith {
1222b7c46309SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1223dbb450caSBarry Smith   Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
1224416022c9SBarry Smith   int        ierr,shift = Aloc->indexshift;
1225416022c9SBarry Smith   Mat        B;
1226b7c46309SBarry Smith   int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
1227b7c46309SBarry Smith   Scalar     *array;
1228b7c46309SBarry Smith 
1229*3638b69dSLois Curfman McInnes   if (matout == PETSC_NULL && M != N)
1230*3638b69dSLois Curfman McInnes     SETERRQ(1,"MatTranspose_MPIAIJ:Square matrix only for in-place");
1231b4fd4287SBarry Smith   ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,
1232b4fd4287SBarry Smith          PETSC_NULL,&B); CHKERRQ(ierr);
1233b7c46309SBarry Smith 
1234b7c46309SBarry Smith   /* copy over the A part */
1235ec8511deSBarry Smith   Aloc = (Mat_SeqAIJ*) a->A->data;
1236b7c46309SBarry Smith   m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1237b7c46309SBarry Smith   row = a->rstart;
1238dbb450caSBarry Smith   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
1239b7c46309SBarry Smith   for ( i=0; i<m; i++ ) {
1240416022c9SBarry Smith     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1241b7c46309SBarry Smith     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1242b7c46309SBarry Smith   }
1243b7c46309SBarry Smith   aj = Aloc->j;
1244dbb450caSBarry Smith   for ( i=0; i<ai[m]|+shift; i++ ) {aj[i] -= a->cstart + shift;}
1245b7c46309SBarry Smith 
1246b7c46309SBarry Smith   /* copy over the B part */
1247ec8511deSBarry Smith   Aloc = (Mat_SeqAIJ*) a->B->data;
1248b7c46309SBarry Smith   m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1249b7c46309SBarry Smith   row = a->rstart;
12500452661fSBarry Smith   ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
1251dbb450caSBarry Smith   for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
1252b7c46309SBarry Smith   for ( i=0; i<m; i++ ) {
1253416022c9SBarry Smith     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1254b7c46309SBarry Smith     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1255b7c46309SBarry Smith   }
12560452661fSBarry Smith   PetscFree(ct);
1257b7c46309SBarry Smith   ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
1258b7c46309SBarry Smith   ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
1259*3638b69dSLois Curfman McInnes   if (matout != PETSC_NULL) {
12600de55854SLois Curfman McInnes     *matout = B;
12610de55854SLois Curfman McInnes   } else {
12620de55854SLois Curfman McInnes     /* This isn't really an in-place transpose .... but free data structures from a */
12630452661fSBarry Smith     PetscFree(a->rowners);
12640de55854SLois Curfman McInnes     ierr = MatDestroy(a->A); CHKERRQ(ierr);
12650de55854SLois Curfman McInnes     ierr = MatDestroy(a->B); CHKERRQ(ierr);
12660452661fSBarry Smith     if (a->colmap) PetscFree(a->colmap);
12670452661fSBarry Smith     if (a->garray) PetscFree(a->garray);
12680de55854SLois Curfman McInnes     if (a->lvec) VecDestroy(a->lvec);
1269a56f8943SBarry Smith     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
12700452661fSBarry Smith     PetscFree(a);
1271416022c9SBarry Smith     PetscMemcpy(A,B,sizeof(struct _Mat));
12720452661fSBarry Smith     PetscHeaderDestroy(B);
12730de55854SLois Curfman McInnes   }
1274b7c46309SBarry Smith   return 0;
1275b7c46309SBarry Smith }
1276b7c46309SBarry Smith 
1277682d7d0cSBarry Smith extern int MatPrintHelp_SeqAIJ(Mat);
1278682d7d0cSBarry Smith static int MatPrintHelp_MPIAIJ(Mat A)
1279682d7d0cSBarry Smith {
1280682d7d0cSBarry Smith   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1281682d7d0cSBarry Smith 
1282682d7d0cSBarry Smith   if (!a->rank) return MatPrintHelp_SeqAIJ(a->A);
1283682d7d0cSBarry Smith   else return 0;
1284682d7d0cSBarry Smith }
1285682d7d0cSBarry Smith 
1286fc76ce87SLois Curfman McInnes extern int MatConvert_MPIAIJ(Mat,MatType,Mat *);
12873d1612f7SBarry Smith static int MatConvertSameType_MPIAIJ(Mat,Mat *,int);
12882f86bd48SSatish Balay extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int);
12898a729477SBarry Smith /* -------------------------------------------------------------------*/
12902ee70a88SLois Curfman McInnes static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
129139e00950SLois Curfman McInnes        MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
129244a69424SLois Curfman McInnes        MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
129344a69424SLois Curfman McInnes        MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
1294299609e3SLois Curfman McInnes        MatSolve_MPIAIJ,MatSolveAdd_MPIAIJ,
1295299609e3SLois Curfman McInnes        MatSolveTrans_MPIAIJ,MatSolveTransAdd_MPIAIJ,
1296299609e3SLois Curfman McInnes        MatLUFactor_MPIAIJ,0,
129744a69424SLois Curfman McInnes        MatRelax_MPIAIJ,
1298b7c46309SBarry Smith        MatTranspose_MPIAIJ,
1299a66be287SLois Curfman McInnes        MatGetInfo_MPIAIJ,0,
1300855ac2c5SLois Curfman McInnes        MatGetDiagonal_MPIAIJ,0,MatNorm_MPIAIJ,
1301ee50ffe9SBarry Smith        MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
13021eb62cbbSBarry Smith        0,
1303299609e3SLois Curfman McInnes        MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
1304299609e3SLois Curfman McInnes        MatGetReordering_MPIAIJ,
1305299609e3SLois Curfman McInnes        MatLUFactorSymbolic_MPIAIJ,MatLUFactorNumeric_MPIAIJ,0,0,
1306d1710a03SLois Curfman McInnes        MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
1307299609e3SLois Curfman McInnes        MatILUFactorSymbolic_MPIAIJ,0,
13083d1612f7SBarry Smith        0,0,MatConvert_MPIAIJ,0,0,MatConvertSameType_MPIAIJ,0,0,
1309b49de8d1SLois Curfman McInnes        0,0,0,
13107fab95ffSSatish Balay        0,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0,
1311052efed2SBarry Smith        MatPrintHelp_MPIAIJ,
1312052efed2SBarry Smith        MatScale_MPIAIJ};
13138a729477SBarry Smith 
13141987afe7SBarry Smith /*@C
1315ff756334SLois Curfman McInnes    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
1316b810aeb4SBarry Smith    (the default parallel PETSc format). For good performance you should
1317b810aeb4SBarry Smith    preallocate the matrix storage by setting the parameters d_nz or d_nzz and
1318b810aeb4SBarry Smith    o_nz or o_nzz. By setting these parameters accurately performance can
1319b810aeb4SBarry Smith    be increased by more then a factor of 50.
13208a729477SBarry Smith 
13218a729477SBarry Smith    Input Parameters:
13221eb62cbbSBarry Smith .  comm - MPI communicator
13237d3e4905SLois Curfman McInnes .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
13247d3e4905SLois Curfman McInnes .  n - number of local columns (or PETSC_DECIDE to have calculated
13257d3e4905SLois Curfman McInnes            if N is given)
13267d3e4905SLois Curfman McInnes .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
13277d3e4905SLois Curfman McInnes .  N - number of global columns (or PETSC_DECIDE to have calculated
13287d3e4905SLois Curfman McInnes            if n is given)
1329ab693e5aSLois Curfman McInnes .  d_nz - number of nonzeros per row in diagonal portion of local submatrix
1330ff756334SLois Curfman McInnes            (same for all local rows)
1331ab693e5aSLois Curfman McInnes .  d_nzz - number of nonzeros per row in diagonal portion of local submatrix
1332ab693e5aSLois Curfman McInnes            or null (possibly different for each row).  You must leave room
1333ab693e5aSLois Curfman McInnes            for the diagonal entry even if it is zero.
1334ab693e5aSLois Curfman McInnes .  o_nz - number of nonzeros per row in off-diagonal portion of local
1335ab693e5aSLois Curfman McInnes            submatrix (same for all local rows).
1336ab693e5aSLois Curfman McInnes .  o_nzz - number of nonzeros per row in off-diagonal portion of local
1337ab693e5aSLois Curfman McInnes            submatrix or null (possibly different for each row).
13388a729477SBarry Smith 
1339ff756334SLois Curfman McInnes    Output Parameter:
13408a729477SBarry Smith .  newmat - the matrix
13418a729477SBarry Smith 
1342ff756334SLois Curfman McInnes    Notes:
1343ff756334SLois Curfman McInnes    The AIJ format (also called the Yale sparse matrix format or
1344ff756334SLois Curfman McInnes    compressed row storage), is fully compatible with standard Fortran 77
13450002213bSLois Curfman McInnes    storage.  That is, the stored row and column indices can begin at
13460002213bSLois Curfman McInnes    either one (as in Fortran) or zero.  See the users manual for details.
1347ff756334SLois Curfman McInnes 
1348ff756334SLois Curfman McInnes    The user MUST specify either the local or global matrix dimensions
1349ff756334SLois Curfman McInnes    (possibly both).
1350ff756334SLois Curfman McInnes 
1351e0245417SLois Curfman McInnes    Storage Information:
1352e0245417SLois Curfman McInnes    For a square global matrix we define each processor's diagonal portion
1353e0245417SLois Curfman McInnes    to be its local rows and the corresponding columns (a square submatrix);
1354e0245417SLois Curfman McInnes    each processor's off-diagonal portion encompasses the remainder of the
1355e0245417SLois Curfman McInnes    local matrix (a rectangular submatrix).
1356e0245417SLois Curfman McInnes 
1357e0245417SLois Curfman McInnes    The user can specify preallocated storage for the diagonal part of
13585ace5be8SLois Curfman McInnes    the local submatrix with either d_nz or d_nnz (not both).  Set
13595ace5be8SLois Curfman McInnes    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
13605ace5be8SLois Curfman McInnes    memory allocation.  Likewise, specify preallocated storage for the
13615ace5be8SLois Curfman McInnes    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1362ff756334SLois Curfman McInnes 
1363b810aeb4SBarry Smith    Consider a processor that owns rows 3, 4 and 5 (in this figure we depict
1364b810aeb4SBarry Smith    those three rows and all columns (0-11).
13652191d07cSBarry Smith 
1366b810aeb4SBarry Smith $  0   1 2 3 4 5 6 7 8 9 10 11
1367b810aeb4SBarry Smith $  -------------------
1368b810aeb4SBarry Smith $  3 | o o o d d d o o o o o o
1369b810aeb4SBarry Smith $  4 | o o o d d d o o o o o o
1370b810aeb4SBarry Smith $  5 | o o o d d d o o o o o o
1371b810aeb4SBarry Smith $
1372b810aeb4SBarry Smith      Thus any entries in the d locations are stored in the d matrix and
1373b810aeb4SBarry Smith    any in the o locations are store in the o matrix. (The d and the o matrix
1374b810aeb4SBarry Smith    are simply SeqAIJ matrix (that is they store the matrices in compress
1375b810aeb4SBarry Smith    row storage)).
1376b810aeb4SBarry Smith 
1377b810aeb4SBarry Smith    Now d_nz should give the number of non zeros per row in the d matrix
1378b810aeb4SBarry Smith    and o_nz should give the number of nonzeros per row in the o matrix.
1379b810aeb4SBarry Smith    In general for PDE problems where most nonzeros are near the diagonal
1380b810aeb4SBarry Smith    one expects d_nz >> o_nz. See the users manual chapter on matrices for
1381b810aeb4SBarry Smith    more details.
13822191d07cSBarry Smith 
1383c451ab25SLois Curfman McInnes    By default, this format uses inodes (identical nodes) when possible.
1384c451ab25SLois Curfman McInnes    We search for consecutive rows with the same nonzero structure, thereby
1385c451ab25SLois Curfman McInnes    reusing matrix information to achieve increased efficiency.
1386c451ab25SLois Curfman McInnes 
1387c451ab25SLois Curfman McInnes    Options Database Keys:
1388c451ab25SLois Curfman McInnes $    -mat_aij_no_inode  - Do not use inodes
1389c451ab25SLois Curfman McInnes $    -mat_aij_inode_limit <limit> - Set inode limit (max limit=5)
1390c451ab25SLois Curfman McInnes 
1391dbd7a890SLois Curfman McInnes .keywords: matrix, aij, compressed row, sparse, parallel
1392ff756334SLois Curfman McInnes 
1393fafbff53SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
13948a729477SBarry Smith @*/
13951eb62cbbSBarry Smith int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
13961eb62cbbSBarry Smith                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *newmat)
13978a729477SBarry Smith {
13988a729477SBarry Smith   Mat          mat;
1399416022c9SBarry Smith   Mat_MPIAIJ   *a;
14006abc6512SBarry Smith   int          ierr, i,sum[2],work[2];
1401416022c9SBarry Smith 
14028a729477SBarry Smith   *newmat         = 0;
14030452661fSBarry Smith   PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,comm);
1404a5a9c739SBarry Smith   PLogObjectCreate(mat);
14050452661fSBarry Smith   mat->data       = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
1406416022c9SBarry Smith   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
140744a69424SLois Curfman McInnes   mat->destroy    = MatDestroy_MPIAIJ;
140844a69424SLois Curfman McInnes   mat->view       = MatView_MPIAIJ;
14098a729477SBarry Smith   mat->factor     = 0;
1410c456f294SBarry Smith   mat->assembled  = PETSC_FALSE;
1411d6dfbf8fSBarry Smith 
141264119d58SLois Curfman McInnes   a->insertmode = NOT_SET_VALUES;
141317699dbbSLois Curfman McInnes   MPI_Comm_rank(comm,&a->rank);
141417699dbbSLois Curfman McInnes   MPI_Comm_size(comm,&a->size);
14151eb62cbbSBarry Smith 
1416b4fd4287SBarry Smith   if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
14171987afe7SBarry Smith     SETERRQ(1,"MatCreateMPIAIJ:Cannot have PETSc decide rows but set d_nnz or o_nnz");
14181987afe7SBarry Smith 
1419dbd7a890SLois Curfman McInnes   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
14201eb62cbbSBarry Smith     work[0] = m; work[1] = n;
1421d65a2f8fSBarry Smith     MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
1422dbd7a890SLois Curfman McInnes     if (M == PETSC_DECIDE) M = sum[0];
1423dbd7a890SLois Curfman McInnes     if (N == PETSC_DECIDE) N = sum[1];
14241eb62cbbSBarry Smith   }
142517699dbbSLois Curfman McInnes   if (m == PETSC_DECIDE) {m = M/a->size + ((M % a->size) > a->rank);}
142617699dbbSLois Curfman McInnes   if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);}
1427416022c9SBarry Smith   a->m = m;
1428416022c9SBarry Smith   a->n = n;
1429416022c9SBarry Smith   a->N = N;
1430416022c9SBarry Smith   a->M = M;
14311eb62cbbSBarry Smith 
14321eb62cbbSBarry Smith   /* build local table of row and column ownerships */
14330452661fSBarry Smith   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
1434579c6b6fSBarry Smith   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
143517699dbbSLois Curfman McInnes   a->cowners = a->rowners + a->size + 1;
1436416022c9SBarry Smith   MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);
1437416022c9SBarry Smith   a->rowners[0] = 0;
143817699dbbSLois Curfman McInnes   for ( i=2; i<=a->size; i++ ) {
1439416022c9SBarry Smith     a->rowners[i] += a->rowners[i-1];
14408a729477SBarry Smith   }
144117699dbbSLois Curfman McInnes   a->rstart = a->rowners[a->rank];
144217699dbbSLois Curfman McInnes   a->rend   = a->rowners[a->rank+1];
1443416022c9SBarry Smith   MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);
1444416022c9SBarry Smith   a->cowners[0] = 0;
144517699dbbSLois Curfman McInnes   for ( i=2; i<=a->size; i++ ) {
1446416022c9SBarry Smith     a->cowners[i] += a->cowners[i-1];
14478a729477SBarry Smith   }
144817699dbbSLois Curfman McInnes   a->cstart = a->cowners[a->rank];
144917699dbbSLois Curfman McInnes   a->cend   = a->cowners[a->rank+1];
14508a729477SBarry Smith 
14515ace5be8SLois Curfman McInnes   if (d_nz == PETSC_DEFAULT) d_nz = 5;
1452416022c9SBarry Smith   ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&a->A); CHKERRQ(ierr);
1453416022c9SBarry Smith   PLogObjectParent(mat,a->A);
14547b8455f0SLois Curfman McInnes   if (o_nz == PETSC_DEFAULT) o_nz = 0;
1455416022c9SBarry Smith   ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&a->B); CHKERRQ(ierr);
1456416022c9SBarry Smith   PLogObjectParent(mat,a->B);
14578a729477SBarry Smith 
14581eb62cbbSBarry Smith   /* build cache for off array entries formed */
1459416022c9SBarry Smith   ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr);
1460416022c9SBarry Smith   a->colmap      = 0;
1461416022c9SBarry Smith   a->garray      = 0;
14624b0e389bSBarry Smith   a->roworiented = 1;
14638a729477SBarry Smith 
14641eb62cbbSBarry Smith   /* stuff used for matrix vector multiply */
1465416022c9SBarry Smith   a->lvec      = 0;
1466416022c9SBarry Smith   a->Mvctx     = 0;
14678a729477SBarry Smith 
1468d6dfbf8fSBarry Smith   *newmat = mat;
1469d6dfbf8fSBarry Smith   return 0;
1470d6dfbf8fSBarry Smith }
1471c74985f6SBarry Smith 
14723d1612f7SBarry Smith static int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues)
1473d6dfbf8fSBarry Smith {
1474d6dfbf8fSBarry Smith   Mat        mat;
1475416022c9SBarry Smith   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
147625cdf11fSBarry Smith   int        ierr, len,flg;
1477d6dfbf8fSBarry Smith 
1478416022c9SBarry Smith   *newmat       = 0;
14790452661fSBarry Smith   PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm);
1480a5a9c739SBarry Smith   PLogObjectCreate(mat);
14810452661fSBarry Smith   mat->data       = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
1482416022c9SBarry Smith   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
148344a69424SLois Curfman McInnes   mat->destroy    = MatDestroy_MPIAIJ;
148444a69424SLois Curfman McInnes   mat->view       = MatView_MPIAIJ;
1485d6dfbf8fSBarry Smith   mat->factor     = matin->factor;
1486c456f294SBarry Smith   mat->assembled  = PETSC_TRUE;
1487d6dfbf8fSBarry Smith 
1488416022c9SBarry Smith   a->m          = oldmat->m;
1489416022c9SBarry Smith   a->n          = oldmat->n;
1490416022c9SBarry Smith   a->M          = oldmat->M;
1491416022c9SBarry Smith   a->N          = oldmat->N;
1492d6dfbf8fSBarry Smith 
1493416022c9SBarry Smith   a->rstart     = oldmat->rstart;
1494416022c9SBarry Smith   a->rend       = oldmat->rend;
1495416022c9SBarry Smith   a->cstart     = oldmat->cstart;
1496416022c9SBarry Smith   a->cend       = oldmat->cend;
149717699dbbSLois Curfman McInnes   a->size       = oldmat->size;
149817699dbbSLois Curfman McInnes   a->rank       = oldmat->rank;
149964119d58SLois Curfman McInnes   a->insertmode = NOT_SET_VALUES;
1500d6dfbf8fSBarry Smith 
15010452661fSBarry Smith   a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners);
150217699dbbSLois Curfman McInnes   PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
150317699dbbSLois Curfman McInnes   PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));
1504416022c9SBarry Smith   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
15052ee70a88SLois Curfman McInnes   if (oldmat->colmap) {
15060452661fSBarry Smith     a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
1507416022c9SBarry Smith     PLogObjectMemory(mat,(a->N)*sizeof(int));
1508416022c9SBarry Smith     PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
1509416022c9SBarry Smith   } else a->colmap = 0;
1510ec8511deSBarry Smith   if (oldmat->garray && (len = ((Mat_SeqAIJ *) (oldmat->B->data))->n)) {
15110452661fSBarry Smith     a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray);
1512464493b3SBarry Smith     PLogObjectMemory(mat,len*sizeof(int));
1513416022c9SBarry Smith     PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
1514416022c9SBarry Smith   } else a->garray = 0;
1515d6dfbf8fSBarry Smith 
1516416022c9SBarry Smith   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
1517416022c9SBarry Smith   PLogObjectParent(mat,a->lvec);
1518a56f8943SBarry Smith   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
1519416022c9SBarry Smith   PLogObjectParent(mat,a->Mvctx);
1520416022c9SBarry Smith   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
1521416022c9SBarry Smith   PLogObjectParent(mat,a->A);
1522416022c9SBarry Smith   ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
1523416022c9SBarry Smith   PLogObjectParent(mat,a->B);
15245dd7a6c7SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
152525cdf11fSBarry Smith   if (flg) {
1526682d7d0cSBarry Smith     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
1527682d7d0cSBarry Smith   }
15288a729477SBarry Smith   *newmat = mat;
15298a729477SBarry Smith   return 0;
15308a729477SBarry Smith }
1531416022c9SBarry Smith 
1532416022c9SBarry Smith #include "sysio.h"
1533416022c9SBarry Smith 
1534c456f294SBarry Smith int MatLoad_MPIAIJ(Viewer bview,MatType type,Mat *newmat)
1535416022c9SBarry Smith {
1536d65a2f8fSBarry Smith   Mat          A;
1537416022c9SBarry Smith   int          i, nz, ierr, j,rstart, rend, fd;
1538d65a2f8fSBarry Smith   Scalar       *vals,*svals;
1539416022c9SBarry Smith   PetscObject  vobj = (PetscObject) bview;
1540416022c9SBarry Smith   MPI_Comm     comm = vobj->comm;
1541416022c9SBarry Smith   MPI_Status   status;
154217699dbbSLois Curfman McInnes   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1543d65a2f8fSBarry Smith   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
1544d65a2f8fSBarry Smith   int          tag = ((PetscObject)bview)->tag;
1545416022c9SBarry Smith 
154617699dbbSLois Curfman McInnes   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
154717699dbbSLois Curfman McInnes   if (!rank) {
1548416022c9SBarry Smith     ierr = ViewerFileGetDescriptor_Private(bview,&fd); CHKERRQ(ierr);
1549416022c9SBarry Smith     ierr = SYRead(fd,(char *)header,4,SYINT); CHKERRQ(ierr);
1550c456f294SBarry Smith     if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIAIJ:not matrix object");
1551416022c9SBarry Smith   }
1552416022c9SBarry Smith 
1553416022c9SBarry Smith   MPI_Bcast(header+1,3,MPI_INT,0,comm);
1554416022c9SBarry Smith   M = header[1]; N = header[2];
1555416022c9SBarry Smith   /* determine ownership of all rows */
155617699dbbSLois Curfman McInnes   m = M/size + ((M % size) > rank);
15570452661fSBarry Smith   rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
1558416022c9SBarry Smith   MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1559416022c9SBarry Smith   rowners[0] = 0;
156017699dbbSLois Curfman McInnes   for ( i=2; i<=size; i++ ) {
1561416022c9SBarry Smith     rowners[i] += rowners[i-1];
1562416022c9SBarry Smith   }
156317699dbbSLois Curfman McInnes   rstart = rowners[rank];
156417699dbbSLois Curfman McInnes   rend   = rowners[rank+1];
1565416022c9SBarry Smith 
1566416022c9SBarry Smith   /* distribute row lengths to all processors */
15670452661fSBarry Smith   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
1568416022c9SBarry Smith   offlens = ourlens + (rend-rstart);
156917699dbbSLois Curfman McInnes   if (!rank) {
15700452661fSBarry Smith     rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
1571416022c9SBarry Smith     ierr = SYRead(fd,rowlengths,M,SYINT); CHKERRQ(ierr);
15720452661fSBarry Smith     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
157317699dbbSLois Curfman McInnes     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
1574d65a2f8fSBarry Smith     MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
15750452661fSBarry Smith     PetscFree(sndcounts);
1576416022c9SBarry Smith   }
1577416022c9SBarry Smith   else {
1578416022c9SBarry Smith     MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
1579416022c9SBarry Smith   }
1580416022c9SBarry Smith 
158117699dbbSLois Curfman McInnes   if (!rank) {
1582416022c9SBarry Smith     /* calculate the number of nonzeros on each processor */
15830452661fSBarry Smith     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
1584cddf8d76SBarry Smith     PetscMemzero(procsnz,size*sizeof(int));
158517699dbbSLois Curfman McInnes     for ( i=0; i<size; i++ ) {
1586416022c9SBarry Smith       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
1587416022c9SBarry Smith         procsnz[i] += rowlengths[j];
1588416022c9SBarry Smith       }
1589416022c9SBarry Smith     }
15900452661fSBarry Smith     PetscFree(rowlengths);
1591416022c9SBarry Smith 
1592416022c9SBarry Smith     /* determine max buffer needed and allocate it */
1593416022c9SBarry Smith     maxnz = 0;
159417699dbbSLois Curfman McInnes     for ( i=0; i<size; i++ ) {
15950452661fSBarry Smith       maxnz = PetscMax(maxnz,procsnz[i]);
1596416022c9SBarry Smith     }
15970452661fSBarry Smith     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
1598416022c9SBarry Smith 
1599416022c9SBarry Smith     /* read in my part of the matrix column indices  */
1600416022c9SBarry Smith     nz = procsnz[0];
16010452661fSBarry Smith     mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1602d65a2f8fSBarry Smith     ierr = SYRead(fd,mycols,nz,SYINT); CHKERRQ(ierr);
1603d65a2f8fSBarry Smith 
1604d65a2f8fSBarry Smith     /* read in every one elses and ship off */
160517699dbbSLois Curfman McInnes     for ( i=1; i<size; i++ ) {
1606d65a2f8fSBarry Smith       nz = procsnz[i];
1607416022c9SBarry Smith       ierr = SYRead(fd,cols,nz,SYINT); CHKERRQ(ierr);
1608d65a2f8fSBarry Smith       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
1609d65a2f8fSBarry Smith     }
16100452661fSBarry Smith     PetscFree(cols);
1611416022c9SBarry Smith   }
1612416022c9SBarry Smith   else {
1613416022c9SBarry Smith     /* determine buffer space needed for message */
1614416022c9SBarry Smith     nz = 0;
1615416022c9SBarry Smith     for ( i=0; i<m; i++ ) {
1616416022c9SBarry Smith       nz += ourlens[i];
1617416022c9SBarry Smith     }
16180452661fSBarry Smith     mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1619416022c9SBarry Smith 
1620416022c9SBarry Smith     /* receive message of column indices*/
1621d65a2f8fSBarry Smith     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
1622416022c9SBarry Smith     MPI_Get_count(&status,MPI_INT,&maxnz);
1623c456f294SBarry Smith     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file");
1624416022c9SBarry Smith   }
1625416022c9SBarry Smith 
1626416022c9SBarry Smith   /* loop over local rows, determining number of off diagonal entries */
1627cddf8d76SBarry Smith   PetscMemzero(offlens,m*sizeof(int));
1628416022c9SBarry Smith   jj = 0;
1629416022c9SBarry Smith   for ( i=0; i<m; i++ ) {
1630416022c9SBarry Smith     for ( j=0; j<ourlens[i]; j++ ) {
1631d65a2f8fSBarry Smith       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1632416022c9SBarry Smith       jj++;
1633416022c9SBarry Smith     }
1634416022c9SBarry Smith   }
1635d65a2f8fSBarry Smith 
1636d65a2f8fSBarry Smith   /* create our matrix */
1637416022c9SBarry Smith   for ( i=0; i<m; i++ ) {
1638416022c9SBarry Smith     ourlens[i] -= offlens[i];
1639416022c9SBarry Smith   }
1640d65a2f8fSBarry Smith   ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1641d65a2f8fSBarry Smith   A = *newmat;
1642d65a2f8fSBarry Smith   MatSetOption(A,COLUMNS_SORTED);
1643d65a2f8fSBarry Smith   for ( i=0; i<m; i++ ) {
1644d65a2f8fSBarry Smith     ourlens[i] += offlens[i];
1645d65a2f8fSBarry Smith   }
1646416022c9SBarry Smith 
164717699dbbSLois Curfman McInnes   if (!rank) {
16480452661fSBarry Smith     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);
1649416022c9SBarry Smith 
1650416022c9SBarry Smith     /* read in my part of the matrix numerical values  */
1651416022c9SBarry Smith     nz = procsnz[0];
1652416022c9SBarry Smith     ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr);
1653d65a2f8fSBarry Smith 
1654d65a2f8fSBarry Smith     /* insert into matrix */
1655d65a2f8fSBarry Smith     jj      = rstart;
1656d65a2f8fSBarry Smith     smycols = mycols;
1657d65a2f8fSBarry Smith     svals   = vals;
1658d65a2f8fSBarry Smith     for ( i=0; i<m; i++ ) {
1659d65a2f8fSBarry Smith       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1660d65a2f8fSBarry Smith       smycols += ourlens[i];
1661d65a2f8fSBarry Smith       svals   += ourlens[i];
1662d65a2f8fSBarry Smith       jj++;
1663416022c9SBarry Smith     }
1664416022c9SBarry Smith 
1665d65a2f8fSBarry Smith     /* read in other processors and ship out */
166617699dbbSLois Curfman McInnes     for ( i=1; i<size; i++ ) {
1667416022c9SBarry Smith       nz = procsnz[i];
1668416022c9SBarry Smith       ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr);
1669d65a2f8fSBarry Smith       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
1670416022c9SBarry Smith     }
16710452661fSBarry Smith     PetscFree(procsnz);
1672416022c9SBarry Smith   }
1673d65a2f8fSBarry Smith   else {
1674d65a2f8fSBarry Smith     /* receive numeric values */
16750452661fSBarry Smith     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);
1676416022c9SBarry Smith 
1677d65a2f8fSBarry Smith     /* receive message of values*/
1678d65a2f8fSBarry Smith     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
1679d65a2f8fSBarry Smith     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1680c456f294SBarry Smith     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file");
1681d65a2f8fSBarry Smith 
1682d65a2f8fSBarry Smith     /* insert into matrix */
1683d65a2f8fSBarry Smith     jj      = rstart;
1684d65a2f8fSBarry Smith     smycols = mycols;
1685d65a2f8fSBarry Smith     svals   = vals;
1686d65a2f8fSBarry Smith     for ( i=0; i<m; i++ ) {
1687d65a2f8fSBarry Smith       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1688d65a2f8fSBarry Smith       smycols += ourlens[i];
1689d65a2f8fSBarry Smith       svals   += ourlens[i];
1690d65a2f8fSBarry Smith       jj++;
1691d65a2f8fSBarry Smith     }
1692d65a2f8fSBarry Smith   }
16930452661fSBarry Smith   PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);
1694d65a2f8fSBarry Smith 
1695d65a2f8fSBarry Smith   ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
1696d65a2f8fSBarry Smith   ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
1697416022c9SBarry Smith   return 0;
1698416022c9SBarry Smith }
1699