xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 052efed2d4d49f85092290ff2aaacf5d901780e1)
1cb512458SBarry Smith #ifndef lint
2*052efed2SBarry Smith static char vcid[] = "$Id: mpiaij.c,v 1.110 1996/01/12 22:31:24 bsmith Exp bsmith $";
3cb512458SBarry Smith #endif
48a729477SBarry Smith 
51eb62cbbSBarry Smith #include "mpiaij.h"
68a729477SBarry Smith #include "vec/vecimpl.h"
7d6dfbf8fSBarry Smith #include "inline/spops.h"
88a729477SBarry Smith 
99e25ed09SBarry Smith /* local utility routine that creates a mapping from the global column
109e25ed09SBarry Smith number to the local number in the off-diagonal part of the local
119e25ed09SBarry Smith storage of the matrix.  This is done in a non scable way since the
129e25ed09SBarry Smith length of colmap equals the global matrix length.
139e25ed09SBarry Smith */
14b7c46309SBarry Smith static int CreateColmap_Private(Mat mat)
159e25ed09SBarry Smith {
1644a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
17ec8511deSBarry Smith   Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
18416022c9SBarry Smith   int        n = B->n,i,shift = B->indexshift;
19dbb450caSBarry Smith 
200452661fSBarry Smith   aij->colmap = (int *) PetscMalloc(aij->N*sizeof(int));CHKPTRQ(aij->colmap);
21464493b3SBarry Smith   PLogObjectMemory(mat,aij->N*sizeof(int));
22cddf8d76SBarry Smith   PetscMemzero(aij->colmap,aij->N*sizeof(int));
23dbb450caSBarry Smith   for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i-shift;
249e25ed09SBarry Smith   return 0;
259e25ed09SBarry Smith }
269e25ed09SBarry Smith 
272493cbb0SBarry Smith extern int DisAssemble_MPIAIJ(Mat);
282493cbb0SBarry Smith 
2951c98ccdSLois Curfman McInnes static int MatGetReordering_MPIAIJ(Mat mat,MatOrdering type,IS *rperm,IS *cperm)
30299609e3SLois Curfman McInnes {
31299609e3SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
32299609e3SLois Curfman McInnes   int        ierr;
3317699dbbSLois Curfman McInnes   if (aij->size == 1) {
3451c98ccdSLois Curfman McInnes     ierr = MatGetReordering(aij->A,type,rperm,cperm); CHKERRQ(ierr);
3548d91487SBarry Smith   } else SETERRQ(1,"MatGetReordering_MPIAIJ:not supported in parallel");
36299609e3SLois Curfman McInnes   return 0;
37299609e3SLois Curfman McInnes }
38299609e3SLois Curfman McInnes 
394b0e389bSBarry Smith static int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
408a729477SBarry Smith {
4144a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
42dbb450caSBarry Smith   Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data;
434b0e389bSBarry Smith   Scalar     value;
441eb62cbbSBarry Smith   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
451eb62cbbSBarry Smith   int        cstart = aij->cstart, cend = aij->cend,row,col;
464b0e389bSBarry Smith   int        shift = C->indexshift,roworiented = aij->roworiented;
478a729477SBarry Smith 
4864119d58SLois Curfman McInnes   if (aij->insertmode != NOT_SET_VALUES && aij->insertmode != addv) {
4948d91487SBarry Smith     SETERRQ(1,"MatSetValues_MPIAIJ:Cannot mix inserts and adds");
508a729477SBarry Smith   }
511eb62cbbSBarry Smith   aij->insertmode = addv;
528a729477SBarry Smith   for ( i=0; i<m; i++ ) {
534b0e389bSBarry Smith     if (im[i] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative row");
544b0e389bSBarry Smith     if (im[i] >= aij->M) SETERRQ(1,"MatSetValues_MPIAIJ:Row too large");
554b0e389bSBarry Smith     if (im[i] >= rstart && im[i] < rend) {
564b0e389bSBarry Smith       row = im[i] - rstart;
571eb62cbbSBarry Smith       for ( j=0; j<n; j++ ) {
584b0e389bSBarry Smith         if (in[j] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative column");
594b0e389bSBarry Smith         if (in[j] >= aij->N) SETERRQ(1,"MatSetValues_MPIAIJ:Col too large");
604b0e389bSBarry Smith         if (in[j] >= cstart && in[j] < cend){
614b0e389bSBarry Smith           col = in[j] - cstart;
624b0e389bSBarry Smith           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
634b0e389bSBarry Smith           ierr = MatSetValues(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
641eb62cbbSBarry Smith         }
651eb62cbbSBarry Smith         else {
66d6dfbf8fSBarry Smith           if (aij->assembled) {
67b7c46309SBarry Smith             if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
684b0e389bSBarry Smith             col = aij->colmap[in[j]] + shift;
69ec8511deSBarry Smith             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
702493cbb0SBarry Smith               ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
714b0e389bSBarry Smith               col =  in[j];
72d6dfbf8fSBarry Smith             }
739e25ed09SBarry Smith           }
744b0e389bSBarry Smith           else col = in[j];
754b0e389bSBarry Smith           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
764b0e389bSBarry Smith           ierr = MatSetValues(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
771eb62cbbSBarry Smith         }
781eb62cbbSBarry Smith       }
791eb62cbbSBarry Smith     }
801eb62cbbSBarry Smith     else {
814b0e389bSBarry Smith       if (roworiented) {
824b0e389bSBarry Smith         ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
834b0e389bSBarry Smith       }
844b0e389bSBarry Smith       else {
854b0e389bSBarry Smith         row = im[i];
864b0e389bSBarry Smith         for ( j=0; j<n; j++ ) {
874b0e389bSBarry Smith           ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
884b0e389bSBarry Smith         }
894b0e389bSBarry Smith       }
901eb62cbbSBarry Smith     }
918a729477SBarry Smith   }
928a729477SBarry Smith   return 0;
938a729477SBarry Smith }
948a729477SBarry Smith 
95b49de8d1SLois Curfman McInnes static int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
96b49de8d1SLois Curfman McInnes {
97b49de8d1SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
98b49de8d1SLois Curfman McInnes   Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data;
99b49de8d1SLois Curfman McInnes   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
100b49de8d1SLois Curfman McInnes   int        cstart = aij->cstart, cend = aij->cend,row,col;
101b49de8d1SLois Curfman McInnes   int        shift = C->indexshift;
102b49de8d1SLois Curfman McInnes 
103b49de8d1SLois Curfman McInnes   if (!aij->assembled) SETERRQ(1,"MatGetValues_MPIAIJ:Not for unassembled matrix");
104b49de8d1SLois Curfman McInnes   for ( i=0; i<m; i++ ) {
105b49de8d1SLois Curfman McInnes     if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative row");
106b49de8d1SLois Curfman McInnes     if (idxm[i] >= aij->M) SETERRQ(1,"MatGetValues_MPIAIJ:Row too large");
107b49de8d1SLois Curfman McInnes     if (idxm[i] >= rstart && idxm[i] < rend) {
108b49de8d1SLois Curfman McInnes       row = idxm[i] - rstart;
109b49de8d1SLois Curfman McInnes       for ( j=0; j<n; j++ ) {
110b49de8d1SLois Curfman McInnes         if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative column");
111b49de8d1SLois Curfman McInnes         if (idxn[j] >= aij->N) SETERRQ(1,"MatGetValues_MPIAIJ:Col too large");
112b49de8d1SLois Curfman McInnes         if (idxn[j] >= cstart && idxn[j] < cend){
113b49de8d1SLois Curfman McInnes           col = idxn[j] - cstart;
114b49de8d1SLois Curfman McInnes           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
115b49de8d1SLois Curfman McInnes         }
116b49de8d1SLois Curfman McInnes         else {
117b49de8d1SLois Curfman McInnes           col = aij->colmap[idxn[j]] + shift;
118b49de8d1SLois Curfman McInnes           ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
119b49de8d1SLois Curfman McInnes         }
120b49de8d1SLois Curfman McInnes       }
121b49de8d1SLois Curfman McInnes     }
122b49de8d1SLois Curfman McInnes     else {
123b49de8d1SLois Curfman McInnes       SETERRQ(1,"MatGetValues_MPIAIJ:Only local values currently supported");
124b49de8d1SLois Curfman McInnes     }
125b49de8d1SLois Curfman McInnes   }
126b49de8d1SLois Curfman McInnes   return 0;
127b49de8d1SLois Curfman McInnes }
128b49de8d1SLois Curfman McInnes 
129fc76ce87SLois Curfman McInnes static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
1308a729477SBarry Smith {
13144a69424SLois Curfman McInnes   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
132d6dfbf8fSBarry Smith   MPI_Comm    comm = mat->comm;
13317699dbbSLois Curfman McInnes   int         size = aij->size, *owners = aij->rowners;
13417699dbbSLois Curfman McInnes   int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
1351eb62cbbSBarry Smith   MPI_Request *send_waits,*recv_waits;
1366abc6512SBarry Smith   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
1371eb62cbbSBarry Smith   InsertMode  addv;
1381eb62cbbSBarry Smith   Scalar      *rvalues,*svalues;
1391eb62cbbSBarry Smith 
1401eb62cbbSBarry Smith   /* make sure all processors are either in INSERTMODE or ADDMODE */
141d65a2f8fSBarry Smith   MPI_Allreduce(&aij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
142dbb450caSBarry Smith   if (addv == (ADD_VALUES|INSERT_VALUES)) {
143bbb6d6a8SBarry Smith     SETERRQ(1,"MatAssemblyBegin_MPIAIJ:Some processors inserted others added");
1441eb62cbbSBarry Smith   }
1451eb62cbbSBarry Smith   aij->insertmode = addv; /* in case this processor had no cache */
1461eb62cbbSBarry Smith 
1471eb62cbbSBarry Smith   /*  first count number of contributors to each processor */
1480452661fSBarry Smith   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
149cddf8d76SBarry Smith   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
1500452661fSBarry Smith   owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
1511eb62cbbSBarry Smith   for ( i=0; i<aij->stash.n; i++ ) {
1521eb62cbbSBarry Smith     idx = aij->stash.idx[i];
15317699dbbSLois Curfman McInnes     for ( j=0; j<size; j++ ) {
1541eb62cbbSBarry Smith       if (idx >= owners[j] && idx < owners[j+1]) {
1551eb62cbbSBarry Smith         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
1568a729477SBarry Smith       }
1578a729477SBarry Smith     }
1588a729477SBarry Smith   }
15917699dbbSLois Curfman McInnes   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
1601eb62cbbSBarry Smith 
1611eb62cbbSBarry Smith   /* inform other processors of number of messages and max length*/
1620452661fSBarry Smith   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
16317699dbbSLois Curfman McInnes   MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);
16417699dbbSLois Curfman McInnes   nreceives = work[rank];
16517699dbbSLois Curfman McInnes   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
16617699dbbSLois Curfman McInnes   nmax = work[rank];
1670452661fSBarry Smith   PetscFree(work);
1681eb62cbbSBarry Smith 
1691eb62cbbSBarry Smith   /* post receives:
1701eb62cbbSBarry Smith        1) each message will consist of ordered pairs
1711eb62cbbSBarry Smith      (global index,value) we store the global index as a double
172d6dfbf8fSBarry Smith      to simplify the message passing.
1731eb62cbbSBarry Smith        2) since we don't know how long each individual message is we
1741eb62cbbSBarry Smith      allocate the largest needed buffer for each receive. Potentially
1751eb62cbbSBarry Smith      this is a lot of wasted space.
1761eb62cbbSBarry Smith 
1771eb62cbbSBarry Smith 
1781eb62cbbSBarry Smith        This could be done better.
1791eb62cbbSBarry Smith   */
1800452661fSBarry Smith   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
18178b31e54SBarry Smith   CHKPTRQ(rvalues);
1820452661fSBarry Smith   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
18378b31e54SBarry Smith   CHKPTRQ(recv_waits);
1841eb62cbbSBarry Smith   for ( i=0; i<nreceives; i++ ) {
185416022c9SBarry Smith     MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
1861eb62cbbSBarry Smith               comm,recv_waits+i);
1871eb62cbbSBarry Smith   }
1881eb62cbbSBarry Smith 
1891eb62cbbSBarry Smith   /* do sends:
1901eb62cbbSBarry Smith       1) starts[i] gives the starting index in svalues for stuff going to
1911eb62cbbSBarry Smith          the ith processor
1921eb62cbbSBarry Smith   */
1930452661fSBarry Smith   svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
1940452661fSBarry Smith   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
19578b31e54SBarry Smith   CHKPTRQ(send_waits);
1960452661fSBarry Smith   starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
1971eb62cbbSBarry Smith   starts[0] = 0;
19817699dbbSLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
1991eb62cbbSBarry Smith   for ( i=0; i<aij->stash.n; i++ ) {
2001eb62cbbSBarry Smith     svalues[3*starts[owner[i]]]       = (Scalar)  aij->stash.idx[i];
2011eb62cbbSBarry Smith     svalues[3*starts[owner[i]]+1]     = (Scalar)  aij->stash.idy[i];
2021eb62cbbSBarry Smith     svalues[3*(starts[owner[i]]++)+2] =  aij->stash.array[i];
2031eb62cbbSBarry Smith   }
2040452661fSBarry Smith   PetscFree(owner);
2051eb62cbbSBarry Smith   starts[0] = 0;
20617699dbbSLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
2071eb62cbbSBarry Smith   count = 0;
20817699dbbSLois Curfman McInnes   for ( i=0; i<size; i++ ) {
2091eb62cbbSBarry Smith     if (procs[i]) {
210416022c9SBarry Smith       MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
2111eb62cbbSBarry Smith                 comm,send_waits+count++);
2121eb62cbbSBarry Smith     }
2131eb62cbbSBarry Smith   }
2140452661fSBarry Smith   PetscFree(starts); PetscFree(nprocs);
2151eb62cbbSBarry Smith 
2161eb62cbbSBarry Smith   /* Free cache space */
21778b31e54SBarry Smith   ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
2181eb62cbbSBarry Smith 
2191eb62cbbSBarry Smith   aij->svalues    = svalues;    aij->rvalues    = rvalues;
2201eb62cbbSBarry Smith   aij->nsends     = nsends;     aij->nrecvs     = nreceives;
2211eb62cbbSBarry Smith   aij->send_waits = send_waits; aij->recv_waits = recv_waits;
2221eb62cbbSBarry Smith   aij->rmax       = nmax;
2231eb62cbbSBarry Smith 
2241eb62cbbSBarry Smith   return 0;
2251eb62cbbSBarry Smith }
22644a69424SLois Curfman McInnes extern int MatSetUpMultiply_MPIAIJ(Mat);
2271eb62cbbSBarry Smith 
228fc76ce87SLois Curfman McInnes static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
2291eb62cbbSBarry Smith {
23044a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
231dbb450caSBarry Smith   Mat_SeqAIJ *C = (Mat_SeqAIJ *) aij->A->data;
2321eb62cbbSBarry Smith   MPI_Status  *send_status,recv_status;
233416022c9SBarry Smith   int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
234416022c9SBarry Smith   int         row,col,other_disassembled,shift = C->indexshift;
2351eb62cbbSBarry Smith   Scalar      *values,val;
2361eb62cbbSBarry Smith   InsertMode  addv = aij->insertmode;
2371eb62cbbSBarry Smith 
2381eb62cbbSBarry Smith   /*  wait on receives */
2391eb62cbbSBarry Smith   while (count) {
240d6dfbf8fSBarry Smith     MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
2411eb62cbbSBarry Smith     /* unpack receives into our local space */
242d6dfbf8fSBarry Smith     values = aij->rvalues + 3*imdex*aij->rmax;
243c60448a5SBarry Smith     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
2441eb62cbbSBarry Smith     n = n/3;
2451eb62cbbSBarry Smith     for ( i=0; i<n; i++ ) {
2461eb62cbbSBarry Smith       row = (int) PETSCREAL(values[3*i]) - aij->rstart;
2471eb62cbbSBarry Smith       col = (int) PETSCREAL(values[3*i+1]);
2481eb62cbbSBarry Smith       val = values[3*i+2];
2491eb62cbbSBarry Smith       if (col >= aij->cstart && col < aij->cend) {
2501eb62cbbSBarry Smith         col -= aij->cstart;
2511eb62cbbSBarry Smith         MatSetValues(aij->A,1,&row,1,&col,&val,addv);
2521eb62cbbSBarry Smith       }
2531eb62cbbSBarry Smith       else {
254d6dfbf8fSBarry Smith         if (aij->assembled) {
255b7c46309SBarry Smith           if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
256dbb450caSBarry Smith           col = aij->colmap[col] + shift;
257ec8511deSBarry Smith           if (col < 0  && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
2582493cbb0SBarry Smith             ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
2592493cbb0SBarry Smith             col = (int) PETSCREAL(values[3*i+1]);
260d6dfbf8fSBarry Smith           }
2619e25ed09SBarry Smith         }
2621eb62cbbSBarry Smith         MatSetValues(aij->B,1,&row,1,&col,&val,addv);
2631eb62cbbSBarry Smith       }
2641eb62cbbSBarry Smith     }
2651eb62cbbSBarry Smith     count--;
2661eb62cbbSBarry Smith   }
2670452661fSBarry Smith   PetscFree(aij->recv_waits); PetscFree(aij->rvalues);
2681eb62cbbSBarry Smith 
2691eb62cbbSBarry Smith   /* wait on sends */
2701eb62cbbSBarry Smith   if (aij->nsends) {
2710452661fSBarry Smith     send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));
27278b31e54SBarry Smith     CHKPTRQ(send_status);
2731eb62cbbSBarry Smith     MPI_Waitall(aij->nsends,aij->send_waits,send_status);
2740452661fSBarry Smith     PetscFree(send_status);
2751eb62cbbSBarry Smith   }
2760452661fSBarry Smith   PetscFree(aij->send_waits); PetscFree(aij->svalues);
2771eb62cbbSBarry Smith 
27864119d58SLois Curfman McInnes   aij->insertmode = NOT_SET_VALUES;
27978b31e54SBarry Smith   ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
28078b31e54SBarry Smith   ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);
2811eb62cbbSBarry Smith 
2822493cbb0SBarry Smith   /* determine if any processor has disassembled, if so we must
2832493cbb0SBarry Smith      also disassemble ourselfs, in order that we may reassemble. */
284416022c9SBarry Smith   MPI_Allreduce(&aij->assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
2852493cbb0SBarry Smith   if (aij->assembled && !other_disassembled) {
2862493cbb0SBarry Smith     ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
2872493cbb0SBarry Smith   }
2882493cbb0SBarry Smith 
2895e42470aSBarry Smith   if (!aij->assembled && mode == FINAL_ASSEMBLY) {
29078b31e54SBarry Smith     ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
291d6dfbf8fSBarry Smith     aij->assembled = 1;
2925e42470aSBarry Smith   }
29378b31e54SBarry Smith   ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
29478b31e54SBarry Smith   ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);
2955e42470aSBarry Smith 
2968a729477SBarry Smith   return 0;
2978a729477SBarry Smith }
2988a729477SBarry Smith 
2992ee70a88SLois Curfman McInnes static int MatZeroEntries_MPIAIJ(Mat A)
3001eb62cbbSBarry Smith {
30144a69424SLois Curfman McInnes   Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
302dbd7a890SLois Curfman McInnes   int        ierr;
30378b31e54SBarry Smith   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
30478b31e54SBarry Smith   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
3051eb62cbbSBarry Smith   return 0;
3061eb62cbbSBarry Smith }
3071eb62cbbSBarry Smith 
3081eb62cbbSBarry Smith /* the code does not do the diagonal entries correctly unless the
3091eb62cbbSBarry Smith    matrix is square and the column and row owerships are identical.
3101eb62cbbSBarry Smith    This is a BUG. The only way to fix it seems to be to access
3111eb62cbbSBarry Smith    aij->A and aij->B directly and not through the MatZeroRows()
3121eb62cbbSBarry Smith    routine.
3131eb62cbbSBarry Smith */
31444a69424SLois Curfman McInnes static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
3151eb62cbbSBarry Smith {
31644a69424SLois Curfman McInnes   Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
31717699dbbSLois Curfman McInnes   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
3186abc6512SBarry Smith   int            *procs,*nprocs,j,found,idx,nsends,*work;
31917699dbbSLois Curfman McInnes   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
3205392566eSBarry Smith   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
321d6dfbf8fSBarry Smith   int            *lens,imdex,*lrows,*values;
322d6dfbf8fSBarry Smith   MPI_Comm       comm = A->comm;
3231eb62cbbSBarry Smith   MPI_Request    *send_waits,*recv_waits;
3241eb62cbbSBarry Smith   MPI_Status     recv_status,*send_status;
3251eb62cbbSBarry Smith   IS             istmp;
3261eb62cbbSBarry Smith 
32748d91487SBarry Smith   if (!l->assembled) SETERRQ(1,"MatZeroRows_MPIAIJ:Must assemble matrix");
32878b31e54SBarry Smith   ierr = ISGetLocalSize(is,&N); CHKERRQ(ierr);
32978b31e54SBarry Smith   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
3301eb62cbbSBarry Smith 
3311eb62cbbSBarry Smith   /*  first count number of contributors to each processor */
3320452661fSBarry Smith   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
333cddf8d76SBarry Smith   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
3340452661fSBarry Smith   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
3351eb62cbbSBarry Smith   for ( i=0; i<N; i++ ) {
3361eb62cbbSBarry Smith     idx = rows[i];
3371eb62cbbSBarry Smith     found = 0;
33817699dbbSLois Curfman McInnes     for ( j=0; j<size; j++ ) {
3391eb62cbbSBarry Smith       if (idx >= owners[j] && idx < owners[j+1]) {
3401eb62cbbSBarry Smith         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
3411eb62cbbSBarry Smith       }
3421eb62cbbSBarry Smith     }
343bbb6d6a8SBarry Smith     if (!found) SETERRQ(1,"MatZeroRows_MPIAIJ:Index out of range");
3441eb62cbbSBarry Smith   }
34517699dbbSLois Curfman McInnes   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
3461eb62cbbSBarry Smith 
3471eb62cbbSBarry Smith   /* inform other processors of number of messages and max length*/
3480452661fSBarry Smith   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
34917699dbbSLois Curfman McInnes   MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
35017699dbbSLois Curfman McInnes   nrecvs = work[rank];
35117699dbbSLois Curfman McInnes   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
35217699dbbSLois Curfman McInnes   nmax = work[rank];
3530452661fSBarry Smith   PetscFree(work);
3541eb62cbbSBarry Smith 
3551eb62cbbSBarry Smith   /* post receives:   */
3560452661fSBarry Smith   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
35778b31e54SBarry Smith   CHKPTRQ(rvalues);
3580452661fSBarry Smith   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
35978b31e54SBarry Smith   CHKPTRQ(recv_waits);
3601eb62cbbSBarry Smith   for ( i=0; i<nrecvs; i++ ) {
361416022c9SBarry Smith     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
3621eb62cbbSBarry Smith   }
3631eb62cbbSBarry Smith 
3641eb62cbbSBarry Smith   /* do sends:
3651eb62cbbSBarry Smith       1) starts[i] gives the starting index in svalues for stuff going to
3661eb62cbbSBarry Smith          the ith processor
3671eb62cbbSBarry Smith   */
3680452661fSBarry Smith   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
3690452661fSBarry Smith   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
37078b31e54SBarry Smith   CHKPTRQ(send_waits);
3710452661fSBarry Smith   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
3721eb62cbbSBarry Smith   starts[0] = 0;
37317699dbbSLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
3741eb62cbbSBarry Smith   for ( i=0; i<N; i++ ) {
3751eb62cbbSBarry Smith     svalues[starts[owner[i]]++] = rows[i];
3761eb62cbbSBarry Smith   }
3771eb62cbbSBarry Smith   ISRestoreIndices(is,&rows);
3781eb62cbbSBarry Smith 
3791eb62cbbSBarry Smith   starts[0] = 0;
38017699dbbSLois Curfman McInnes   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
3811eb62cbbSBarry Smith   count = 0;
38217699dbbSLois Curfman McInnes   for ( i=0; i<size; i++ ) {
3831eb62cbbSBarry Smith     if (procs[i]) {
384416022c9SBarry Smith       MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
3851eb62cbbSBarry Smith     }
3861eb62cbbSBarry Smith   }
3870452661fSBarry Smith   PetscFree(starts);
3881eb62cbbSBarry Smith 
38917699dbbSLois Curfman McInnes   base = owners[rank];
3901eb62cbbSBarry Smith 
3911eb62cbbSBarry Smith   /*  wait on receives */
3920452661fSBarry Smith   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
3931eb62cbbSBarry Smith   source = lens + nrecvs;
3941eb62cbbSBarry Smith   count  = nrecvs; slen = 0;
3951eb62cbbSBarry Smith   while (count) {
396d6dfbf8fSBarry Smith     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
3971eb62cbbSBarry Smith     /* unpack receives into our local space */
3981eb62cbbSBarry Smith     MPI_Get_count(&recv_status,MPI_INT,&n);
399d6dfbf8fSBarry Smith     source[imdex]  = recv_status.MPI_SOURCE;
400d6dfbf8fSBarry Smith     lens[imdex]  = n;
4011eb62cbbSBarry Smith     slen += n;
4021eb62cbbSBarry Smith     count--;
4031eb62cbbSBarry Smith   }
4040452661fSBarry Smith   PetscFree(recv_waits);
4051eb62cbbSBarry Smith 
4061eb62cbbSBarry Smith   /* move the data into the send scatter */
4070452661fSBarry Smith   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
4081eb62cbbSBarry Smith   count = 0;
4091eb62cbbSBarry Smith   for ( i=0; i<nrecvs; i++ ) {
4101eb62cbbSBarry Smith     values = rvalues + i*nmax;
4111eb62cbbSBarry Smith     for ( j=0; j<lens[i]; j++ ) {
4121eb62cbbSBarry Smith       lrows[count++] = values[j] - base;
4131eb62cbbSBarry Smith     }
4141eb62cbbSBarry Smith   }
4150452661fSBarry Smith   PetscFree(rvalues); PetscFree(lens);
4160452661fSBarry Smith   PetscFree(owner); PetscFree(nprocs);
4171eb62cbbSBarry Smith 
4181eb62cbbSBarry Smith   /* actually zap the local rows */
419416022c9SBarry Smith   ierr = ISCreateSeq(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
420464493b3SBarry Smith   PLogObjectParent(A,istmp);
4210452661fSBarry Smith   PetscFree(lrows);
42278b31e54SBarry Smith   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
42378b31e54SBarry Smith   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
42478b31e54SBarry Smith   ierr = ISDestroy(istmp); CHKERRQ(ierr);
4251eb62cbbSBarry Smith 
4261eb62cbbSBarry Smith   /* wait on sends */
4271eb62cbbSBarry Smith   if (nsends) {
4280452661fSBarry Smith     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
42978b31e54SBarry Smith     CHKPTRQ(send_status);
4301eb62cbbSBarry Smith     MPI_Waitall(nsends,send_waits,send_status);
4310452661fSBarry Smith     PetscFree(send_status);
4321eb62cbbSBarry Smith   }
4330452661fSBarry Smith   PetscFree(send_waits); PetscFree(svalues);
4341eb62cbbSBarry Smith 
4351eb62cbbSBarry Smith   return 0;
4361eb62cbbSBarry Smith }
4371eb62cbbSBarry Smith 
438416022c9SBarry Smith static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
4391eb62cbbSBarry Smith {
440416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
4411eb62cbbSBarry Smith   int        ierr;
442416022c9SBarry Smith 
44348d91487SBarry Smith   if (!a->assembled) SETERRQ(1,"MatMult_MPIAIJ:must assemble matrix");
44464119d58SLois Curfman McInnes   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
445416022c9SBarry Smith   ierr = MatMult(a->A,xx,yy); CHKERRQ(ierr);
44664119d58SLois Curfman McInnes   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
447416022c9SBarry Smith   ierr = MatMultAdd(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
4481eb62cbbSBarry Smith   return 0;
4491eb62cbbSBarry Smith }
4501eb62cbbSBarry Smith 
451416022c9SBarry Smith static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
452da3a660dSBarry Smith {
453416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
454da3a660dSBarry Smith   int        ierr;
45548d91487SBarry Smith   if (!a->assembled) SETERRQ(1,"MatMult_MPIAIJ:must assemble matrix");
45664119d58SLois Curfman McInnes   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
457416022c9SBarry Smith   ierr = MatMultAdd(a->A,xx,yy,zz); CHKERRQ(ierr);
45864119d58SLois Curfman McInnes   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
459416022c9SBarry Smith   ierr = MatMultAdd(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
460da3a660dSBarry Smith   return 0;
461da3a660dSBarry Smith }
462da3a660dSBarry Smith 
463416022c9SBarry Smith static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
464da3a660dSBarry Smith {
465416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
466da3a660dSBarry Smith   int        ierr;
467da3a660dSBarry Smith 
46848d91487SBarry Smith   if (!a->assembled) SETERRQ(1,"MatMulTrans_MPIAIJ:must assemble matrix");
469da3a660dSBarry Smith   /* do nondiagonal part */
470416022c9SBarry Smith   ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr);
471da3a660dSBarry Smith   /* send it on its way */
472416022c9SBarry Smith   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,
47364119d58SLois Curfman McInnes                 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
474da3a660dSBarry Smith   /* do local part */
475416022c9SBarry Smith   ierr = MatMultTrans(a->A,xx,yy); CHKERRQ(ierr);
476da3a660dSBarry Smith   /* receive remote parts: note this assumes the values are not actually */
477da3a660dSBarry Smith   /* inserted in yy until the next line, which is true for my implementation*/
478da3a660dSBarry Smith   /* but is not perhaps always true. */
479416022c9SBarry Smith   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,
48064119d58SLois Curfman McInnes                   (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
481da3a660dSBarry Smith   return 0;
482da3a660dSBarry Smith }
483da3a660dSBarry Smith 
484416022c9SBarry Smith static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
485da3a660dSBarry Smith {
486416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
487da3a660dSBarry Smith   int        ierr;
488da3a660dSBarry Smith 
48948d91487SBarry Smith   if (!a->assembled) SETERRQ(1,"MatMulTransAdd_MPIAIJ:must assemble matrix");
490da3a660dSBarry Smith   /* do nondiagonal part */
491416022c9SBarry Smith   ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr);
492da3a660dSBarry Smith   /* send it on its way */
493416022c9SBarry Smith   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,
49464119d58SLois Curfman McInnes                  (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
495da3a660dSBarry Smith   /* do local part */
496416022c9SBarry Smith   ierr = MatMultTransAdd(a->A,xx,yy,zz); CHKERRQ(ierr);
497da3a660dSBarry Smith   /* receive remote parts: note this assumes the values are not actually */
498da3a660dSBarry Smith   /* inserted in yy until the next line, which is true for my implementation*/
499da3a660dSBarry Smith   /* but is not perhaps always true. */
500416022c9SBarry Smith   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,
50164119d58SLois Curfman McInnes                   (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
502da3a660dSBarry Smith   return 0;
503da3a660dSBarry Smith }
504da3a660dSBarry Smith 
5051eb62cbbSBarry Smith /*
5061eb62cbbSBarry Smith   This only works correctly for square matrices where the subblock A->A is the
5071eb62cbbSBarry Smith    diagonal block
5081eb62cbbSBarry Smith */
509416022c9SBarry Smith static int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
5101eb62cbbSBarry Smith {
511416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
51248d91487SBarry Smith   if (!a->assembled) SETERRQ(1,"MatGetDiag_MPIAIJ:must assemble matrix");
513416022c9SBarry Smith   return MatGetDiagonal(a->A,v);
5141eb62cbbSBarry Smith }
5151eb62cbbSBarry Smith 
516*052efed2SBarry Smith static int MatScale_MPIAIJ(Scalar *aa,Mat A)
517*052efed2SBarry Smith {
518*052efed2SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
519*052efed2SBarry Smith   int        ierr;
520*052efed2SBarry Smith   if (!a->assembled) SETERRQ(1,"MatScale_MPIAIJ:must assemble matrix");
521*052efed2SBarry Smith   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
522*052efed2SBarry Smith   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
523*052efed2SBarry Smith   return 0;
524*052efed2SBarry Smith }
525*052efed2SBarry Smith 
52644a69424SLois Curfman McInnes static int MatDestroy_MPIAIJ(PetscObject obj)
5271eb62cbbSBarry Smith {
5281eb62cbbSBarry Smith   Mat        mat = (Mat) obj;
52944a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
5301eb62cbbSBarry Smith   int        ierr;
531a5a9c739SBarry Smith #if defined(PETSC_LOG)
5326e35fa57SLois Curfman McInnes   PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N);
533a5a9c739SBarry Smith #endif
5340452661fSBarry Smith   PetscFree(aij->rowners);
53578b31e54SBarry Smith   ierr = MatDestroy(aij->A); CHKERRQ(ierr);
53678b31e54SBarry Smith   ierr = MatDestroy(aij->B); CHKERRQ(ierr);
5370452661fSBarry Smith   if (aij->colmap) PetscFree(aij->colmap);
5380452661fSBarry Smith   if (aij->garray) PetscFree(aij->garray);
5391eb62cbbSBarry Smith   if (aij->lvec)   VecDestroy(aij->lvec);
540a56f8943SBarry Smith   if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
5410452661fSBarry Smith   PetscFree(aij);
542a5a9c739SBarry Smith   PLogObjectDestroy(mat);
5430452661fSBarry Smith   PetscHeaderDestroy(mat);
5441eb62cbbSBarry Smith   return 0;
5451eb62cbbSBarry Smith }
5467857610eSBarry Smith #include "draw.h"
547b16a3bb1SBarry Smith #include "pinclude/pviewer.h"
548ee50ffe9SBarry Smith 
549416022c9SBarry Smith static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
5501eb62cbbSBarry Smith {
551416022c9SBarry Smith   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
552416022c9SBarry Smith   int         ierr;
553416022c9SBarry Smith 
55417699dbbSLois Curfman McInnes   if (aij->size == 1) {
555416022c9SBarry Smith     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
556416022c9SBarry Smith   }
557a523beb4SLois Curfman McInnes   else SETERRQ(1,"MatView_MPIAIJ_Binary:Only uniprocessor output supported");
558416022c9SBarry Smith   return 0;
559416022c9SBarry Smith }
560416022c9SBarry Smith 
561d7e8b826SBarry Smith static int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
562416022c9SBarry Smith {
56344a69424SLois Curfman McInnes   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
564dbb450caSBarry Smith   Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
565a56f8943SBarry Smith   int         ierr, format,shift = C->indexshift,rank;
566d13ab20cSBarry Smith   PetscObject vobj = (PetscObject) viewer;
567d636dbe3SBarry Smith   FILE        *fd;
568416022c9SBarry Smith 
569416022c9SBarry Smith   if (vobj->type == ASCII_FILE_VIEWER || vobj->type == ASCII_FILES_VIEWER) {
570416022c9SBarry Smith     ierr = ViewerFileGetFormat_Private(viewer,&format);
57108480c60SBarry Smith     if (format == FILE_FORMAT_INFO_DETAILED) {
572a56f8943SBarry Smith       int nz,nzalloc,mem;
573a56f8943SBarry Smith       MPI_Comm_rank(mat->comm,&rank);
574a56f8943SBarry Smith       ierr = ViewerFileGetPointer_Private(viewer,&fd); CHKERRQ(ierr);
575a56f8943SBarry Smith       ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem);
576a56f8943SBarry Smith       MPIU_Seq_begin(mat->comm,1);
577a56f8943SBarry Smith       fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d \n",rank,aij->m,nz,
578a56f8943SBarry Smith                 nzalloc,mem);
57908480c60SBarry Smith       ierr = MatGetInfo(aij->A,MAT_LOCAL,&nz,&nzalloc,&mem);
58008480c60SBarry Smith       fprintf(fd,"[%d] on diagonal nz %d \n",rank,nz);
58108480c60SBarry Smith       ierr = MatGetInfo(aij->B,MAT_LOCAL,&nz,&nzalloc,&mem);
58208480c60SBarry Smith       fprintf(fd,"[%d] off diagonal nz %d \n",rank,nz);
583a56f8943SBarry Smith       fflush(fd);
584a56f8943SBarry Smith       MPIU_Seq_end(mat->comm,1);
585a40aa06bSLois Curfman McInnes       ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr);
586416022c9SBarry Smith       return 0;
587416022c9SBarry Smith     }
58808480c60SBarry Smith     else if (format == FILE_FORMAT_INFO) {
58908480c60SBarry Smith       return 0;
59008480c60SBarry Smith     }
591416022c9SBarry Smith   }
592416022c9SBarry Smith 
593416022c9SBarry Smith   if (vobj->type == ASCII_FILE_VIEWER) {
594d636dbe3SBarry Smith     ierr = ViewerFileGetPointer_Private(viewer,&fd); CHKERRQ(ierr);
5957f813858SBarry Smith     MPIU_Seq_begin(mat->comm,1);
596d13ab20cSBarry Smith     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
59717699dbbSLois Curfman McInnes            aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
5981eb62cbbSBarry Smith            aij->cend);
59978b31e54SBarry Smith     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
60078b31e54SBarry Smith     ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
601d13ab20cSBarry Smith     fflush(fd);
6027f813858SBarry Smith     MPIU_Seq_end(mat->comm,1);
603d13ab20cSBarry Smith   }
604416022c9SBarry Smith   else {
605a56f8943SBarry Smith     int size = aij->size;
606a56f8943SBarry Smith     rank = aij->rank;
60717699dbbSLois Curfman McInnes     if (size == 1) {
60878b31e54SBarry Smith       ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
60995373324SBarry Smith     }
61095373324SBarry Smith     else {
61195373324SBarry Smith       /* assemble the entire matrix onto first processor. */
61295373324SBarry Smith       Mat         A;
613ec8511deSBarry Smith       Mat_SeqAIJ *Aloc;
6142eb8c8abSBarry Smith       int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
61595373324SBarry Smith       Scalar      *a;
6162ee70a88SLois Curfman McInnes 
61717699dbbSLois Curfman McInnes       if (!rank) {
618b4fd4287SBarry Smith         ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
619c451ab25SLois Curfman McInnes                CHKERRQ(ierr);
62095373324SBarry Smith       }
62195373324SBarry Smith       else {
622b4fd4287SBarry Smith         ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
623c451ab25SLois Curfman McInnes                CHKERRQ(ierr);
62495373324SBarry Smith       }
625464493b3SBarry Smith       PLogObjectParent(mat,A);
626416022c9SBarry Smith 
62795373324SBarry Smith       /* copy over the A part */
628ec8511deSBarry Smith       Aloc = (Mat_SeqAIJ*) aij->A->data;
6292ee70a88SLois Curfman McInnes       m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
63095373324SBarry Smith       row = aij->rstart;
631dbb450caSBarry Smith       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
63295373324SBarry Smith       for ( i=0; i<m; i++ ) {
633416022c9SBarry Smith         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
63495373324SBarry Smith         row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
63595373324SBarry Smith       }
6362ee70a88SLois Curfman McInnes       aj = Aloc->j;
637dbb450caSBarry Smith       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}
63895373324SBarry Smith 
63995373324SBarry Smith       /* copy over the B part */
640ec8511deSBarry Smith       Aloc = (Mat_SeqAIJ*) aij->B->data;
6412ee70a88SLois Curfman McInnes       m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
64295373324SBarry Smith       row = aij->rstart;
6430452661fSBarry Smith       ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
644dbb450caSBarry Smith       for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
64595373324SBarry Smith       for ( i=0; i<m; i++ ) {
646416022c9SBarry Smith         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
64795373324SBarry Smith         row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
64895373324SBarry Smith       }
6490452661fSBarry Smith       PetscFree(ct);
65078b31e54SBarry Smith       ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
65178b31e54SBarry Smith       ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
65217699dbbSLois Curfman McInnes       if (!rank) {
65378b31e54SBarry Smith         ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
65495373324SBarry Smith       }
65578b31e54SBarry Smith       ierr = MatDestroy(A); CHKERRQ(ierr);
65695373324SBarry Smith     }
65795373324SBarry Smith   }
6581eb62cbbSBarry Smith   return 0;
6591eb62cbbSBarry Smith }
6601eb62cbbSBarry Smith 
661416022c9SBarry Smith static int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
662416022c9SBarry Smith {
663416022c9SBarry Smith   Mat         mat = (Mat) obj;
664416022c9SBarry Smith   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
665416022c9SBarry Smith   int         ierr;
666416022c9SBarry Smith   PetscObject vobj = (PetscObject) viewer;
667416022c9SBarry Smith 
66848d91487SBarry Smith   if (!aij->assembled) SETERRQ(1,"MatView_MPIAIJ:must assemble matrix");
669416022c9SBarry Smith   if (!viewer) {
670416022c9SBarry Smith     viewer = STDOUT_VIEWER_SELF; vobj = (PetscObject) viewer;
671416022c9SBarry Smith   }
672416022c9SBarry Smith   if (vobj->cookie == DRAW_COOKIE && vobj->type == NULLWINDOW) return 0;
673416022c9SBarry Smith   else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILE_VIEWER) {
674d7e8b826SBarry Smith     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
675416022c9SBarry Smith   }
676416022c9SBarry Smith   else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILES_VIEWER) {
677d7e8b826SBarry Smith     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
678d7e8b826SBarry Smith   }
679d7e8b826SBarry Smith   else if (vobj->cookie == VIEWER_COOKIE && vobj->type == MATLAB_VIEWER) {
680d7e8b826SBarry Smith     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
681416022c9SBarry Smith   }
682416022c9SBarry Smith   else if (vobj->cookie == DRAW_COOKIE) {
683d7e8b826SBarry Smith     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
684416022c9SBarry Smith   }
685416022c9SBarry Smith   else if (vobj->type == BINARY_FILE_VIEWER) {
686416022c9SBarry Smith     return MatView_MPIAIJ_Binary(mat,viewer);
687416022c9SBarry Smith   }
688416022c9SBarry Smith   return 0;
689416022c9SBarry Smith }
690416022c9SBarry Smith 
691ec8511deSBarry Smith extern int MatMarkDiag_SeqAIJ(Mat);
6921eb62cbbSBarry Smith /*
6931eb62cbbSBarry Smith     This has to provide several versions.
6941eb62cbbSBarry Smith 
6951eb62cbbSBarry Smith      1) per sequential
6961eb62cbbSBarry Smith      2) a) use only local smoothing updating outer values only once.
6971eb62cbbSBarry Smith         b) local smoothing updating outer values each inner iteration
698d6dfbf8fSBarry Smith      3) color updating out values betwen colors.
6991eb62cbbSBarry Smith */
700fc76ce87SLois Curfman McInnes static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
701dbb450caSBarry Smith                            double fshift,int its,Vec xx)
7028a729477SBarry Smith {
70344a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
704d6dfbf8fSBarry Smith   Mat        AA = mat->A, BB = mat->B;
705ec8511deSBarry Smith   Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
7066abc6512SBarry Smith   Scalar     zero = 0.0,*b,*x,*xs,*ls,d,*v,sum,scale,*t,*ts;
7076abc6512SBarry Smith   int        ierr,*idx, *diag;
708416022c9SBarry Smith   int        n = mat->n, m = mat->m, i,shift = A->indexshift;
709da3a660dSBarry Smith   Vec        tt;
7108a729477SBarry Smith 
71148d91487SBarry Smith   if (!mat->assembled) SETERRQ(1,"MatRelax_MPIAIJ:must assemble matrix");
7121eb62cbbSBarry Smith 
713d6dfbf8fSBarry Smith   VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
714dbb450caSBarry Smith   xs = x + shift; /* shift by one for index start of 1 */
715dbb450caSBarry Smith   ls = ls + shift;
716ec8511deSBarry Smith   if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;}
717d6dfbf8fSBarry Smith   diag = A->diag;
718acb40c82SBarry Smith   if (flag == SOR_APPLY_UPPER || flag == SOR_APPLY_LOWER) {
71948d91487SBarry Smith     SETERRQ(1,"MatRelax_MPIAIJ:Option not supported");
720acb40c82SBarry Smith   }
721da3a660dSBarry Smith   if (flag & SOR_EISENSTAT) {
722da3a660dSBarry Smith     /* Let  A = L + U + D; where L is lower trianglar,
723da3a660dSBarry Smith     U is upper triangular, E is diagonal; This routine applies
724da3a660dSBarry Smith 
725da3a660dSBarry Smith             (L + E)^{-1} A (U + E)^{-1}
726da3a660dSBarry Smith 
727da3a660dSBarry Smith     to a vector efficiently using Eisenstat's trick. This is for
728da3a660dSBarry Smith     the case of SSOR preconditioner, so E is D/omega where omega
729da3a660dSBarry Smith     is the relaxation factor.
730da3a660dSBarry Smith     */
73178b31e54SBarry Smith     ierr = VecDuplicate(xx,&tt); CHKERRQ(ierr);
732464493b3SBarry Smith     PLogObjectParent(matin,tt);
733da3a660dSBarry Smith     VecGetArray(tt,&t);
734da3a660dSBarry Smith     scale = (2.0/omega) - 1.0;
735da3a660dSBarry Smith     /*  x = (E + U)^{-1} b */
736da3a660dSBarry Smith     VecSet(&zero,mat->lvec);
73764119d58SLois Curfman McInnes     ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
73878b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
739da3a660dSBarry Smith     for ( i=m-1; i>-1; i-- ) {
740da3a660dSBarry Smith       n    = A->i[i+1] - diag[i] - 1;
741dbb450caSBarry Smith       idx  = A->j + diag[i] + !shift;
742dbb450caSBarry Smith       v    = A->a + diag[i] + !shift;
743da3a660dSBarry Smith       sum  = b[i];
744da3a660dSBarry Smith       SPARSEDENSEMDOT(sum,xs,v,idx,n);
745dbb450caSBarry Smith       d    = fshift + A->a[diag[i]+shift];
746da3a660dSBarry Smith       n    = B->i[i+1] - B->i[i];
747dbb450caSBarry Smith       idx  = B->j + B->i[i] + shift;
748dbb450caSBarry Smith       v    = B->a + B->i[i] + shift;
749da3a660dSBarry Smith       SPARSEDENSEMDOT(sum,ls,v,idx,n);
750da3a660dSBarry Smith       x[i] = omega*(sum/d);
751da3a660dSBarry Smith     }
75264119d58SLois Curfman McInnes     ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
75378b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
754da3a660dSBarry Smith 
755da3a660dSBarry Smith     /*  t = b - (2*E - D)x */
756da3a660dSBarry Smith     v = A->a;
757dbb450caSBarry Smith     for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ + shift])*x[i]; }
758da3a660dSBarry Smith 
759da3a660dSBarry Smith     /*  t = (E + L)^{-1}t */
760dbb450caSBarry Smith     ts = t + shift; /* shifted by one for index start of a or mat->j*/
761da3a660dSBarry Smith     diag = A->diag;
762da3a660dSBarry Smith     VecSet(&zero,mat->lvec);
76364119d58SLois Curfman McInnes     ierr = VecPipelineBegin(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
76478b31e54SBarry Smith                                                  mat->Mvctx); CHKERRQ(ierr);
765da3a660dSBarry Smith     for ( i=0; i<m; i++ ) {
766da3a660dSBarry Smith       n    = diag[i] - A->i[i];
767dbb450caSBarry Smith       idx  = A->j + A->i[i] + shift;
768dbb450caSBarry Smith       v    = A->a + A->i[i] + shift;
769da3a660dSBarry Smith       sum  = t[i];
770da3a660dSBarry Smith       SPARSEDENSEMDOT(sum,ts,v,idx,n);
771dbb450caSBarry Smith       d    = fshift + A->a[diag[i]+shift];
772da3a660dSBarry Smith       n    = B->i[i+1] - B->i[i];
773dbb450caSBarry Smith       idx  = B->j + B->i[i] + shift;
774dbb450caSBarry Smith       v    = B->a + B->i[i] + shift;
775da3a660dSBarry Smith       SPARSEDENSEMDOT(sum,ls,v,idx,n);
776da3a660dSBarry Smith       t[i] = omega*(sum/d);
777da3a660dSBarry Smith     }
77864119d58SLois Curfman McInnes     ierr = VecPipelineEnd(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
77978b31e54SBarry Smith                                                     mat->Mvctx); CHKERRQ(ierr);
780da3a660dSBarry Smith     /*  x = x + t */
781da3a660dSBarry Smith     for ( i=0; i<m; i++ ) { x[i] += t[i]; }
782da3a660dSBarry Smith     VecDestroy(tt);
783da3a660dSBarry Smith     return 0;
784da3a660dSBarry Smith   }
785da3a660dSBarry Smith 
7861eb62cbbSBarry Smith 
787d6dfbf8fSBarry Smith   if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){
788da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
789da3a660dSBarry Smith       VecSet(&zero,mat->lvec); VecSet(&zero,xx);
790da3a660dSBarry Smith     }
791da3a660dSBarry Smith     else {
79264119d58SLois Curfman McInnes       ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
79378b31e54SBarry Smith       CHKERRQ(ierr);
79464119d58SLois Curfman McInnes       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
79578b31e54SBarry Smith       CHKERRQ(ierr);
796da3a660dSBarry Smith     }
797d6dfbf8fSBarry Smith     while (its--) {
798d6dfbf8fSBarry Smith       /* go down through the rows */
79964119d58SLois Curfman McInnes       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
80078b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
801d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
802d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
803dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
804dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
805d6dfbf8fSBarry Smith         sum  = b[i];
806d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
807dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
808d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
809dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
810dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
811d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
812d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
813d6dfbf8fSBarry Smith       }
81464119d58SLois Curfman McInnes       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
81578b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
816d6dfbf8fSBarry Smith       /* come up through the rows */
81764119d58SLois Curfman McInnes       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
81878b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
819d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
820d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
821dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
822dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
823d6dfbf8fSBarry Smith         sum  = b[i];
824d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
825dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
826d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
827dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
828dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
829d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
830d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
831d6dfbf8fSBarry Smith       }
83264119d58SLois Curfman McInnes       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
83378b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
834d6dfbf8fSBarry Smith     }
835d6dfbf8fSBarry Smith   }
836d6dfbf8fSBarry Smith   else if (flag & SOR_FORWARD_SWEEP){
837da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
838da3a660dSBarry Smith       VecSet(&zero,mat->lvec);
83964119d58SLois Curfman McInnes       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
84078b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
841da3a660dSBarry Smith       for ( i=0; i<m; i++ ) {
842da3a660dSBarry Smith         n    = diag[i] - A->i[i];
843dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
844dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
845da3a660dSBarry Smith         sum  = b[i];
846da3a660dSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
847dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
848da3a660dSBarry Smith         n    = B->i[i+1] - B->i[i];
849dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
850dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
851da3a660dSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
852da3a660dSBarry Smith         x[i] = omega*(sum/d);
853da3a660dSBarry Smith       }
85464119d58SLois Curfman McInnes       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
85578b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
856da3a660dSBarry Smith       its--;
857da3a660dSBarry Smith     }
858d6dfbf8fSBarry Smith     while (its--) {
85964119d58SLois Curfman McInnes       ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
86078b31e54SBarry Smith       CHKERRQ(ierr);
86164119d58SLois Curfman McInnes       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
86278b31e54SBarry Smith       CHKERRQ(ierr);
86364119d58SLois Curfman McInnes       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
86478b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
865d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
866d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
867dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
868dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
869d6dfbf8fSBarry Smith         sum  = b[i];
870d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
871dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
872d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
873dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
874dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
875d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
876d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
877d6dfbf8fSBarry Smith       }
87864119d58SLois Curfman McInnes       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
87978b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
880d6dfbf8fSBarry Smith     }
881d6dfbf8fSBarry Smith   }
882d6dfbf8fSBarry Smith   else if (flag & SOR_BACKWARD_SWEEP){
883da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
884da3a660dSBarry Smith       VecSet(&zero,mat->lvec);
88564119d58SLois Curfman McInnes       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
88678b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
887da3a660dSBarry Smith       for ( i=m-1; i>-1; i-- ) {
888da3a660dSBarry Smith         n    = A->i[i+1] - diag[i] - 1;
889dbb450caSBarry Smith         idx  = A->j + diag[i] + !shift;
890dbb450caSBarry Smith         v    = A->a + diag[i] + !shift;
891da3a660dSBarry Smith         sum  = b[i];
892da3a660dSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
893dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
894da3a660dSBarry Smith         n    = B->i[i+1] - B->i[i];
895dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
896dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
897da3a660dSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
898da3a660dSBarry Smith         x[i] = omega*(sum/d);
899da3a660dSBarry Smith       }
90064119d58SLois Curfman McInnes       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
90178b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
902da3a660dSBarry Smith       its--;
903da3a660dSBarry Smith     }
904d6dfbf8fSBarry Smith     while (its--) {
90564119d58SLois Curfman McInnes       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN,
90678b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
90764119d58SLois Curfman McInnes       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN,
90878b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
90964119d58SLois Curfman McInnes       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
91078b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
911d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
912d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
913dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
914dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
915d6dfbf8fSBarry Smith         sum  = b[i];
916d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
917dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
918d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
919dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
920dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
921d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
922d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
923d6dfbf8fSBarry Smith       }
92464119d58SLois Curfman McInnes       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
92578b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
926d6dfbf8fSBarry Smith     }
927d6dfbf8fSBarry Smith   }
928d6dfbf8fSBarry Smith   else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
929da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
930dbb450caSBarry Smith       return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
931da3a660dSBarry Smith     }
93264119d58SLois Curfman McInnes     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
93378b31e54SBarry Smith     CHKERRQ(ierr);
93464119d58SLois Curfman McInnes     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
93578b31e54SBarry Smith     CHKERRQ(ierr);
936d6dfbf8fSBarry Smith     while (its--) {
937d6dfbf8fSBarry Smith       /* go down through the rows */
938d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
939d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
940dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
941dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
942d6dfbf8fSBarry Smith         sum  = b[i];
943d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
944dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
945d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
946dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
947dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
948d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
949d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
950d6dfbf8fSBarry Smith       }
951d6dfbf8fSBarry Smith       /* come up through the rows */
952d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
953d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
954dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
955dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
956d6dfbf8fSBarry Smith         sum  = b[i];
957d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
958dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
959d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
960dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
961dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
962d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
963d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
964d6dfbf8fSBarry Smith       }
965d6dfbf8fSBarry Smith     }
966d6dfbf8fSBarry Smith   }
967d6dfbf8fSBarry Smith   else if (flag & SOR_LOCAL_FORWARD_SWEEP){
968da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
969dbb450caSBarry Smith       return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
970da3a660dSBarry Smith     }
97164119d58SLois Curfman McInnes     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
97278b31e54SBarry Smith     CHKERRQ(ierr);
97364119d58SLois Curfman McInnes     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
97478b31e54SBarry Smith     CHKERRQ(ierr);
975d6dfbf8fSBarry Smith     while (its--) {
976d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
977d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
978dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
979dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
980d6dfbf8fSBarry Smith         sum  = b[i];
981d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
982dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
983d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
984dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
985dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
986d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
987d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
988d6dfbf8fSBarry Smith       }
989d6dfbf8fSBarry Smith     }
990d6dfbf8fSBarry Smith   }
991d6dfbf8fSBarry Smith   else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
992da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
993dbb450caSBarry Smith       return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
994da3a660dSBarry Smith     }
99564119d58SLois Curfman McInnes     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,
99678b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
99764119d58SLois Curfman McInnes     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,
99878b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
999d6dfbf8fSBarry Smith     while (its--) {
1000d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
1001d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
1002dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
1003dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
1004d6dfbf8fSBarry Smith         sum  = b[i];
1005d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
1006dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
1007d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
1008dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
1009dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
1010d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
1011d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
1012d6dfbf8fSBarry Smith       }
1013d6dfbf8fSBarry Smith     }
1014d6dfbf8fSBarry Smith   }
10158a729477SBarry Smith   return 0;
10168a729477SBarry Smith }
1017a66be287SLois Curfman McInnes 
1018fc76ce87SLois Curfman McInnes static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,int *nz,
1019fc76ce87SLois Curfman McInnes                              int *nzalloc,int *mem)
1020a66be287SLois Curfman McInnes {
1021a66be287SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1022a66be287SLois Curfman McInnes   Mat        A = mat->A, B = mat->B;
1023a66be287SLois Curfman McInnes   int        ierr, isend[3], irecv[3], nzA, nzallocA, memA;
1024a66be287SLois Curfman McInnes 
102578b31e54SBarry Smith   ierr = MatGetInfo(A,MAT_LOCAL,&nzA,&nzallocA,&memA); CHKERRQ(ierr);
102678b31e54SBarry Smith   ierr = MatGetInfo(B,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr);
1027a66be287SLois Curfman McInnes   isend[0] += nzA; isend[1] += nzallocA; isend[2] += memA;
1028a66be287SLois Curfman McInnes   if (flag == MAT_LOCAL) {
1029a66be287SLois Curfman McInnes     *nz = isend[0]; *nzalloc = isend[1]; *mem = isend[2];
1030a66be287SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_MAX) {
1031d65a2f8fSBarry Smith     MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_MAX,matin->comm);
1032a66be287SLois Curfman McInnes     *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
1033a66be287SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_SUM) {
1034d65a2f8fSBarry Smith     MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_SUM,matin->comm);
1035a66be287SLois Curfman McInnes     *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
1036a66be287SLois Curfman McInnes   }
1037a66be287SLois Curfman McInnes   return 0;
1038a66be287SLois Curfman McInnes }
1039a66be287SLois Curfman McInnes 
1040299609e3SLois Curfman McInnes extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*);
1041299609e3SLois Curfman McInnes extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*);
1042299609e3SLois Curfman McInnes extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double);
1043299609e3SLois Curfman McInnes extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *);
1044299609e3SLois Curfman McInnes extern int MatSolve_MPIAIJ(Mat,Vec,Vec);
1045299609e3SLois Curfman McInnes extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec);
1046299609e3SLois Curfman McInnes extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec);
1047299609e3SLois Curfman McInnes extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec);
1048299609e3SLois Curfman McInnes 
1049416022c9SBarry Smith static int MatSetOption_MPIAIJ(Mat A,MatOption op)
1050c74985f6SBarry Smith {
1051c0bbcb79SLois Curfman McInnes   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1052c74985f6SBarry Smith 
1053c0bbcb79SLois Curfman McInnes   if (op == NO_NEW_NONZERO_LOCATIONS ||
1054c0bbcb79SLois Curfman McInnes       op == YES_NEW_NONZERO_LOCATIONS ||
1055c0bbcb79SLois Curfman McInnes       op == COLUMNS_SORTED ||
1056c0bbcb79SLois Curfman McInnes       op == ROW_ORIENTED) {
1057c0bbcb79SLois Curfman McInnes         MatSetOption(a->A,op);
1058c0bbcb79SLois Curfman McInnes         MatSetOption(a->B,op);
1059c74985f6SBarry Smith   }
1060c0bbcb79SLois Curfman McInnes   else if (op == ROWS_SORTED ||
1061c0bbcb79SLois Curfman McInnes            op == SYMMETRIC_MATRIX ||
1062c0bbcb79SLois Curfman McInnes            op == STRUCTURALLY_SYMMETRIC_MATRIX ||
1063c0bbcb79SLois Curfman McInnes            op == YES_NEW_DIAGONALS)
1064c0bbcb79SLois Curfman McInnes     PLogInfo((PetscObject)A,"Info:MatSetOption_MPIAIJ:Option ignored\n");
10654b0e389bSBarry Smith   else if (op == COLUMN_ORIENTED) {
10664b0e389bSBarry Smith     a->roworiented = 0;
10674b0e389bSBarry Smith     MatSetOption(a->A,op);
10684b0e389bSBarry Smith     MatSetOption(a->B,op);
10694b0e389bSBarry Smith   }
1070c0bbcb79SLois Curfman McInnes   else if (op == NO_NEW_DIAGONALS)
10714d39ef84SLois Curfman McInnes     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:NO_NEW_DIAGONALS");}
1072c0bbcb79SLois Curfman McInnes   else
10734d39ef84SLois Curfman McInnes     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:unknown option");}
1074c74985f6SBarry Smith   return 0;
1075c74985f6SBarry Smith }
1076c74985f6SBarry Smith 
1077d1710a03SLois Curfman McInnes static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
1078c74985f6SBarry Smith {
107944a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1080c74985f6SBarry Smith   *m = mat->M; *n = mat->N;
1081c74985f6SBarry Smith   return 0;
1082c74985f6SBarry Smith }
1083c74985f6SBarry Smith 
1084d1710a03SLois Curfman McInnes static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
1085c74985f6SBarry Smith {
108644a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1087b7c46309SBarry Smith   *m = mat->m; *n = mat->N;
1088c74985f6SBarry Smith   return 0;
1089c74985f6SBarry Smith }
1090c74985f6SBarry Smith 
1091d1710a03SLois Curfman McInnes static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
1092c74985f6SBarry Smith {
109344a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1094c74985f6SBarry Smith   *m = mat->rstart; *n = mat->rend;
1095c74985f6SBarry Smith   return 0;
1096c74985f6SBarry Smith }
1097c74985f6SBarry Smith 
109839e00950SLois Curfman McInnes static int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
109939e00950SLois Curfman McInnes {
1100154123eaSLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1101154123eaSLois Curfman McInnes   Scalar     *vworkA, *vworkB, **pvA, **pvB;
1102154123eaSLois Curfman McInnes   int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
1103154123eaSLois Curfman McInnes   int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
110439e00950SLois Curfman McInnes 
1105b49de8d1SLois Curfman McInnes   if (!mat->assembled) SETERRQ(1,"MatGetRow_MPIAIJ:Must assemble matrix");
1106b49de8d1SLois Curfman McInnes   if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIAIJ:Only local rows")
1107abc0e9e4SLois Curfman McInnes   lrow = row - rstart;
110839e00950SLois Curfman McInnes 
1109154123eaSLois Curfman McInnes   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1110154123eaSLois Curfman McInnes   if (!v)   {pvA = 0; pvB = 0;}
1111154123eaSLois Curfman McInnes   if (!idx) {pcA = 0; if (!v) pcB = 0;}
111278b31e54SBarry Smith   ierr = MatGetRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
111378b31e54SBarry Smith   ierr = MatGetRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1114154123eaSLois Curfman McInnes   nztot = nzA + nzB;
1115154123eaSLois Curfman McInnes 
1116154123eaSLois Curfman McInnes   if (v  || idx) {
1117154123eaSLois Curfman McInnes     if (nztot) {
1118154123eaSLois Curfman McInnes       /* Sort by increasing column numbers, assuming A and B already sorted */
1119299609e3SLois Curfman McInnes       int imark;
112048b35521SBarry Smith       if (mat->assembled) {
1121154123eaSLois Curfman McInnes         for (i=0; i<nzB; i++) cworkB[i] = mat->garray[cworkB[i]];
112248b35521SBarry Smith       }
1123154123eaSLois Curfman McInnes       if (v) {
11240452661fSBarry Smith         *v = (Scalar *) PetscMalloc( (nztot)*sizeof(Scalar) ); CHKPTRQ(*v);
112539e00950SLois Curfman McInnes         for ( i=0; i<nzB; i++ ) {
1126154123eaSLois Curfman McInnes           if (cworkB[i] < cstart)   (*v)[i] = vworkB[i];
1127154123eaSLois Curfman McInnes           else break;
1128154123eaSLois Curfman McInnes         }
1129154123eaSLois Curfman McInnes         imark = i;
1130154123eaSLois Curfman McInnes         for ( i=0; i<nzA; i++ )     (*v)[imark+i] = vworkA[i];
1131299609e3SLois Curfman McInnes         for ( i=imark; i<nzB; i++ ) (*v)[nzA+i] = vworkB[i];
1132154123eaSLois Curfman McInnes       }
1133154123eaSLois Curfman McInnes       if (idx) {
11340452661fSBarry Smith         *idx = (int *) PetscMalloc( (nztot)*sizeof(int) ); CHKPTRQ(*idx);
1135154123eaSLois Curfman McInnes         for (i=0; i<nzA; i++) cworkA[i] += cstart;
1136154123eaSLois Curfman McInnes         for ( i=0; i<nzB; i++ ) {
1137154123eaSLois Curfman McInnes           if (cworkB[i] < cstart)   (*idx)[i] = cworkB[i];
1138154123eaSLois Curfman McInnes           else break;
1139154123eaSLois Curfman McInnes         }
1140154123eaSLois Curfman McInnes         imark = i;
1141154123eaSLois Curfman McInnes         for ( i=0; i<nzA; i++ )     (*idx)[imark+i] = cworkA[i];
1142299609e3SLois Curfman McInnes         for ( i=imark; i<nzB; i++ ) (*idx)[nzA+i] = cworkB[i];
114339e00950SLois Curfman McInnes       }
114439e00950SLois Curfman McInnes     }
114539e00950SLois Curfman McInnes     else {*idx = 0; *v=0;}
1146154123eaSLois Curfman McInnes   }
114739e00950SLois Curfman McInnes   *nz = nztot;
114878b31e54SBarry Smith   ierr = MatRestoreRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
114978b31e54SBarry Smith   ierr = MatRestoreRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
115039e00950SLois Curfman McInnes   return 0;
115139e00950SLois Curfman McInnes }
115239e00950SLois Curfman McInnes 
115339e00950SLois Curfman McInnes static int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
115439e00950SLois Curfman McInnes {
11550452661fSBarry Smith   if (idx) PetscFree(*idx);
11560452661fSBarry Smith   if (v) PetscFree(*v);
115739e00950SLois Curfman McInnes   return 0;
115839e00950SLois Curfman McInnes }
115939e00950SLois Curfman McInnes 
1160cddf8d76SBarry Smith static int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
1161855ac2c5SLois Curfman McInnes {
1162855ac2c5SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1163ec8511deSBarry Smith   Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
1164416022c9SBarry Smith   int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
1165416022c9SBarry Smith   double     sum = 0.0;
116604ca555eSLois Curfman McInnes   Scalar     *v;
116704ca555eSLois Curfman McInnes 
1168416022c9SBarry Smith   if (!aij->assembled) SETERRQ(1,"MatNorm_MPIAIJ:Must assemble matrix");
116917699dbbSLois Curfman McInnes   if (aij->size == 1) {
117014183eadSLois Curfman McInnes     ierr =  MatNorm(aij->A,type,norm); CHKERRQ(ierr);
117137fa93a5SLois Curfman McInnes   } else {
117204ca555eSLois Curfman McInnes     if (type == NORM_FROBENIUS) {
117304ca555eSLois Curfman McInnes       v = amat->a;
117404ca555eSLois Curfman McInnes       for (i=0; i<amat->nz; i++ ) {
117504ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX)
117604ca555eSLois Curfman McInnes         sum += real(conj(*v)*(*v)); v++;
117704ca555eSLois Curfman McInnes #else
117804ca555eSLois Curfman McInnes         sum += (*v)*(*v); v++;
117904ca555eSLois Curfman McInnes #endif
118004ca555eSLois Curfman McInnes       }
118104ca555eSLois Curfman McInnes       v = bmat->a;
118204ca555eSLois Curfman McInnes       for (i=0; i<bmat->nz; i++ ) {
118304ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX)
118404ca555eSLois Curfman McInnes         sum += real(conj(*v)*(*v)); v++;
118504ca555eSLois Curfman McInnes #else
118604ca555eSLois Curfman McInnes         sum += (*v)*(*v); v++;
118704ca555eSLois Curfman McInnes #endif
118804ca555eSLois Curfman McInnes       }
118904ca555eSLois Curfman McInnes       MPI_Allreduce((void*)&sum,(void*)norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
119004ca555eSLois Curfman McInnes       *norm = sqrt(*norm);
119104ca555eSLois Curfman McInnes     }
119204ca555eSLois Curfman McInnes     else if (type == NORM_1) { /* max column norm */
119304ca555eSLois Curfman McInnes       double *tmp, *tmp2;
119404ca555eSLois Curfman McInnes       int    *jj, *garray = aij->garray;
11950452661fSBarry Smith       tmp  = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp);
11960452661fSBarry Smith       tmp2 = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp2);
1197cddf8d76SBarry Smith       PetscMemzero(tmp,aij->N*sizeof(double));
119804ca555eSLois Curfman McInnes       *norm = 0.0;
119904ca555eSLois Curfman McInnes       v = amat->a; jj = amat->j;
120004ca555eSLois Curfman McInnes       for ( j=0; j<amat->nz; j++ ) {
1201579c6b6fSBarry Smith         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
120204ca555eSLois Curfman McInnes       }
120304ca555eSLois Curfman McInnes       v = bmat->a; jj = bmat->j;
120404ca555eSLois Curfman McInnes       for ( j=0; j<bmat->nz; j++ ) {
1205579c6b6fSBarry Smith         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
120604ca555eSLois Curfman McInnes       }
120704ca555eSLois Curfman McInnes       MPI_Allreduce((void*)tmp,(void*)tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);
120804ca555eSLois Curfman McInnes       for ( j=0; j<aij->N; j++ ) {
120904ca555eSLois Curfman McInnes         if (tmp2[j] > *norm) *norm = tmp2[j];
121004ca555eSLois Curfman McInnes       }
12110452661fSBarry Smith       PetscFree(tmp); PetscFree(tmp2);
121204ca555eSLois Curfman McInnes     }
121304ca555eSLois Curfman McInnes     else if (type == NORM_INFINITY) { /* max row norm */
1214515d9167SLois Curfman McInnes       double ntemp = 0.0;
121504ca555eSLois Curfman McInnes       for ( j=0; j<amat->m; j++ ) {
1216dbb450caSBarry Smith         v = amat->a + amat->i[j] + shift;
121704ca555eSLois Curfman McInnes         sum = 0.0;
121804ca555eSLois Curfman McInnes         for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
1219cddf8d76SBarry Smith           sum += PetscAbsScalar(*v); v++;
122004ca555eSLois Curfman McInnes         }
1221dbb450caSBarry Smith         v = bmat->a + bmat->i[j] + shift;
122204ca555eSLois Curfman McInnes         for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
1223cddf8d76SBarry Smith           sum += PetscAbsScalar(*v); v++;
122404ca555eSLois Curfman McInnes         }
1225515d9167SLois Curfman McInnes         if (sum > ntemp) ntemp = sum;
122604ca555eSLois Curfman McInnes       }
1227515d9167SLois Curfman McInnes       MPI_Allreduce((void*)&ntemp,(void*)norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);
122804ca555eSLois Curfman McInnes     }
122904ca555eSLois Curfman McInnes     else {
1230515d9167SLois Curfman McInnes       SETERRQ(1,"MatNorm_MPIAIJ:No support for two norm");
123104ca555eSLois Curfman McInnes     }
123237fa93a5SLois Curfman McInnes   }
1233855ac2c5SLois Curfman McInnes   return 0;
1234855ac2c5SLois Curfman McInnes }
1235855ac2c5SLois Curfman McInnes 
12360de55854SLois Curfman McInnes static int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1237b7c46309SBarry Smith {
1238b7c46309SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1239dbb450caSBarry Smith   Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
1240416022c9SBarry Smith   int        ierr,shift = Aloc->indexshift;
1241416022c9SBarry Smith   Mat        B;
1242b7c46309SBarry Smith   int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
1243b7c46309SBarry Smith   Scalar     *array;
1244b7c46309SBarry Smith 
1245416022c9SBarry Smith   if (!matout && M != N) SETERRQ(1,"MatTranspose_MPIAIJ:Square only in-place");
1246b4fd4287SBarry Smith   ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,
1247b4fd4287SBarry Smith          PETSC_NULL,&B); CHKERRQ(ierr);
1248b7c46309SBarry Smith 
1249b7c46309SBarry Smith   /* copy over the A part */
1250ec8511deSBarry Smith   Aloc = (Mat_SeqAIJ*) a->A->data;
1251b7c46309SBarry Smith   m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1252b7c46309SBarry Smith   row = a->rstart;
1253dbb450caSBarry Smith   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
1254b7c46309SBarry Smith   for ( i=0; i<m; i++ ) {
1255416022c9SBarry Smith     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1256b7c46309SBarry Smith     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1257b7c46309SBarry Smith   }
1258b7c46309SBarry Smith   aj = Aloc->j;
1259dbb450caSBarry Smith   for ( i=0; i<ai[m]|+shift; i++ ) {aj[i] -= a->cstart + shift;}
1260b7c46309SBarry Smith 
1261b7c46309SBarry Smith   /* copy over the B part */
1262ec8511deSBarry Smith   Aloc = (Mat_SeqAIJ*) a->B->data;
1263b7c46309SBarry Smith   m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1264b7c46309SBarry Smith   row = a->rstart;
12650452661fSBarry Smith   ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
1266dbb450caSBarry Smith   for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
1267b7c46309SBarry Smith   for ( i=0; i<m; i++ ) {
1268416022c9SBarry Smith     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1269b7c46309SBarry Smith     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1270b7c46309SBarry Smith   }
12710452661fSBarry Smith   PetscFree(ct);
1272b7c46309SBarry Smith   ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
1273b7c46309SBarry Smith   ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
12740de55854SLois Curfman McInnes   if (matout) {
12750de55854SLois Curfman McInnes     *matout = B;
12760de55854SLois Curfman McInnes   } else {
12770de55854SLois Curfman McInnes     /* This isn't really an in-place transpose .... but free data structures from a */
12780452661fSBarry Smith     PetscFree(a->rowners);
12790de55854SLois Curfman McInnes     ierr = MatDestroy(a->A); CHKERRQ(ierr);
12800de55854SLois Curfman McInnes     ierr = MatDestroy(a->B); CHKERRQ(ierr);
12810452661fSBarry Smith     if (a->colmap) PetscFree(a->colmap);
12820452661fSBarry Smith     if (a->garray) PetscFree(a->garray);
12830de55854SLois Curfman McInnes     if (a->lvec) VecDestroy(a->lvec);
1284a56f8943SBarry Smith     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
12850452661fSBarry Smith     PetscFree(a);
1286416022c9SBarry Smith     PetscMemcpy(A,B,sizeof(struct _Mat));
12870452661fSBarry Smith     PetscHeaderDestroy(B);
12880de55854SLois Curfman McInnes   }
1289b7c46309SBarry Smith   return 0;
1290b7c46309SBarry Smith }
1291b7c46309SBarry Smith 
1292682d7d0cSBarry Smith extern int MatPrintHelp_SeqAIJ(Mat);
1293682d7d0cSBarry Smith static int MatPrintHelp_MPIAIJ(Mat A)
1294682d7d0cSBarry Smith {
1295682d7d0cSBarry Smith   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1296682d7d0cSBarry Smith 
1297682d7d0cSBarry Smith   if (!a->rank) return MatPrintHelp_SeqAIJ(a->A);
1298682d7d0cSBarry Smith   else return 0;
1299682d7d0cSBarry Smith }
1300682d7d0cSBarry Smith 
1301fc76ce87SLois Curfman McInnes extern int MatConvert_MPIAIJ(Mat,MatType,Mat *);
13023d1612f7SBarry Smith static int MatConvertSameType_MPIAIJ(Mat,Mat *,int);
1303d6dfbf8fSBarry Smith 
13048a729477SBarry Smith /* -------------------------------------------------------------------*/
13052ee70a88SLois Curfman McInnes static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
130639e00950SLois Curfman McInnes        MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
130744a69424SLois Curfman McInnes        MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
130844a69424SLois Curfman McInnes        MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
1309299609e3SLois Curfman McInnes        MatSolve_MPIAIJ,MatSolveAdd_MPIAIJ,
1310299609e3SLois Curfman McInnes        MatSolveTrans_MPIAIJ,MatSolveTransAdd_MPIAIJ,
1311299609e3SLois Curfman McInnes        MatLUFactor_MPIAIJ,0,
131244a69424SLois Curfman McInnes        MatRelax_MPIAIJ,
1313b7c46309SBarry Smith        MatTranspose_MPIAIJ,
1314a66be287SLois Curfman McInnes        MatGetInfo_MPIAIJ,0,
1315855ac2c5SLois Curfman McInnes        MatGetDiagonal_MPIAIJ,0,MatNorm_MPIAIJ,
1316ee50ffe9SBarry Smith        MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
13171eb62cbbSBarry Smith        0,
1318299609e3SLois Curfman McInnes        MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
1319299609e3SLois Curfman McInnes        MatGetReordering_MPIAIJ,
1320299609e3SLois Curfman McInnes        MatLUFactorSymbolic_MPIAIJ,MatLUFactorNumeric_MPIAIJ,0,0,
1321d1710a03SLois Curfman McInnes        MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
1322299609e3SLois Curfman McInnes        MatILUFactorSymbolic_MPIAIJ,0,
13233d1612f7SBarry Smith        0,0,MatConvert_MPIAIJ,0,0,MatConvertSameType_MPIAIJ,0,0,
1324b49de8d1SLois Curfman McInnes        0,0,0,
1325682d7d0cSBarry Smith        0,0,MatGetValues_MPIAIJ,0,
1326*052efed2SBarry Smith        MatPrintHelp_MPIAIJ,
1327*052efed2SBarry Smith        MatScale_MPIAIJ};
13288a729477SBarry Smith 
13291987afe7SBarry Smith /*@C
1330ff756334SLois Curfman McInnes    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
1331ff756334SLois Curfman McInnes    (the default parallel PETSc format).
13328a729477SBarry Smith 
13338a729477SBarry Smith    Input Parameters:
13341eb62cbbSBarry Smith .  comm - MPI communicator
13357d3e4905SLois Curfman McInnes .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
13367d3e4905SLois Curfman McInnes .  n - number of local columns (or PETSC_DECIDE to have calculated
13377d3e4905SLois Curfman McInnes            if N is given)
13387d3e4905SLois Curfman McInnes .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
13397d3e4905SLois Curfman McInnes .  N - number of global columns (or PETSC_DECIDE to have calculated
13407d3e4905SLois Curfman McInnes            if n is given)
1341ab693e5aSLois Curfman McInnes .  d_nz - number of nonzeros per row in diagonal portion of local submatrix
1342ff756334SLois Curfman McInnes            (same for all local rows)
1343ab693e5aSLois Curfman McInnes .  d_nzz - number of nonzeros per row in diagonal portion of local submatrix
1344ab693e5aSLois Curfman McInnes            or null (possibly different for each row).  You must leave room
1345ab693e5aSLois Curfman McInnes            for the diagonal entry even if it is zero.
1346ab693e5aSLois Curfman McInnes .  o_nz - number of nonzeros per row in off-diagonal portion of local
1347ab693e5aSLois Curfman McInnes            submatrix (same for all local rows).
1348ab693e5aSLois Curfman McInnes .  o_nzz - number of nonzeros per row in off-diagonal portion of local
1349ab693e5aSLois Curfman McInnes            submatrix or null (possibly different for each row).
13508a729477SBarry Smith 
1351ff756334SLois Curfman McInnes    Output Parameter:
13528a729477SBarry Smith .  newmat - the matrix
13538a729477SBarry Smith 
1354ff756334SLois Curfman McInnes    Notes:
1355ff756334SLois Curfman McInnes    The AIJ format (also called the Yale sparse matrix format or
1356ff756334SLois Curfman McInnes    compressed row storage), is fully compatible with standard Fortran 77
13570002213bSLois Curfman McInnes    storage.  That is, the stored row and column indices can begin at
13580002213bSLois Curfman McInnes    either one (as in Fortran) or zero.  See the users manual for details.
1359ff756334SLois Curfman McInnes 
1360ff756334SLois Curfman McInnes    The user MUST specify either the local or global matrix dimensions
1361ff756334SLois Curfman McInnes    (possibly both).
1362ff756334SLois Curfman McInnes 
1363e0245417SLois Curfman McInnes    Storage Information:
1364e0245417SLois Curfman McInnes    For a square global matrix we define each processor's diagonal portion
1365e0245417SLois Curfman McInnes    to be its local rows and the corresponding columns (a square submatrix);
1366e0245417SLois Curfman McInnes    each processor's off-diagonal portion encompasses the remainder of the
1367e0245417SLois Curfman McInnes    local matrix (a rectangular submatrix).
1368e0245417SLois Curfman McInnes 
1369e0245417SLois Curfman McInnes    The user can specify preallocated storage for the diagonal part of
13705ace5be8SLois Curfman McInnes    the local submatrix with either d_nz or d_nnz (not both).  Set
13715ace5be8SLois Curfman McInnes    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
13725ace5be8SLois Curfman McInnes    memory allocation.  Likewise, specify preallocated storage for the
13735ace5be8SLois Curfman McInnes    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1374ff756334SLois Curfman McInnes 
1375c451ab25SLois Curfman McInnes    By default, this format uses inodes (identical nodes) when possible.
1376c451ab25SLois Curfman McInnes    We search for consecutive rows with the same nonzero structure, thereby
1377c451ab25SLois Curfman McInnes    reusing matrix information to achieve increased efficiency.
1378c451ab25SLois Curfman McInnes 
1379c451ab25SLois Curfman McInnes    Options Database Keys:
1380c451ab25SLois Curfman McInnes $    -mat_aij_no_inode  - Do not use inodes
1381c451ab25SLois Curfman McInnes $    -mat_aij_inode_limit <limit> - Set inode limit (max limit=5)
1382c451ab25SLois Curfman McInnes 
1383dbd7a890SLois Curfman McInnes .keywords: matrix, aij, compressed row, sparse, parallel
1384ff756334SLois Curfman McInnes 
1385fafbff53SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
13868a729477SBarry Smith @*/
13871eb62cbbSBarry Smith int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
13881eb62cbbSBarry Smith                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *newmat)
13898a729477SBarry Smith {
13908a729477SBarry Smith   Mat          mat;
1391416022c9SBarry Smith   Mat_MPIAIJ   *a;
13926abc6512SBarry Smith   int          ierr, i,sum[2],work[2];
1393416022c9SBarry Smith 
13948a729477SBarry Smith   *newmat         = 0;
13950452661fSBarry Smith   PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,comm);
1396a5a9c739SBarry Smith   PLogObjectCreate(mat);
13970452661fSBarry Smith   mat->data       = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
1398416022c9SBarry Smith   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
139944a69424SLois Curfman McInnes   mat->destroy    = MatDestroy_MPIAIJ;
140044a69424SLois Curfman McInnes   mat->view       = MatView_MPIAIJ;
14018a729477SBarry Smith   mat->factor     = 0;
1402d6dfbf8fSBarry Smith 
140364119d58SLois Curfman McInnes   a->insertmode = NOT_SET_VALUES;
140417699dbbSLois Curfman McInnes   MPI_Comm_rank(comm,&a->rank);
140517699dbbSLois Curfman McInnes   MPI_Comm_size(comm,&a->size);
14061eb62cbbSBarry Smith 
1407b4fd4287SBarry Smith   if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
14081987afe7SBarry Smith     SETERRQ(1,"MatCreateMPIAIJ:Cannot have PETSc decide rows but set d_nnz or o_nnz");
14091987afe7SBarry Smith 
1410dbd7a890SLois Curfman McInnes   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
14111eb62cbbSBarry Smith     work[0] = m; work[1] = n;
1412d65a2f8fSBarry Smith     MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
1413dbd7a890SLois Curfman McInnes     if (M == PETSC_DECIDE) M = sum[0];
1414dbd7a890SLois Curfman McInnes     if (N == PETSC_DECIDE) N = sum[1];
14151eb62cbbSBarry Smith   }
141617699dbbSLois Curfman McInnes   if (m == PETSC_DECIDE) {m = M/a->size + ((M % a->size) > a->rank);}
141717699dbbSLois Curfman McInnes   if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);}
1418416022c9SBarry Smith   a->m = m;
1419416022c9SBarry Smith   a->n = n;
1420416022c9SBarry Smith   a->N = N;
1421416022c9SBarry Smith   a->M = M;
14221eb62cbbSBarry Smith 
14231eb62cbbSBarry Smith   /* build local table of row and column ownerships */
14240452661fSBarry Smith   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
1425579c6b6fSBarry Smith   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
142617699dbbSLois Curfman McInnes   a->cowners = a->rowners + a->size + 1;
1427416022c9SBarry Smith   MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);
1428416022c9SBarry Smith   a->rowners[0] = 0;
142917699dbbSLois Curfman McInnes   for ( i=2; i<=a->size; i++ ) {
1430416022c9SBarry Smith     a->rowners[i] += a->rowners[i-1];
14318a729477SBarry Smith   }
143217699dbbSLois Curfman McInnes   a->rstart = a->rowners[a->rank];
143317699dbbSLois Curfman McInnes   a->rend   = a->rowners[a->rank+1];
1434416022c9SBarry Smith   MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);
1435416022c9SBarry Smith   a->cowners[0] = 0;
143617699dbbSLois Curfman McInnes   for ( i=2; i<=a->size; i++ ) {
1437416022c9SBarry Smith     a->cowners[i] += a->cowners[i-1];
14388a729477SBarry Smith   }
143917699dbbSLois Curfman McInnes   a->cstart = a->cowners[a->rank];
144017699dbbSLois Curfman McInnes   a->cend   = a->cowners[a->rank+1];
14418a729477SBarry Smith 
14425ace5be8SLois Curfman McInnes   if (d_nz == PETSC_DEFAULT) d_nz = 5;
1443416022c9SBarry Smith   ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&a->A); CHKERRQ(ierr);
1444416022c9SBarry Smith   PLogObjectParent(mat,a->A);
14457b8455f0SLois Curfman McInnes   if (o_nz == PETSC_DEFAULT) o_nz = 0;
1446416022c9SBarry Smith   ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&a->B); CHKERRQ(ierr);
1447416022c9SBarry Smith   PLogObjectParent(mat,a->B);
14488a729477SBarry Smith 
14491eb62cbbSBarry Smith   /* build cache for off array entries formed */
1450416022c9SBarry Smith   ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr);
1451416022c9SBarry Smith   a->colmap      = 0;
1452416022c9SBarry Smith   a->garray      = 0;
14534b0e389bSBarry Smith   a->roworiented = 1;
14548a729477SBarry Smith 
14551eb62cbbSBarry Smith   /* stuff used for matrix vector multiply */
1456416022c9SBarry Smith   a->lvec      = 0;
1457416022c9SBarry Smith   a->Mvctx     = 0;
1458416022c9SBarry Smith   a->assembled = 0;
14598a729477SBarry Smith 
1460d6dfbf8fSBarry Smith   *newmat = mat;
1461d6dfbf8fSBarry Smith   return 0;
1462d6dfbf8fSBarry Smith }
1463c74985f6SBarry Smith 
14643d1612f7SBarry Smith static int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues)
1465d6dfbf8fSBarry Smith {
1466d6dfbf8fSBarry Smith   Mat        mat;
1467416022c9SBarry Smith   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
146825cdf11fSBarry Smith   int        ierr, len,flg;
1469d6dfbf8fSBarry Smith 
14703d1612f7SBarry Smith   if (!oldmat->assembled) SETERRQ(1,"MatConvertSameType_MPIAIJ:Must assemble matrix");
1471416022c9SBarry Smith   *newmat       = 0;
14720452661fSBarry Smith   PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm);
1473a5a9c739SBarry Smith   PLogObjectCreate(mat);
14740452661fSBarry Smith   mat->data     = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
1475416022c9SBarry Smith   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
147644a69424SLois Curfman McInnes   mat->destroy  = MatDestroy_MPIAIJ;
147744a69424SLois Curfman McInnes   mat->view     = MatView_MPIAIJ;
1478d6dfbf8fSBarry Smith   mat->factor   = matin->factor;
1479d6dfbf8fSBarry Smith 
1480416022c9SBarry Smith   a->m          = oldmat->m;
1481416022c9SBarry Smith   a->n          = oldmat->n;
1482416022c9SBarry Smith   a->M          = oldmat->M;
1483416022c9SBarry Smith   a->N          = oldmat->N;
1484d6dfbf8fSBarry Smith 
1485416022c9SBarry Smith   a->assembled  = 1;
1486416022c9SBarry Smith   a->rstart     = oldmat->rstart;
1487416022c9SBarry Smith   a->rend       = oldmat->rend;
1488416022c9SBarry Smith   a->cstart     = oldmat->cstart;
1489416022c9SBarry Smith   a->cend       = oldmat->cend;
149017699dbbSLois Curfman McInnes   a->size       = oldmat->size;
149117699dbbSLois Curfman McInnes   a->rank       = oldmat->rank;
149264119d58SLois Curfman McInnes   a->insertmode = NOT_SET_VALUES;
1493d6dfbf8fSBarry Smith 
14940452661fSBarry Smith   a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners);
149517699dbbSLois Curfman McInnes   PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
149617699dbbSLois Curfman McInnes   PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));
1497416022c9SBarry Smith   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
14982ee70a88SLois Curfman McInnes   if (oldmat->colmap) {
14990452661fSBarry Smith     a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
1500416022c9SBarry Smith     PLogObjectMemory(mat,(a->N)*sizeof(int));
1501416022c9SBarry Smith     PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
1502416022c9SBarry Smith   } else a->colmap = 0;
1503ec8511deSBarry Smith   if (oldmat->garray && (len = ((Mat_SeqAIJ *) (oldmat->B->data))->n)) {
15040452661fSBarry Smith     a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray);
1505464493b3SBarry Smith     PLogObjectMemory(mat,len*sizeof(int));
1506416022c9SBarry Smith     PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
1507416022c9SBarry Smith   } else a->garray = 0;
1508d6dfbf8fSBarry Smith 
1509416022c9SBarry Smith   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
1510416022c9SBarry Smith   PLogObjectParent(mat,a->lvec);
1511a56f8943SBarry Smith   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
1512416022c9SBarry Smith   PLogObjectParent(mat,a->Mvctx);
1513416022c9SBarry Smith   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
1514416022c9SBarry Smith   PLogObjectParent(mat,a->A);
1515416022c9SBarry Smith   ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
1516416022c9SBarry Smith   PLogObjectParent(mat,a->B);
15175dd7a6c7SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
151825cdf11fSBarry Smith   if (flg) {
1519682d7d0cSBarry Smith     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
1520682d7d0cSBarry Smith   }
15218a729477SBarry Smith   *newmat = mat;
15228a729477SBarry Smith   return 0;
15238a729477SBarry Smith }
1524416022c9SBarry Smith 
1525416022c9SBarry Smith #include "sysio.h"
1526416022c9SBarry Smith 
152702834360SBarry Smith int MatLoad_MPIAIJorMPIRow(Viewer bview,MatType type,Mat *newmat)
1528416022c9SBarry Smith {
1529d65a2f8fSBarry Smith   Mat          A;
1530416022c9SBarry Smith   int          i, nz, ierr, j,rstart, rend, fd;
1531d65a2f8fSBarry Smith   Scalar       *vals,*svals;
1532416022c9SBarry Smith   PetscObject  vobj = (PetscObject) bview;
1533416022c9SBarry Smith   MPI_Comm     comm = vobj->comm;
1534416022c9SBarry Smith   MPI_Status   status;
153517699dbbSLois Curfman McInnes   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1536d65a2f8fSBarry Smith   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
1537d65a2f8fSBarry Smith   int          tag = ((PetscObject)bview)->tag;
1538416022c9SBarry Smith 
153917699dbbSLois Curfman McInnes   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
154017699dbbSLois Curfman McInnes   if (!rank) {
1541416022c9SBarry Smith     ierr = ViewerFileGetDescriptor_Private(bview,&fd); CHKERRQ(ierr);
1542416022c9SBarry Smith     ierr = SYRead(fd,(char *)header,4,SYINT); CHKERRQ(ierr);
154302834360SBarry Smith     if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIAIJorMPIRow:not matrix object");
1544416022c9SBarry Smith   }
1545416022c9SBarry Smith 
1546416022c9SBarry Smith   MPI_Bcast(header+1,3,MPI_INT,0,comm);
1547416022c9SBarry Smith   M = header[1]; N = header[2];
1548416022c9SBarry Smith   /* determine ownership of all rows */
154917699dbbSLois Curfman McInnes   m = M/size + ((M % size) > rank);
15500452661fSBarry Smith   rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
1551416022c9SBarry Smith   MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1552416022c9SBarry Smith   rowners[0] = 0;
155317699dbbSLois Curfman McInnes   for ( i=2; i<=size; i++ ) {
1554416022c9SBarry Smith     rowners[i] += rowners[i-1];
1555416022c9SBarry Smith   }
155617699dbbSLois Curfman McInnes   rstart = rowners[rank];
155717699dbbSLois Curfman McInnes   rend   = rowners[rank+1];
1558416022c9SBarry Smith 
1559416022c9SBarry Smith   /* distribute row lengths to all processors */
15600452661fSBarry Smith   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
1561416022c9SBarry Smith   offlens = ourlens + (rend-rstart);
156217699dbbSLois Curfman McInnes   if (!rank) {
15630452661fSBarry Smith     rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
1564416022c9SBarry Smith     ierr = SYRead(fd,rowlengths,M,SYINT); CHKERRQ(ierr);
15650452661fSBarry Smith     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
156617699dbbSLois Curfman McInnes     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
1567d65a2f8fSBarry Smith     MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
15680452661fSBarry Smith     PetscFree(sndcounts);
1569416022c9SBarry Smith   }
1570416022c9SBarry Smith   else {
1571416022c9SBarry Smith     MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
1572416022c9SBarry Smith   }
1573416022c9SBarry Smith 
157417699dbbSLois Curfman McInnes   if (!rank) {
1575416022c9SBarry Smith     /* calculate the number of nonzeros on each processor */
15760452661fSBarry Smith     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
1577cddf8d76SBarry Smith     PetscMemzero(procsnz,size*sizeof(int));
157817699dbbSLois Curfman McInnes     for ( i=0; i<size; i++ ) {
1579416022c9SBarry Smith       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
1580416022c9SBarry Smith         procsnz[i] += rowlengths[j];
1581416022c9SBarry Smith       }
1582416022c9SBarry Smith     }
15830452661fSBarry Smith     PetscFree(rowlengths);
1584416022c9SBarry Smith 
1585416022c9SBarry Smith     /* determine max buffer needed and allocate it */
1586416022c9SBarry Smith     maxnz = 0;
158717699dbbSLois Curfman McInnes     for ( i=0; i<size; i++ ) {
15880452661fSBarry Smith       maxnz = PetscMax(maxnz,procsnz[i]);
1589416022c9SBarry Smith     }
15900452661fSBarry Smith     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
1591416022c9SBarry Smith 
1592416022c9SBarry Smith     /* read in my part of the matrix column indices  */
1593416022c9SBarry Smith     nz = procsnz[0];
15940452661fSBarry Smith     mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1595d65a2f8fSBarry Smith     ierr = SYRead(fd,mycols,nz,SYINT); CHKERRQ(ierr);
1596d65a2f8fSBarry Smith 
1597d65a2f8fSBarry Smith     /* read in every one elses and ship off */
159817699dbbSLois Curfman McInnes     for ( i=1; i<size; i++ ) {
1599d65a2f8fSBarry Smith       nz = procsnz[i];
1600416022c9SBarry Smith       ierr = SYRead(fd,cols,nz,SYINT); CHKERRQ(ierr);
1601d65a2f8fSBarry Smith       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
1602d65a2f8fSBarry Smith     }
16030452661fSBarry Smith     PetscFree(cols);
1604416022c9SBarry Smith   }
1605416022c9SBarry Smith   else {
1606416022c9SBarry Smith     /* determine buffer space needed for message */
1607416022c9SBarry Smith     nz = 0;
1608416022c9SBarry Smith     for ( i=0; i<m; i++ ) {
1609416022c9SBarry Smith       nz += ourlens[i];
1610416022c9SBarry Smith     }
16110452661fSBarry Smith     mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1612416022c9SBarry Smith 
1613416022c9SBarry Smith     /* receive message of column indices*/
1614d65a2f8fSBarry Smith     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
1615416022c9SBarry Smith     MPI_Get_count(&status,MPI_INT,&maxnz);
161602834360SBarry Smith     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJorMPIRow:something is wrong with file");
1617416022c9SBarry Smith   }
1618416022c9SBarry Smith 
1619416022c9SBarry Smith   /* loop over local rows, determining number of off diagonal entries */
1620cddf8d76SBarry Smith   PetscMemzero(offlens,m*sizeof(int));
1621416022c9SBarry Smith   jj = 0;
1622416022c9SBarry Smith   for ( i=0; i<m; i++ ) {
1623416022c9SBarry Smith     for ( j=0; j<ourlens[i]; j++ ) {
1624d65a2f8fSBarry Smith       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1625416022c9SBarry Smith       jj++;
1626416022c9SBarry Smith     }
1627416022c9SBarry Smith   }
1628d65a2f8fSBarry Smith 
1629d65a2f8fSBarry Smith   /* create our matrix */
1630416022c9SBarry Smith   for ( i=0; i<m; i++ ) {
1631416022c9SBarry Smith     ourlens[i] -= offlens[i];
1632416022c9SBarry Smith   }
1633d65a2f8fSBarry Smith   if (type == MATMPIAIJ) {
1634d65a2f8fSBarry Smith     ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1635d65a2f8fSBarry Smith   }
1636d65a2f8fSBarry Smith   else if (type == MATMPIROW) {
1637d65a2f8fSBarry Smith     ierr = MatCreateMPIRow(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1638d65a2f8fSBarry Smith   }
1639d65a2f8fSBarry Smith   A = *newmat;
1640d65a2f8fSBarry Smith   MatSetOption(A,COLUMNS_SORTED);
1641d65a2f8fSBarry Smith   for ( i=0; i<m; i++ ) {
1642d65a2f8fSBarry Smith     ourlens[i] += offlens[i];
1643d65a2f8fSBarry Smith   }
1644416022c9SBarry Smith 
164517699dbbSLois Curfman McInnes   if (!rank) {
16460452661fSBarry Smith     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);
1647416022c9SBarry Smith 
1648416022c9SBarry Smith     /* read in my part of the matrix numerical values  */
1649416022c9SBarry Smith     nz = procsnz[0];
1650416022c9SBarry Smith     ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr);
1651d65a2f8fSBarry Smith 
1652d65a2f8fSBarry Smith     /* insert into matrix */
1653d65a2f8fSBarry Smith     jj      = rstart;
1654d65a2f8fSBarry Smith     smycols = mycols;
1655d65a2f8fSBarry Smith     svals   = vals;
1656d65a2f8fSBarry Smith     for ( i=0; i<m; i++ ) {
1657d65a2f8fSBarry Smith       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1658d65a2f8fSBarry Smith       smycols += ourlens[i];
1659d65a2f8fSBarry Smith       svals   += ourlens[i];
1660d65a2f8fSBarry Smith       jj++;
1661416022c9SBarry Smith     }
1662416022c9SBarry Smith 
1663d65a2f8fSBarry Smith     /* read in other processors and ship out */
166417699dbbSLois Curfman McInnes     for ( i=1; i<size; i++ ) {
1665416022c9SBarry Smith       nz = procsnz[i];
1666416022c9SBarry Smith       ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr);
1667d65a2f8fSBarry Smith       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
1668416022c9SBarry Smith     }
16690452661fSBarry Smith     PetscFree(procsnz);
1670416022c9SBarry Smith   }
1671d65a2f8fSBarry Smith   else {
1672d65a2f8fSBarry Smith     /* receive numeric values */
16730452661fSBarry Smith     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);
1674416022c9SBarry Smith 
1675d65a2f8fSBarry Smith     /* receive message of values*/
1676d65a2f8fSBarry Smith     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
1677d65a2f8fSBarry Smith     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
167802834360SBarry Smith     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJorMPIRow:something is wrong with file");
1679d65a2f8fSBarry Smith 
1680d65a2f8fSBarry Smith     /* insert into matrix */
1681d65a2f8fSBarry Smith     jj      = rstart;
1682d65a2f8fSBarry Smith     smycols = mycols;
1683d65a2f8fSBarry Smith     svals   = vals;
1684d65a2f8fSBarry Smith     for ( i=0; i<m; i++ ) {
1685d65a2f8fSBarry Smith       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1686d65a2f8fSBarry Smith       smycols += ourlens[i];
1687d65a2f8fSBarry Smith       svals   += ourlens[i];
1688d65a2f8fSBarry Smith       jj++;
1689d65a2f8fSBarry Smith     }
1690d65a2f8fSBarry Smith   }
16910452661fSBarry Smith   PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);
1692d65a2f8fSBarry Smith 
1693d65a2f8fSBarry Smith   ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
1694d65a2f8fSBarry Smith   ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
1695416022c9SBarry Smith   return 0;
1696416022c9SBarry Smith }
1697