xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 25cdf11f00656cd6142ccc252ca9e2f99af554b2)
1cb512458SBarry Smith #ifndef lint
2*25cdf11fSBarry Smith static char vcid[] = "$Id: mpiaij.c,v 1.108 1996/01/04 15:58:58 curfman Exp bsmith $";
3cb512458SBarry Smith #endif
48a729477SBarry Smith 
51eb62cbbSBarry Smith #include "mpiaij.h"
68a729477SBarry Smith #include "vec/vecimpl.h"
7d6dfbf8fSBarry Smith #include "inline/spops.h"
88a729477SBarry Smith 
99e25ed09SBarry Smith /* local utility routine that creates a mapping from the global column
109e25ed09SBarry Smith number to the local number in the off-diagonal part of the local
119e25ed09SBarry Smith storage of the matrix.  This is done in a non scable way since the
129e25ed09SBarry Smith length of colmap equals the global matrix length.
139e25ed09SBarry Smith */
14b7c46309SBarry Smith static int CreateColmap_Private(Mat mat)
159e25ed09SBarry Smith {
1644a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
17ec8511deSBarry Smith   Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
18416022c9SBarry Smith   int        n = B->n,i,shift = B->indexshift;
19dbb450caSBarry Smith 
200452661fSBarry Smith   aij->colmap = (int *) PetscMalloc(aij->N*sizeof(int));CHKPTRQ(aij->colmap);
21464493b3SBarry Smith   PLogObjectMemory(mat,aij->N*sizeof(int));
22cddf8d76SBarry Smith   PetscMemzero(aij->colmap,aij->N*sizeof(int));
23dbb450caSBarry Smith   for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i-shift;
249e25ed09SBarry Smith   return 0;
259e25ed09SBarry Smith }
269e25ed09SBarry Smith 
272493cbb0SBarry Smith extern int DisAssemble_MPIAIJ(Mat);
282493cbb0SBarry Smith 
2951c98ccdSLois Curfman McInnes static int MatGetReordering_MPIAIJ(Mat mat,MatOrdering type,IS *rperm,IS *cperm)
30299609e3SLois Curfman McInnes {
31299609e3SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
32299609e3SLois Curfman McInnes   int        ierr;
3317699dbbSLois Curfman McInnes   if (aij->size == 1) {
3451c98ccdSLois Curfman McInnes     ierr = MatGetReordering(aij->A,type,rperm,cperm); CHKERRQ(ierr);
3548d91487SBarry Smith   } else SETERRQ(1,"MatGetReordering_MPIAIJ:not supported in parallel");
36299609e3SLois Curfman McInnes   return 0;
37299609e3SLois Curfman McInnes }
38299609e3SLois Curfman McInnes 
394b0e389bSBarry Smith static int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
408a729477SBarry Smith {
4144a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
42dbb450caSBarry Smith   Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data;
434b0e389bSBarry Smith   Scalar     value;
441eb62cbbSBarry Smith   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
451eb62cbbSBarry Smith   int        cstart = aij->cstart, cend = aij->cend,row,col;
464b0e389bSBarry Smith   int        shift = C->indexshift,roworiented = aij->roworiented;
478a729477SBarry Smith 
4864119d58SLois Curfman McInnes   if (aij->insertmode != NOT_SET_VALUES && aij->insertmode != addv) {
4948d91487SBarry Smith     SETERRQ(1,"MatSetValues_MPIAIJ:Cannot mix inserts and adds");
508a729477SBarry Smith   }
511eb62cbbSBarry Smith   aij->insertmode = addv;
528a729477SBarry Smith   for ( i=0; i<m; i++ ) {
534b0e389bSBarry Smith     if (im[i] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative row");
544b0e389bSBarry Smith     if (im[i] >= aij->M) SETERRQ(1,"MatSetValues_MPIAIJ:Row too large");
554b0e389bSBarry Smith     if (im[i] >= rstart && im[i] < rend) {
564b0e389bSBarry Smith       row = im[i] - rstart;
571eb62cbbSBarry Smith       for ( j=0; j<n; j++ ) {
584b0e389bSBarry Smith         if (in[j] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative column");
594b0e389bSBarry Smith         if (in[j] >= aij->N) SETERRQ(1,"MatSetValues_MPIAIJ:Col too large");
604b0e389bSBarry Smith         if (in[j] >= cstart && in[j] < cend){
614b0e389bSBarry Smith           col = in[j] - cstart;
624b0e389bSBarry Smith           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
634b0e389bSBarry Smith           ierr = MatSetValues(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
641eb62cbbSBarry Smith         }
651eb62cbbSBarry Smith         else {
66d6dfbf8fSBarry Smith           if (aij->assembled) {
67b7c46309SBarry Smith             if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
684b0e389bSBarry Smith             col = aij->colmap[in[j]] + shift;
69ec8511deSBarry Smith             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
702493cbb0SBarry Smith               ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
714b0e389bSBarry Smith               col =  in[j];
72d6dfbf8fSBarry Smith             }
739e25ed09SBarry Smith           }
744b0e389bSBarry Smith           else col = in[j];
754b0e389bSBarry Smith           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
764b0e389bSBarry Smith           ierr = MatSetValues(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
771eb62cbbSBarry Smith         }
781eb62cbbSBarry Smith       }
791eb62cbbSBarry Smith     }
801eb62cbbSBarry Smith     else {
814b0e389bSBarry Smith       if (roworiented) {
824b0e389bSBarry Smith         ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
834b0e389bSBarry Smith       }
844b0e389bSBarry Smith       else {
854b0e389bSBarry Smith         row = im[i];
864b0e389bSBarry Smith         for ( j=0; j<n; j++ ) {
874b0e389bSBarry Smith           ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
884b0e389bSBarry Smith         }
894b0e389bSBarry Smith       }
901eb62cbbSBarry Smith     }
918a729477SBarry Smith   }
928a729477SBarry Smith   return 0;
938a729477SBarry Smith }
948a729477SBarry Smith 
95b49de8d1SLois Curfman McInnes static int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
96b49de8d1SLois Curfman McInnes {
97b49de8d1SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
98b49de8d1SLois Curfman McInnes   Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data;
99b49de8d1SLois Curfman McInnes   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
100b49de8d1SLois Curfman McInnes   int        cstart = aij->cstart, cend = aij->cend,row,col;
101b49de8d1SLois Curfman McInnes   int        shift = C->indexshift;
102b49de8d1SLois Curfman McInnes 
103b49de8d1SLois Curfman McInnes   if (!aij->assembled) SETERRQ(1,"MatGetValues_MPIAIJ:Not for unassembled matrix");
104b49de8d1SLois Curfman McInnes   for ( i=0; i<m; i++ ) {
105b49de8d1SLois Curfman McInnes     if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative row");
106b49de8d1SLois Curfman McInnes     if (idxm[i] >= aij->M) SETERRQ(1,"MatGetValues_MPIAIJ:Row too large");
107b49de8d1SLois Curfman McInnes     if (idxm[i] >= rstart && idxm[i] < rend) {
108b49de8d1SLois Curfman McInnes       row = idxm[i] - rstart;
109b49de8d1SLois Curfman McInnes       for ( j=0; j<n; j++ ) {
110b49de8d1SLois Curfman McInnes         if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative column");
111b49de8d1SLois Curfman McInnes         if (idxn[j] >= aij->N) SETERRQ(1,"MatGetValues_MPIAIJ:Col too large");
112b49de8d1SLois Curfman McInnes         if (idxn[j] >= cstart && idxn[j] < cend){
113b49de8d1SLois Curfman McInnes           col = idxn[j] - cstart;
114b49de8d1SLois Curfman McInnes           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
115b49de8d1SLois Curfman McInnes         }
116b49de8d1SLois Curfman McInnes         else {
117b49de8d1SLois Curfman McInnes           col = aij->colmap[idxn[j]] + shift;
118b49de8d1SLois Curfman McInnes           ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
119b49de8d1SLois Curfman McInnes         }
120b49de8d1SLois Curfman McInnes       }
121b49de8d1SLois Curfman McInnes     }
122b49de8d1SLois Curfman McInnes     else {
123b49de8d1SLois Curfman McInnes       SETERRQ(1,"MatGetValues_MPIAIJ:Only local values currently supported");
124b49de8d1SLois Curfman McInnes     }
125b49de8d1SLois Curfman McInnes   }
126b49de8d1SLois Curfman McInnes   return 0;
127b49de8d1SLois Curfman McInnes }
128b49de8d1SLois Curfman McInnes 
129fc76ce87SLois Curfman McInnes static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
1308a729477SBarry Smith {
13144a69424SLois Curfman McInnes   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
132d6dfbf8fSBarry Smith   MPI_Comm    comm = mat->comm;
13317699dbbSLois Curfman McInnes   int         size = aij->size, *owners = aij->rowners;
13417699dbbSLois Curfman McInnes   int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
1351eb62cbbSBarry Smith   MPI_Request *send_waits,*recv_waits;
1366abc6512SBarry Smith   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
1371eb62cbbSBarry Smith   InsertMode  addv;
1381eb62cbbSBarry Smith   Scalar      *rvalues,*svalues;
1391eb62cbbSBarry Smith 
1401eb62cbbSBarry Smith   /* make sure all processors are either in INSERTMODE or ADDMODE */
141d65a2f8fSBarry Smith   MPI_Allreduce(&aij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
142dbb450caSBarry Smith   if (addv == (ADD_VALUES|INSERT_VALUES)) {
143bbb6d6a8SBarry Smith     SETERRQ(1,"MatAssemblyBegin_MPIAIJ:Some processors inserted others added");
1441eb62cbbSBarry Smith   }
1451eb62cbbSBarry Smith   aij->insertmode = addv; /* in case this processor had no cache */
1461eb62cbbSBarry Smith 
1471eb62cbbSBarry Smith   /*  first count number of contributors to each processor */
1480452661fSBarry Smith   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
149cddf8d76SBarry Smith   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
1500452661fSBarry Smith   owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
1511eb62cbbSBarry Smith   for ( i=0; i<aij->stash.n; i++ ) {
1521eb62cbbSBarry Smith     idx = aij->stash.idx[i];
15317699dbbSLois Curfman McInnes     for ( j=0; j<size; j++ ) {
1541eb62cbbSBarry Smith       if (idx >= owners[j] && idx < owners[j+1]) {
1551eb62cbbSBarry Smith         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
1568a729477SBarry Smith       }
1578a729477SBarry Smith     }
1588a729477SBarry Smith   }
15917699dbbSLois Curfman McInnes   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
1601eb62cbbSBarry Smith 
1611eb62cbbSBarry Smith   /* inform other processors of number of messages and max length*/
1620452661fSBarry Smith   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
16317699dbbSLois Curfman McInnes   MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);
16417699dbbSLois Curfman McInnes   nreceives = work[rank];
16517699dbbSLois Curfman McInnes   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
16617699dbbSLois Curfman McInnes   nmax = work[rank];
1670452661fSBarry Smith   PetscFree(work);
1681eb62cbbSBarry Smith 
1691eb62cbbSBarry Smith   /* post receives:
1701eb62cbbSBarry Smith        1) each message will consist of ordered pairs
1711eb62cbbSBarry Smith      (global index,value) we store the global index as a double
172d6dfbf8fSBarry Smith      to simplify the message passing.
1731eb62cbbSBarry Smith        2) since we don't know how long each individual message is we
1741eb62cbbSBarry Smith      allocate the largest needed buffer for each receive. Potentially
1751eb62cbbSBarry Smith      this is a lot of wasted space.
1761eb62cbbSBarry Smith 
1771eb62cbbSBarry Smith 
1781eb62cbbSBarry Smith        This could be done better.
1791eb62cbbSBarry Smith   */
1800452661fSBarry Smith   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
18178b31e54SBarry Smith   CHKPTRQ(rvalues);
1820452661fSBarry Smith   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
18378b31e54SBarry Smith   CHKPTRQ(recv_waits);
1841eb62cbbSBarry Smith   for ( i=0; i<nreceives; i++ ) {
185416022c9SBarry Smith     MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
1861eb62cbbSBarry Smith               comm,recv_waits+i);
1871eb62cbbSBarry Smith   }
1881eb62cbbSBarry Smith 
1891eb62cbbSBarry Smith   /* do sends:
1901eb62cbbSBarry Smith       1) starts[i] gives the starting index in svalues for stuff going to
1911eb62cbbSBarry Smith          the ith processor
1921eb62cbbSBarry Smith   */
1930452661fSBarry Smith   svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
1940452661fSBarry Smith   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
19578b31e54SBarry Smith   CHKPTRQ(send_waits);
1960452661fSBarry Smith   starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
1971eb62cbbSBarry Smith   starts[0] = 0;
19817699dbbSLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
1991eb62cbbSBarry Smith   for ( i=0; i<aij->stash.n; i++ ) {
2001eb62cbbSBarry Smith     svalues[3*starts[owner[i]]]       = (Scalar)  aij->stash.idx[i];
2011eb62cbbSBarry Smith     svalues[3*starts[owner[i]]+1]     = (Scalar)  aij->stash.idy[i];
2021eb62cbbSBarry Smith     svalues[3*(starts[owner[i]]++)+2] =  aij->stash.array[i];
2031eb62cbbSBarry Smith   }
2040452661fSBarry Smith   PetscFree(owner);
2051eb62cbbSBarry Smith   starts[0] = 0;
20617699dbbSLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
2071eb62cbbSBarry Smith   count = 0;
20817699dbbSLois Curfman McInnes   for ( i=0; i<size; i++ ) {
2091eb62cbbSBarry Smith     if (procs[i]) {
210416022c9SBarry Smith       MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
2111eb62cbbSBarry Smith                 comm,send_waits+count++);
2121eb62cbbSBarry Smith     }
2131eb62cbbSBarry Smith   }
2140452661fSBarry Smith   PetscFree(starts); PetscFree(nprocs);
2151eb62cbbSBarry Smith 
2161eb62cbbSBarry Smith   /* Free cache space */
21778b31e54SBarry Smith   ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
2181eb62cbbSBarry Smith 
2191eb62cbbSBarry Smith   aij->svalues    = svalues;    aij->rvalues    = rvalues;
2201eb62cbbSBarry Smith   aij->nsends     = nsends;     aij->nrecvs     = nreceives;
2211eb62cbbSBarry Smith   aij->send_waits = send_waits; aij->recv_waits = recv_waits;
2221eb62cbbSBarry Smith   aij->rmax       = nmax;
2231eb62cbbSBarry Smith 
2241eb62cbbSBarry Smith   return 0;
2251eb62cbbSBarry Smith }
22644a69424SLois Curfman McInnes extern int MatSetUpMultiply_MPIAIJ(Mat);
2271eb62cbbSBarry Smith 
228fc76ce87SLois Curfman McInnes static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
2291eb62cbbSBarry Smith {
23044a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
231dbb450caSBarry Smith   Mat_SeqAIJ *C = (Mat_SeqAIJ *) aij->A->data;
2321eb62cbbSBarry Smith   MPI_Status  *send_status,recv_status;
233416022c9SBarry Smith   int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
234416022c9SBarry Smith   int         row,col,other_disassembled,shift = C->indexshift;
2351eb62cbbSBarry Smith   Scalar      *values,val;
2361eb62cbbSBarry Smith   InsertMode  addv = aij->insertmode;
2371eb62cbbSBarry Smith 
2381eb62cbbSBarry Smith   /*  wait on receives */
2391eb62cbbSBarry Smith   while (count) {
240d6dfbf8fSBarry Smith     MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
2411eb62cbbSBarry Smith     /* unpack receives into our local space */
242d6dfbf8fSBarry Smith     values = aij->rvalues + 3*imdex*aij->rmax;
243c60448a5SBarry Smith     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
2441eb62cbbSBarry Smith     n = n/3;
2451eb62cbbSBarry Smith     for ( i=0; i<n; i++ ) {
2461eb62cbbSBarry Smith       row = (int) PETSCREAL(values[3*i]) - aij->rstart;
2471eb62cbbSBarry Smith       col = (int) PETSCREAL(values[3*i+1]);
2481eb62cbbSBarry Smith       val = values[3*i+2];
2491eb62cbbSBarry Smith       if (col >= aij->cstart && col < aij->cend) {
2501eb62cbbSBarry Smith         col -= aij->cstart;
2511eb62cbbSBarry Smith         MatSetValues(aij->A,1,&row,1,&col,&val,addv);
2521eb62cbbSBarry Smith       }
2531eb62cbbSBarry Smith       else {
254d6dfbf8fSBarry Smith         if (aij->assembled) {
255b7c46309SBarry Smith           if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
256dbb450caSBarry Smith           col = aij->colmap[col] + shift;
257ec8511deSBarry Smith           if (col < 0  && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
2582493cbb0SBarry Smith             ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
2592493cbb0SBarry Smith             col = (int) PETSCREAL(values[3*i+1]);
260d6dfbf8fSBarry Smith           }
2619e25ed09SBarry Smith         }
2621eb62cbbSBarry Smith         MatSetValues(aij->B,1,&row,1,&col,&val,addv);
2631eb62cbbSBarry Smith       }
2641eb62cbbSBarry Smith     }
2651eb62cbbSBarry Smith     count--;
2661eb62cbbSBarry Smith   }
2670452661fSBarry Smith   PetscFree(aij->recv_waits); PetscFree(aij->rvalues);
2681eb62cbbSBarry Smith 
2691eb62cbbSBarry Smith   /* wait on sends */
2701eb62cbbSBarry Smith   if (aij->nsends) {
2710452661fSBarry Smith     send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));
27278b31e54SBarry Smith     CHKPTRQ(send_status);
2731eb62cbbSBarry Smith     MPI_Waitall(aij->nsends,aij->send_waits,send_status);
2740452661fSBarry Smith     PetscFree(send_status);
2751eb62cbbSBarry Smith   }
2760452661fSBarry Smith   PetscFree(aij->send_waits); PetscFree(aij->svalues);
2771eb62cbbSBarry Smith 
27864119d58SLois Curfman McInnes   aij->insertmode = NOT_SET_VALUES;
27978b31e54SBarry Smith   ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
28078b31e54SBarry Smith   ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);
2811eb62cbbSBarry Smith 
2822493cbb0SBarry Smith   /* determine if any processor has disassembled, if so we must
2832493cbb0SBarry Smith      also disassemble ourselfs, in order that we may reassemble. */
284416022c9SBarry Smith   MPI_Allreduce(&aij->assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
2852493cbb0SBarry Smith   if (aij->assembled && !other_disassembled) {
2862493cbb0SBarry Smith     ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
2872493cbb0SBarry Smith   }
2882493cbb0SBarry Smith 
2895e42470aSBarry Smith   if (!aij->assembled && mode == FINAL_ASSEMBLY) {
29078b31e54SBarry Smith     ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
291d6dfbf8fSBarry Smith     aij->assembled = 1;
2925e42470aSBarry Smith   }
29378b31e54SBarry Smith   ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
29478b31e54SBarry Smith   ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);
2955e42470aSBarry Smith 
2968a729477SBarry Smith   return 0;
2978a729477SBarry Smith }
2988a729477SBarry Smith 
2992ee70a88SLois Curfman McInnes static int MatZeroEntries_MPIAIJ(Mat A)
3001eb62cbbSBarry Smith {
30144a69424SLois Curfman McInnes   Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
302dbd7a890SLois Curfman McInnes   int        ierr;
30378b31e54SBarry Smith   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
30478b31e54SBarry Smith   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
3051eb62cbbSBarry Smith   return 0;
3061eb62cbbSBarry Smith }
3071eb62cbbSBarry Smith 
3081eb62cbbSBarry Smith /* the code does not do the diagonal entries correctly unless the
3091eb62cbbSBarry Smith    matrix is square and the column and row owerships are identical.
3101eb62cbbSBarry Smith    This is a BUG. The only way to fix it seems to be to access
3111eb62cbbSBarry Smith    aij->A and aij->B directly and not through the MatZeroRows()
3121eb62cbbSBarry Smith    routine.
3131eb62cbbSBarry Smith */
31444a69424SLois Curfman McInnes static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
3151eb62cbbSBarry Smith {
31644a69424SLois Curfman McInnes   Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
31717699dbbSLois Curfman McInnes   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
3186abc6512SBarry Smith   int            *procs,*nprocs,j,found,idx,nsends,*work;
31917699dbbSLois Curfman McInnes   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
3205392566eSBarry Smith   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
321d6dfbf8fSBarry Smith   int            *lens,imdex,*lrows,*values;
322d6dfbf8fSBarry Smith   MPI_Comm       comm = A->comm;
3231eb62cbbSBarry Smith   MPI_Request    *send_waits,*recv_waits;
3241eb62cbbSBarry Smith   MPI_Status     recv_status,*send_status;
3251eb62cbbSBarry Smith   IS             istmp;
3261eb62cbbSBarry Smith 
32748d91487SBarry Smith   if (!l->assembled) SETERRQ(1,"MatZeroRows_MPIAIJ:Must assemble matrix");
32878b31e54SBarry Smith   ierr = ISGetLocalSize(is,&N); CHKERRQ(ierr);
32978b31e54SBarry Smith   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
3301eb62cbbSBarry Smith 
3311eb62cbbSBarry Smith   /*  first count number of contributors to each processor */
3320452661fSBarry Smith   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
333cddf8d76SBarry Smith   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
3340452661fSBarry Smith   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
3351eb62cbbSBarry Smith   for ( i=0; i<N; i++ ) {
3361eb62cbbSBarry Smith     idx = rows[i];
3371eb62cbbSBarry Smith     found = 0;
33817699dbbSLois Curfman McInnes     for ( j=0; j<size; j++ ) {
3391eb62cbbSBarry Smith       if (idx >= owners[j] && idx < owners[j+1]) {
3401eb62cbbSBarry Smith         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
3411eb62cbbSBarry Smith       }
3421eb62cbbSBarry Smith     }
343bbb6d6a8SBarry Smith     if (!found) SETERRQ(1,"MatZeroRows_MPIAIJ:Index out of range");
3441eb62cbbSBarry Smith   }
34517699dbbSLois Curfman McInnes   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
3461eb62cbbSBarry Smith 
3471eb62cbbSBarry Smith   /* inform other processors of number of messages and max length*/
3480452661fSBarry Smith   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
34917699dbbSLois Curfman McInnes   MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
35017699dbbSLois Curfman McInnes   nrecvs = work[rank];
35117699dbbSLois Curfman McInnes   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
35217699dbbSLois Curfman McInnes   nmax = work[rank];
3530452661fSBarry Smith   PetscFree(work);
3541eb62cbbSBarry Smith 
3551eb62cbbSBarry Smith   /* post receives:   */
3560452661fSBarry Smith   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
35778b31e54SBarry Smith   CHKPTRQ(rvalues);
3580452661fSBarry Smith   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
35978b31e54SBarry Smith   CHKPTRQ(recv_waits);
3601eb62cbbSBarry Smith   for ( i=0; i<nrecvs; i++ ) {
361416022c9SBarry Smith     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
3621eb62cbbSBarry Smith   }
3631eb62cbbSBarry Smith 
3641eb62cbbSBarry Smith   /* do sends:
3651eb62cbbSBarry Smith       1) starts[i] gives the starting index in svalues for stuff going to
3661eb62cbbSBarry Smith          the ith processor
3671eb62cbbSBarry Smith   */
3680452661fSBarry Smith   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
3690452661fSBarry Smith   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
37078b31e54SBarry Smith   CHKPTRQ(send_waits);
3710452661fSBarry Smith   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
3721eb62cbbSBarry Smith   starts[0] = 0;
37317699dbbSLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
3741eb62cbbSBarry Smith   for ( i=0; i<N; i++ ) {
3751eb62cbbSBarry Smith     svalues[starts[owner[i]]++] = rows[i];
3761eb62cbbSBarry Smith   }
3771eb62cbbSBarry Smith   ISRestoreIndices(is,&rows);
3781eb62cbbSBarry Smith 
3791eb62cbbSBarry Smith   starts[0] = 0;
38017699dbbSLois Curfman McInnes   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
3811eb62cbbSBarry Smith   count = 0;
38217699dbbSLois Curfman McInnes   for ( i=0; i<size; i++ ) {
3831eb62cbbSBarry Smith     if (procs[i]) {
384416022c9SBarry Smith       MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
3851eb62cbbSBarry Smith     }
3861eb62cbbSBarry Smith   }
3870452661fSBarry Smith   PetscFree(starts);
3881eb62cbbSBarry Smith 
38917699dbbSLois Curfman McInnes   base = owners[rank];
3901eb62cbbSBarry Smith 
3911eb62cbbSBarry Smith   /*  wait on receives */
3920452661fSBarry Smith   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
3931eb62cbbSBarry Smith   source = lens + nrecvs;
3941eb62cbbSBarry Smith   count  = nrecvs; slen = 0;
3951eb62cbbSBarry Smith   while (count) {
396d6dfbf8fSBarry Smith     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
3971eb62cbbSBarry Smith     /* unpack receives into our local space */
3981eb62cbbSBarry Smith     MPI_Get_count(&recv_status,MPI_INT,&n);
399d6dfbf8fSBarry Smith     source[imdex]  = recv_status.MPI_SOURCE;
400d6dfbf8fSBarry Smith     lens[imdex]  = n;
4011eb62cbbSBarry Smith     slen += n;
4021eb62cbbSBarry Smith     count--;
4031eb62cbbSBarry Smith   }
4040452661fSBarry Smith   PetscFree(recv_waits);
4051eb62cbbSBarry Smith 
4061eb62cbbSBarry Smith   /* move the data into the send scatter */
4070452661fSBarry Smith   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
4081eb62cbbSBarry Smith   count = 0;
4091eb62cbbSBarry Smith   for ( i=0; i<nrecvs; i++ ) {
4101eb62cbbSBarry Smith     values = rvalues + i*nmax;
4111eb62cbbSBarry Smith     for ( j=0; j<lens[i]; j++ ) {
4121eb62cbbSBarry Smith       lrows[count++] = values[j] - base;
4131eb62cbbSBarry Smith     }
4141eb62cbbSBarry Smith   }
4150452661fSBarry Smith   PetscFree(rvalues); PetscFree(lens);
4160452661fSBarry Smith   PetscFree(owner); PetscFree(nprocs);
4171eb62cbbSBarry Smith 
4181eb62cbbSBarry Smith   /* actually zap the local rows */
419416022c9SBarry Smith   ierr = ISCreateSeq(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
420464493b3SBarry Smith   PLogObjectParent(A,istmp);
4210452661fSBarry Smith   PetscFree(lrows);
42278b31e54SBarry Smith   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
42378b31e54SBarry Smith   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
42478b31e54SBarry Smith   ierr = ISDestroy(istmp); CHKERRQ(ierr);
4251eb62cbbSBarry Smith 
4261eb62cbbSBarry Smith   /* wait on sends */
4271eb62cbbSBarry Smith   if (nsends) {
4280452661fSBarry Smith     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
42978b31e54SBarry Smith     CHKPTRQ(send_status);
4301eb62cbbSBarry Smith     MPI_Waitall(nsends,send_waits,send_status);
4310452661fSBarry Smith     PetscFree(send_status);
4321eb62cbbSBarry Smith   }
4330452661fSBarry Smith   PetscFree(send_waits); PetscFree(svalues);
4341eb62cbbSBarry Smith 
4351eb62cbbSBarry Smith   return 0;
4361eb62cbbSBarry Smith }
4371eb62cbbSBarry Smith 
438416022c9SBarry Smith static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
4391eb62cbbSBarry Smith {
440416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
4411eb62cbbSBarry Smith   int        ierr;
442416022c9SBarry Smith 
44348d91487SBarry Smith   if (!a->assembled) SETERRQ(1,"MatMult_MPIAIJ:must assemble matrix");
44464119d58SLois Curfman McInnes   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
445416022c9SBarry Smith   ierr = MatMult(a->A,xx,yy); CHKERRQ(ierr);
44664119d58SLois Curfman McInnes   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
447416022c9SBarry Smith   ierr = MatMultAdd(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
4481eb62cbbSBarry Smith   return 0;
4491eb62cbbSBarry Smith }
4501eb62cbbSBarry Smith 
451416022c9SBarry Smith static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
452da3a660dSBarry Smith {
453416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
454da3a660dSBarry Smith   int        ierr;
45548d91487SBarry Smith   if (!a->assembled) SETERRQ(1,"MatMult_MPIAIJ:must assemble matrix");
45664119d58SLois Curfman McInnes   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
457416022c9SBarry Smith   ierr = MatMultAdd(a->A,xx,yy,zz); CHKERRQ(ierr);
45864119d58SLois Curfman McInnes   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
459416022c9SBarry Smith   ierr = MatMultAdd(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
460da3a660dSBarry Smith   return 0;
461da3a660dSBarry Smith }
462da3a660dSBarry Smith 
463416022c9SBarry Smith static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
464da3a660dSBarry Smith {
465416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
466da3a660dSBarry Smith   int        ierr;
467da3a660dSBarry Smith 
46848d91487SBarry Smith   if (!a->assembled) SETERRQ(1,"MatMulTrans_MPIAIJ:must assemble matrix");
469da3a660dSBarry Smith   /* do nondiagonal part */
470416022c9SBarry Smith   ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr);
471da3a660dSBarry Smith   /* send it on its way */
472416022c9SBarry Smith   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,
47364119d58SLois Curfman McInnes                 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
474da3a660dSBarry Smith   /* do local part */
475416022c9SBarry Smith   ierr = MatMultTrans(a->A,xx,yy); CHKERRQ(ierr);
476da3a660dSBarry Smith   /* receive remote parts: note this assumes the values are not actually */
477da3a660dSBarry Smith   /* inserted in yy until the next line, which is true for my implementation*/
478da3a660dSBarry Smith   /* but is not perhaps always true. */
479416022c9SBarry Smith   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,
48064119d58SLois Curfman McInnes                   (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
481da3a660dSBarry Smith   return 0;
482da3a660dSBarry Smith }
483da3a660dSBarry Smith 
484416022c9SBarry Smith static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
485da3a660dSBarry Smith {
486416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
487da3a660dSBarry Smith   int        ierr;
488da3a660dSBarry Smith 
48948d91487SBarry Smith   if (!a->assembled) SETERRQ(1,"MatMulTransAdd_MPIAIJ:must assemble matrix");
490da3a660dSBarry Smith   /* do nondiagonal part */
491416022c9SBarry Smith   ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr);
492da3a660dSBarry Smith   /* send it on its way */
493416022c9SBarry Smith   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,
49464119d58SLois Curfman McInnes                  (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
495da3a660dSBarry Smith   /* do local part */
496416022c9SBarry Smith   ierr = MatMultTransAdd(a->A,xx,yy,zz); CHKERRQ(ierr);
497da3a660dSBarry Smith   /* receive remote parts: note this assumes the values are not actually */
498da3a660dSBarry Smith   /* inserted in yy until the next line, which is true for my implementation*/
499da3a660dSBarry Smith   /* but is not perhaps always true. */
500416022c9SBarry Smith   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,
50164119d58SLois Curfman McInnes                   (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
502da3a660dSBarry Smith   return 0;
503da3a660dSBarry Smith }
504da3a660dSBarry Smith 
5051eb62cbbSBarry Smith /*
5061eb62cbbSBarry Smith   This only works correctly for square matrices where the subblock A->A is the
5071eb62cbbSBarry Smith    diagonal block
5081eb62cbbSBarry Smith */
509416022c9SBarry Smith static int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
5101eb62cbbSBarry Smith {
511416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
51248d91487SBarry Smith   if (!a->assembled) SETERRQ(1,"MatGetDiag_MPIAIJ:must assemble matrix");
513416022c9SBarry Smith   return MatGetDiagonal(a->A,v);
5141eb62cbbSBarry Smith }
5151eb62cbbSBarry Smith 
51644a69424SLois Curfman McInnes static int MatDestroy_MPIAIJ(PetscObject obj)
5171eb62cbbSBarry Smith {
5181eb62cbbSBarry Smith   Mat        mat = (Mat) obj;
51944a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
5201eb62cbbSBarry Smith   int        ierr;
521a5a9c739SBarry Smith #if defined(PETSC_LOG)
5226e35fa57SLois Curfman McInnes   PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N);
523a5a9c739SBarry Smith #endif
5240452661fSBarry Smith   PetscFree(aij->rowners);
52578b31e54SBarry Smith   ierr = MatDestroy(aij->A); CHKERRQ(ierr);
52678b31e54SBarry Smith   ierr = MatDestroy(aij->B); CHKERRQ(ierr);
5270452661fSBarry Smith   if (aij->colmap) PetscFree(aij->colmap);
5280452661fSBarry Smith   if (aij->garray) PetscFree(aij->garray);
5291eb62cbbSBarry Smith   if (aij->lvec)   VecDestroy(aij->lvec);
530a56f8943SBarry Smith   if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
5310452661fSBarry Smith   PetscFree(aij);
532a5a9c739SBarry Smith   PLogObjectDestroy(mat);
5330452661fSBarry Smith   PetscHeaderDestroy(mat);
5341eb62cbbSBarry Smith   return 0;
5351eb62cbbSBarry Smith }
5367857610eSBarry Smith #include "draw.h"
537b16a3bb1SBarry Smith #include "pinclude/pviewer.h"
538ee50ffe9SBarry Smith 
539416022c9SBarry Smith static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
5401eb62cbbSBarry Smith {
541416022c9SBarry Smith   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
542416022c9SBarry Smith   int         ierr;
543416022c9SBarry Smith 
54417699dbbSLois Curfman McInnes   if (aij->size == 1) {
545416022c9SBarry Smith     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
546416022c9SBarry Smith   }
547a523beb4SLois Curfman McInnes   else SETERRQ(1,"MatView_MPIAIJ_Binary:Only uniprocessor output supported");
548416022c9SBarry Smith   return 0;
549416022c9SBarry Smith }
550416022c9SBarry Smith 
551d7e8b826SBarry Smith static int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
552416022c9SBarry Smith {
55344a69424SLois Curfman McInnes   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
554dbb450caSBarry Smith   Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
555a56f8943SBarry Smith   int         ierr, format,shift = C->indexshift,rank;
556d13ab20cSBarry Smith   PetscObject vobj = (PetscObject) viewer;
557d636dbe3SBarry Smith   FILE        *fd;
558416022c9SBarry Smith 
559416022c9SBarry Smith   if (vobj->type == ASCII_FILE_VIEWER || vobj->type == ASCII_FILES_VIEWER) {
560416022c9SBarry Smith     ierr = ViewerFileGetFormat_Private(viewer,&format);
56108480c60SBarry Smith     if (format == FILE_FORMAT_INFO_DETAILED) {
562a56f8943SBarry Smith       int nz,nzalloc,mem;
563a56f8943SBarry Smith       MPI_Comm_rank(mat->comm,&rank);
564a56f8943SBarry Smith       ierr = ViewerFileGetPointer_Private(viewer,&fd); CHKERRQ(ierr);
565a56f8943SBarry Smith       ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem);
566a56f8943SBarry Smith       MPIU_Seq_begin(mat->comm,1);
567a56f8943SBarry Smith       fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d \n",rank,aij->m,nz,
568a56f8943SBarry Smith                 nzalloc,mem);
56908480c60SBarry Smith       ierr = MatGetInfo(aij->A,MAT_LOCAL,&nz,&nzalloc,&mem);
57008480c60SBarry Smith       fprintf(fd,"[%d] on diagonal nz %d \n",rank,nz);
57108480c60SBarry Smith       ierr = MatGetInfo(aij->B,MAT_LOCAL,&nz,&nzalloc,&mem);
57208480c60SBarry Smith       fprintf(fd,"[%d] off diagonal nz %d \n",rank,nz);
573a56f8943SBarry Smith       fflush(fd);
574a56f8943SBarry Smith       MPIU_Seq_end(mat->comm,1);
575a40aa06bSLois Curfman McInnes       ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr);
576416022c9SBarry Smith       return 0;
577416022c9SBarry Smith     }
57808480c60SBarry Smith     else if (format == FILE_FORMAT_INFO) {
57908480c60SBarry Smith       return 0;
58008480c60SBarry Smith     }
581416022c9SBarry Smith   }
582416022c9SBarry Smith 
583416022c9SBarry Smith   if (vobj->type == ASCII_FILE_VIEWER) {
584d636dbe3SBarry Smith     ierr = ViewerFileGetPointer_Private(viewer,&fd); CHKERRQ(ierr);
5857f813858SBarry Smith     MPIU_Seq_begin(mat->comm,1);
586d13ab20cSBarry Smith     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
58717699dbbSLois Curfman McInnes            aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
5881eb62cbbSBarry Smith            aij->cend);
58978b31e54SBarry Smith     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
59078b31e54SBarry Smith     ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
591d13ab20cSBarry Smith     fflush(fd);
5927f813858SBarry Smith     MPIU_Seq_end(mat->comm,1);
593d13ab20cSBarry Smith   }
594416022c9SBarry Smith   else {
595a56f8943SBarry Smith     int size = aij->size;
596a56f8943SBarry Smith     rank = aij->rank;
59717699dbbSLois Curfman McInnes     if (size == 1) {
59878b31e54SBarry Smith       ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
59995373324SBarry Smith     }
60095373324SBarry Smith     else {
60195373324SBarry Smith       /* assemble the entire matrix onto first processor. */
60295373324SBarry Smith       Mat         A;
603ec8511deSBarry Smith       Mat_SeqAIJ *Aloc;
6042eb8c8abSBarry Smith       int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
60595373324SBarry Smith       Scalar      *a;
6062ee70a88SLois Curfman McInnes 
60717699dbbSLois Curfman McInnes       if (!rank) {
608b4fd4287SBarry Smith         ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
609c451ab25SLois Curfman McInnes                CHKERRQ(ierr);
61095373324SBarry Smith       }
61195373324SBarry Smith       else {
612b4fd4287SBarry Smith         ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
613c451ab25SLois Curfman McInnes                CHKERRQ(ierr);
61495373324SBarry Smith       }
615464493b3SBarry Smith       PLogObjectParent(mat,A);
616416022c9SBarry Smith 
61795373324SBarry Smith       /* copy over the A part */
618ec8511deSBarry Smith       Aloc = (Mat_SeqAIJ*) aij->A->data;
6192ee70a88SLois Curfman McInnes       m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
62095373324SBarry Smith       row = aij->rstart;
621dbb450caSBarry Smith       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
62295373324SBarry Smith       for ( i=0; i<m; i++ ) {
623416022c9SBarry Smith         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
62495373324SBarry Smith         row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
62595373324SBarry Smith       }
6262ee70a88SLois Curfman McInnes       aj = Aloc->j;
627dbb450caSBarry Smith       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}
62895373324SBarry Smith 
62995373324SBarry Smith       /* copy over the B part */
630ec8511deSBarry Smith       Aloc = (Mat_SeqAIJ*) aij->B->data;
6312ee70a88SLois Curfman McInnes       m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
63295373324SBarry Smith       row = aij->rstart;
6330452661fSBarry Smith       ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
634dbb450caSBarry Smith       for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
63595373324SBarry Smith       for ( i=0; i<m; i++ ) {
636416022c9SBarry Smith         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
63795373324SBarry Smith         row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
63895373324SBarry Smith       }
6390452661fSBarry Smith       PetscFree(ct);
64078b31e54SBarry Smith       ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
64178b31e54SBarry Smith       ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
64217699dbbSLois Curfman McInnes       if (!rank) {
64378b31e54SBarry Smith         ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
64495373324SBarry Smith       }
64578b31e54SBarry Smith       ierr = MatDestroy(A); CHKERRQ(ierr);
64695373324SBarry Smith     }
64795373324SBarry Smith   }
6481eb62cbbSBarry Smith   return 0;
6491eb62cbbSBarry Smith }
6501eb62cbbSBarry Smith 
651416022c9SBarry Smith static int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
652416022c9SBarry Smith {
653416022c9SBarry Smith   Mat         mat = (Mat) obj;
654416022c9SBarry Smith   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
655416022c9SBarry Smith   int         ierr;
656416022c9SBarry Smith   PetscObject vobj = (PetscObject) viewer;
657416022c9SBarry Smith 
65848d91487SBarry Smith   if (!aij->assembled) SETERRQ(1,"MatView_MPIAIJ:must assemble matrix");
659416022c9SBarry Smith   if (!viewer) {
660416022c9SBarry Smith     viewer = STDOUT_VIEWER_SELF; vobj = (PetscObject) viewer;
661416022c9SBarry Smith   }
662416022c9SBarry Smith   if (vobj->cookie == DRAW_COOKIE && vobj->type == NULLWINDOW) return 0;
663416022c9SBarry Smith   else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILE_VIEWER) {
664d7e8b826SBarry Smith     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
665416022c9SBarry Smith   }
666416022c9SBarry Smith   else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILES_VIEWER) {
667d7e8b826SBarry Smith     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
668d7e8b826SBarry Smith   }
669d7e8b826SBarry Smith   else if (vobj->cookie == VIEWER_COOKIE && vobj->type == MATLAB_VIEWER) {
670d7e8b826SBarry Smith     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
671416022c9SBarry Smith   }
672416022c9SBarry Smith   else if (vobj->cookie == DRAW_COOKIE) {
673d7e8b826SBarry Smith     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
674416022c9SBarry Smith   }
675416022c9SBarry Smith   else if (vobj->type == BINARY_FILE_VIEWER) {
676416022c9SBarry Smith     return MatView_MPIAIJ_Binary(mat,viewer);
677416022c9SBarry Smith   }
678416022c9SBarry Smith   return 0;
679416022c9SBarry Smith }
680416022c9SBarry Smith 
681ec8511deSBarry Smith extern int MatMarkDiag_SeqAIJ(Mat);
6821eb62cbbSBarry Smith /*
6831eb62cbbSBarry Smith     This has to provide several versions.
6841eb62cbbSBarry Smith 
6851eb62cbbSBarry Smith      1) per sequential
6861eb62cbbSBarry Smith      2) a) use only local smoothing updating outer values only once.
6871eb62cbbSBarry Smith         b) local smoothing updating outer values each inner iteration
688d6dfbf8fSBarry Smith      3) color updating out values betwen colors.
6891eb62cbbSBarry Smith */
690fc76ce87SLois Curfman McInnes static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
691dbb450caSBarry Smith                            double fshift,int its,Vec xx)
6928a729477SBarry Smith {
69344a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
694d6dfbf8fSBarry Smith   Mat        AA = mat->A, BB = mat->B;
695ec8511deSBarry Smith   Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
6966abc6512SBarry Smith   Scalar     zero = 0.0,*b,*x,*xs,*ls,d,*v,sum,scale,*t,*ts;
6976abc6512SBarry Smith   int        ierr,*idx, *diag;
698416022c9SBarry Smith   int        n = mat->n, m = mat->m, i,shift = A->indexshift;
699da3a660dSBarry Smith   Vec        tt;
7008a729477SBarry Smith 
70148d91487SBarry Smith   if (!mat->assembled) SETERRQ(1,"MatRelax_MPIAIJ:must assemble matrix");
7021eb62cbbSBarry Smith 
703d6dfbf8fSBarry Smith   VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
704dbb450caSBarry Smith   xs = x + shift; /* shift by one for index start of 1 */
705dbb450caSBarry Smith   ls = ls + shift;
706ec8511deSBarry Smith   if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;}
707d6dfbf8fSBarry Smith   diag = A->diag;
708acb40c82SBarry Smith   if (flag == SOR_APPLY_UPPER || flag == SOR_APPLY_LOWER) {
70948d91487SBarry Smith     SETERRQ(1,"MatRelax_MPIAIJ:Option not supported");
710acb40c82SBarry Smith   }
711da3a660dSBarry Smith   if (flag & SOR_EISENSTAT) {
712da3a660dSBarry Smith     /* Let  A = L + U + D; where L is lower trianglar,
713da3a660dSBarry Smith     U is upper triangular, E is diagonal; This routine applies
714da3a660dSBarry Smith 
715da3a660dSBarry Smith             (L + E)^{-1} A (U + E)^{-1}
716da3a660dSBarry Smith 
717da3a660dSBarry Smith     to a vector efficiently using Eisenstat's trick. This is for
718da3a660dSBarry Smith     the case of SSOR preconditioner, so E is D/omega where omega
719da3a660dSBarry Smith     is the relaxation factor.
720da3a660dSBarry Smith     */
72178b31e54SBarry Smith     ierr = VecDuplicate(xx,&tt); CHKERRQ(ierr);
722464493b3SBarry Smith     PLogObjectParent(matin,tt);
723da3a660dSBarry Smith     VecGetArray(tt,&t);
724da3a660dSBarry Smith     scale = (2.0/omega) - 1.0;
725da3a660dSBarry Smith     /*  x = (E + U)^{-1} b */
726da3a660dSBarry Smith     VecSet(&zero,mat->lvec);
72764119d58SLois Curfman McInnes     ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
72878b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
729da3a660dSBarry Smith     for ( i=m-1; i>-1; i-- ) {
730da3a660dSBarry Smith       n    = A->i[i+1] - diag[i] - 1;
731dbb450caSBarry Smith       idx  = A->j + diag[i] + !shift;
732dbb450caSBarry Smith       v    = A->a + diag[i] + !shift;
733da3a660dSBarry Smith       sum  = b[i];
734da3a660dSBarry Smith       SPARSEDENSEMDOT(sum,xs,v,idx,n);
735dbb450caSBarry Smith       d    = fshift + A->a[diag[i]+shift];
736da3a660dSBarry Smith       n    = B->i[i+1] - B->i[i];
737dbb450caSBarry Smith       idx  = B->j + B->i[i] + shift;
738dbb450caSBarry Smith       v    = B->a + B->i[i] + shift;
739da3a660dSBarry Smith       SPARSEDENSEMDOT(sum,ls,v,idx,n);
740da3a660dSBarry Smith       x[i] = omega*(sum/d);
741da3a660dSBarry Smith     }
74264119d58SLois Curfman McInnes     ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
74378b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
744da3a660dSBarry Smith 
745da3a660dSBarry Smith     /*  t = b - (2*E - D)x */
746da3a660dSBarry Smith     v = A->a;
747dbb450caSBarry Smith     for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ + shift])*x[i]; }
748da3a660dSBarry Smith 
749da3a660dSBarry Smith     /*  t = (E + L)^{-1}t */
750dbb450caSBarry Smith     ts = t + shift; /* shifted by one for index start of a or mat->j*/
751da3a660dSBarry Smith     diag = A->diag;
752da3a660dSBarry Smith     VecSet(&zero,mat->lvec);
75364119d58SLois Curfman McInnes     ierr = VecPipelineBegin(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
75478b31e54SBarry Smith                                                  mat->Mvctx); CHKERRQ(ierr);
755da3a660dSBarry Smith     for ( i=0; i<m; i++ ) {
756da3a660dSBarry Smith       n    = diag[i] - A->i[i];
757dbb450caSBarry Smith       idx  = A->j + A->i[i] + shift;
758dbb450caSBarry Smith       v    = A->a + A->i[i] + shift;
759da3a660dSBarry Smith       sum  = t[i];
760da3a660dSBarry Smith       SPARSEDENSEMDOT(sum,ts,v,idx,n);
761dbb450caSBarry Smith       d    = fshift + A->a[diag[i]+shift];
762da3a660dSBarry Smith       n    = B->i[i+1] - B->i[i];
763dbb450caSBarry Smith       idx  = B->j + B->i[i] + shift;
764dbb450caSBarry Smith       v    = B->a + B->i[i] + shift;
765da3a660dSBarry Smith       SPARSEDENSEMDOT(sum,ls,v,idx,n);
766da3a660dSBarry Smith       t[i] = omega*(sum/d);
767da3a660dSBarry Smith     }
76864119d58SLois Curfman McInnes     ierr = VecPipelineEnd(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
76978b31e54SBarry Smith                                                     mat->Mvctx); CHKERRQ(ierr);
770da3a660dSBarry Smith     /*  x = x + t */
771da3a660dSBarry Smith     for ( i=0; i<m; i++ ) { x[i] += t[i]; }
772da3a660dSBarry Smith     VecDestroy(tt);
773da3a660dSBarry Smith     return 0;
774da3a660dSBarry Smith   }
775da3a660dSBarry Smith 
7761eb62cbbSBarry Smith 
777d6dfbf8fSBarry Smith   if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){
778da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
779da3a660dSBarry Smith       VecSet(&zero,mat->lvec); VecSet(&zero,xx);
780da3a660dSBarry Smith     }
781da3a660dSBarry Smith     else {
78264119d58SLois Curfman McInnes       ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
78378b31e54SBarry Smith       CHKERRQ(ierr);
78464119d58SLois Curfman McInnes       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
78578b31e54SBarry Smith       CHKERRQ(ierr);
786da3a660dSBarry Smith     }
787d6dfbf8fSBarry Smith     while (its--) {
788d6dfbf8fSBarry Smith       /* go down through the rows */
78964119d58SLois Curfman McInnes       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
79078b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
791d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
792d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
793dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
794dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
795d6dfbf8fSBarry Smith         sum  = b[i];
796d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
797dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
798d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
799dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
800dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
801d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
802d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
803d6dfbf8fSBarry Smith       }
80464119d58SLois Curfman McInnes       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
80578b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
806d6dfbf8fSBarry Smith       /* come up through the rows */
80764119d58SLois Curfman McInnes       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
80878b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
809d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
810d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
811dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
812dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
813d6dfbf8fSBarry Smith         sum  = b[i];
814d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
815dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
816d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
817dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
818dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
819d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
820d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
821d6dfbf8fSBarry Smith       }
82264119d58SLois Curfman McInnes       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
82378b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
824d6dfbf8fSBarry Smith     }
825d6dfbf8fSBarry Smith   }
826d6dfbf8fSBarry Smith   else if (flag & SOR_FORWARD_SWEEP){
827da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
828da3a660dSBarry Smith       VecSet(&zero,mat->lvec);
82964119d58SLois Curfman McInnes       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
83078b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
831da3a660dSBarry Smith       for ( i=0; i<m; i++ ) {
832da3a660dSBarry Smith         n    = diag[i] - A->i[i];
833dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
834dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
835da3a660dSBarry Smith         sum  = b[i];
836da3a660dSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
837dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
838da3a660dSBarry Smith         n    = B->i[i+1] - B->i[i];
839dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
840dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
841da3a660dSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
842da3a660dSBarry Smith         x[i] = omega*(sum/d);
843da3a660dSBarry Smith       }
84464119d58SLois Curfman McInnes       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
84578b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
846da3a660dSBarry Smith       its--;
847da3a660dSBarry Smith     }
848d6dfbf8fSBarry Smith     while (its--) {
84964119d58SLois Curfman McInnes       ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
85078b31e54SBarry Smith       CHKERRQ(ierr);
85164119d58SLois Curfman McInnes       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
85278b31e54SBarry Smith       CHKERRQ(ierr);
85364119d58SLois Curfman McInnes       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
85478b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
855d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
856d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
857dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
858dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
859d6dfbf8fSBarry Smith         sum  = b[i];
860d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
861dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
862d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
863dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
864dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
865d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
866d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
867d6dfbf8fSBarry Smith       }
86864119d58SLois Curfman McInnes       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
86978b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
870d6dfbf8fSBarry Smith     }
871d6dfbf8fSBarry Smith   }
872d6dfbf8fSBarry Smith   else if (flag & SOR_BACKWARD_SWEEP){
873da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
874da3a660dSBarry Smith       VecSet(&zero,mat->lvec);
87564119d58SLois Curfman McInnes       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
87678b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
877da3a660dSBarry Smith       for ( i=m-1; i>-1; i-- ) {
878da3a660dSBarry Smith         n    = A->i[i+1] - diag[i] - 1;
879dbb450caSBarry Smith         idx  = A->j + diag[i] + !shift;
880dbb450caSBarry Smith         v    = A->a + diag[i] + !shift;
881da3a660dSBarry Smith         sum  = b[i];
882da3a660dSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
883dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
884da3a660dSBarry Smith         n    = B->i[i+1] - B->i[i];
885dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
886dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
887da3a660dSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
888da3a660dSBarry Smith         x[i] = omega*(sum/d);
889da3a660dSBarry Smith       }
89064119d58SLois Curfman McInnes       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
89178b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
892da3a660dSBarry Smith       its--;
893da3a660dSBarry Smith     }
894d6dfbf8fSBarry Smith     while (its--) {
89564119d58SLois Curfman McInnes       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN,
89678b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
89764119d58SLois Curfman McInnes       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN,
89878b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
89964119d58SLois Curfman McInnes       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
90078b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
901d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
902d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
903dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
904dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
905d6dfbf8fSBarry Smith         sum  = b[i];
906d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
907dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
908d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
909dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
910dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
911d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
912d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
913d6dfbf8fSBarry Smith       }
91464119d58SLois Curfman McInnes       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
91578b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
916d6dfbf8fSBarry Smith     }
917d6dfbf8fSBarry Smith   }
918d6dfbf8fSBarry Smith   else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
919da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
920dbb450caSBarry Smith       return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
921da3a660dSBarry Smith     }
92264119d58SLois Curfman McInnes     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
92378b31e54SBarry Smith     CHKERRQ(ierr);
92464119d58SLois Curfman McInnes     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
92578b31e54SBarry Smith     CHKERRQ(ierr);
926d6dfbf8fSBarry Smith     while (its--) {
927d6dfbf8fSBarry Smith       /* go down through the rows */
928d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
929d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
930dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
931dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
932d6dfbf8fSBarry Smith         sum  = b[i];
933d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
934dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
935d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
936dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
937dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
938d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
939d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
940d6dfbf8fSBarry Smith       }
941d6dfbf8fSBarry Smith       /* come up through the rows */
942d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
943d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
944dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
945dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
946d6dfbf8fSBarry Smith         sum  = b[i];
947d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
948dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
949d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
950dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
951dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
952d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
953d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
954d6dfbf8fSBarry Smith       }
955d6dfbf8fSBarry Smith     }
956d6dfbf8fSBarry Smith   }
957d6dfbf8fSBarry Smith   else if (flag & SOR_LOCAL_FORWARD_SWEEP){
958da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
959dbb450caSBarry Smith       return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
960da3a660dSBarry Smith     }
96164119d58SLois Curfman McInnes     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
96278b31e54SBarry Smith     CHKERRQ(ierr);
96364119d58SLois Curfman McInnes     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
96478b31e54SBarry Smith     CHKERRQ(ierr);
965d6dfbf8fSBarry Smith     while (its--) {
966d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
967d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
968dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
969dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
970d6dfbf8fSBarry Smith         sum  = b[i];
971d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
972dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
973d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
974dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
975dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
976d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
977d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
978d6dfbf8fSBarry Smith       }
979d6dfbf8fSBarry Smith     }
980d6dfbf8fSBarry Smith   }
981d6dfbf8fSBarry Smith   else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
982da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
983dbb450caSBarry Smith       return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
984da3a660dSBarry Smith     }
98564119d58SLois Curfman McInnes     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,
98678b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
98764119d58SLois Curfman McInnes     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,
98878b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
989d6dfbf8fSBarry Smith     while (its--) {
990d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
991d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
992dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
993dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
994d6dfbf8fSBarry Smith         sum  = b[i];
995d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
996dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
997d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
998dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
999dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
1000d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
1001d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
1002d6dfbf8fSBarry Smith       }
1003d6dfbf8fSBarry Smith     }
1004d6dfbf8fSBarry Smith   }
10058a729477SBarry Smith   return 0;
10068a729477SBarry Smith }
1007a66be287SLois Curfman McInnes 
1008fc76ce87SLois Curfman McInnes static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,int *nz,
1009fc76ce87SLois Curfman McInnes                              int *nzalloc,int *mem)
1010a66be287SLois Curfman McInnes {
1011a66be287SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1012a66be287SLois Curfman McInnes   Mat        A = mat->A, B = mat->B;
1013a66be287SLois Curfman McInnes   int        ierr, isend[3], irecv[3], nzA, nzallocA, memA;
1014a66be287SLois Curfman McInnes 
101578b31e54SBarry Smith   ierr = MatGetInfo(A,MAT_LOCAL,&nzA,&nzallocA,&memA); CHKERRQ(ierr);
101678b31e54SBarry Smith   ierr = MatGetInfo(B,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr);
1017a66be287SLois Curfman McInnes   isend[0] += nzA; isend[1] += nzallocA; isend[2] += memA;
1018a66be287SLois Curfman McInnes   if (flag == MAT_LOCAL) {
1019a66be287SLois Curfman McInnes     *nz = isend[0]; *nzalloc = isend[1]; *mem = isend[2];
1020a66be287SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_MAX) {
1021d65a2f8fSBarry Smith     MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_MAX,matin->comm);
1022a66be287SLois Curfman McInnes     *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
1023a66be287SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_SUM) {
1024d65a2f8fSBarry Smith     MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_SUM,matin->comm);
1025a66be287SLois Curfman McInnes     *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
1026a66be287SLois Curfman McInnes   }
1027a66be287SLois Curfman McInnes   return 0;
1028a66be287SLois Curfman McInnes }
1029a66be287SLois Curfman McInnes 
1030299609e3SLois Curfman McInnes extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*);
1031299609e3SLois Curfman McInnes extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*);
1032299609e3SLois Curfman McInnes extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double);
1033299609e3SLois Curfman McInnes extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *);
1034299609e3SLois Curfman McInnes extern int MatSolve_MPIAIJ(Mat,Vec,Vec);
1035299609e3SLois Curfman McInnes extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec);
1036299609e3SLois Curfman McInnes extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec);
1037299609e3SLois Curfman McInnes extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec);
1038299609e3SLois Curfman McInnes 
1039416022c9SBarry Smith static int MatSetOption_MPIAIJ(Mat A,MatOption op)
1040c74985f6SBarry Smith {
1041c0bbcb79SLois Curfman McInnes   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1042c74985f6SBarry Smith 
1043c0bbcb79SLois Curfman McInnes   if (op == NO_NEW_NONZERO_LOCATIONS ||
1044c0bbcb79SLois Curfman McInnes       op == YES_NEW_NONZERO_LOCATIONS ||
1045c0bbcb79SLois Curfman McInnes       op == COLUMNS_SORTED ||
1046c0bbcb79SLois Curfman McInnes       op == ROW_ORIENTED) {
1047c0bbcb79SLois Curfman McInnes         MatSetOption(a->A,op);
1048c0bbcb79SLois Curfman McInnes         MatSetOption(a->B,op);
1049c74985f6SBarry Smith   }
1050c0bbcb79SLois Curfman McInnes   else if (op == ROWS_SORTED ||
1051c0bbcb79SLois Curfman McInnes            op == SYMMETRIC_MATRIX ||
1052c0bbcb79SLois Curfman McInnes            op == STRUCTURALLY_SYMMETRIC_MATRIX ||
1053c0bbcb79SLois Curfman McInnes            op == YES_NEW_DIAGONALS)
1054c0bbcb79SLois Curfman McInnes     PLogInfo((PetscObject)A,"Info:MatSetOption_MPIAIJ:Option ignored\n");
10554b0e389bSBarry Smith   else if (op == COLUMN_ORIENTED) {
10564b0e389bSBarry Smith     a->roworiented = 0;
10574b0e389bSBarry Smith     MatSetOption(a->A,op);
10584b0e389bSBarry Smith     MatSetOption(a->B,op);
10594b0e389bSBarry Smith   }
1060c0bbcb79SLois Curfman McInnes   else if (op == NO_NEW_DIAGONALS)
10614d39ef84SLois Curfman McInnes     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:NO_NEW_DIAGONALS");}
1062c0bbcb79SLois Curfman McInnes   else
10634d39ef84SLois Curfman McInnes     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:unknown option");}
1064c74985f6SBarry Smith   return 0;
1065c74985f6SBarry Smith }
1066c74985f6SBarry Smith 
1067d1710a03SLois Curfman McInnes static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
1068c74985f6SBarry Smith {
106944a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1070c74985f6SBarry Smith   *m = mat->M; *n = mat->N;
1071c74985f6SBarry Smith   return 0;
1072c74985f6SBarry Smith }
1073c74985f6SBarry Smith 
1074d1710a03SLois Curfman McInnes static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
1075c74985f6SBarry Smith {
107644a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1077b7c46309SBarry Smith   *m = mat->m; *n = mat->N;
1078c74985f6SBarry Smith   return 0;
1079c74985f6SBarry Smith }
1080c74985f6SBarry Smith 
1081d1710a03SLois Curfman McInnes static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
1082c74985f6SBarry Smith {
108344a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1084c74985f6SBarry Smith   *m = mat->rstart; *n = mat->rend;
1085c74985f6SBarry Smith   return 0;
1086c74985f6SBarry Smith }
1087c74985f6SBarry Smith 
108839e00950SLois Curfman McInnes static int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
108939e00950SLois Curfman McInnes {
1090154123eaSLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1091154123eaSLois Curfman McInnes   Scalar     *vworkA, *vworkB, **pvA, **pvB;
1092154123eaSLois Curfman McInnes   int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
1093154123eaSLois Curfman McInnes   int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
109439e00950SLois Curfman McInnes 
1095b49de8d1SLois Curfman McInnes   if (!mat->assembled) SETERRQ(1,"MatGetRow_MPIAIJ:Must assemble matrix");
1096b49de8d1SLois Curfman McInnes   if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIAIJ:Only local rows")
1097abc0e9e4SLois Curfman McInnes   lrow = row - rstart;
109839e00950SLois Curfman McInnes 
1099154123eaSLois Curfman McInnes   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1100154123eaSLois Curfman McInnes   if (!v)   {pvA = 0; pvB = 0;}
1101154123eaSLois Curfman McInnes   if (!idx) {pcA = 0; if (!v) pcB = 0;}
110278b31e54SBarry Smith   ierr = MatGetRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
110378b31e54SBarry Smith   ierr = MatGetRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1104154123eaSLois Curfman McInnes   nztot = nzA + nzB;
1105154123eaSLois Curfman McInnes 
1106154123eaSLois Curfman McInnes   if (v  || idx) {
1107154123eaSLois Curfman McInnes     if (nztot) {
1108154123eaSLois Curfman McInnes       /* Sort by increasing column numbers, assuming A and B already sorted */
1109299609e3SLois Curfman McInnes       int imark;
111048b35521SBarry Smith       if (mat->assembled) {
1111154123eaSLois Curfman McInnes         for (i=0; i<nzB; i++) cworkB[i] = mat->garray[cworkB[i]];
111248b35521SBarry Smith       }
1113154123eaSLois Curfman McInnes       if (v) {
11140452661fSBarry Smith         *v = (Scalar *) PetscMalloc( (nztot)*sizeof(Scalar) ); CHKPTRQ(*v);
111539e00950SLois Curfman McInnes         for ( i=0; i<nzB; i++ ) {
1116154123eaSLois Curfman McInnes           if (cworkB[i] < cstart)   (*v)[i] = vworkB[i];
1117154123eaSLois Curfman McInnes           else break;
1118154123eaSLois Curfman McInnes         }
1119154123eaSLois Curfman McInnes         imark = i;
1120154123eaSLois Curfman McInnes         for ( i=0; i<nzA; i++ )     (*v)[imark+i] = vworkA[i];
1121299609e3SLois Curfman McInnes         for ( i=imark; i<nzB; i++ ) (*v)[nzA+i] = vworkB[i];
1122154123eaSLois Curfman McInnes       }
1123154123eaSLois Curfman McInnes       if (idx) {
11240452661fSBarry Smith         *idx = (int *) PetscMalloc( (nztot)*sizeof(int) ); CHKPTRQ(*idx);
1125154123eaSLois Curfman McInnes         for (i=0; i<nzA; i++) cworkA[i] += cstart;
1126154123eaSLois Curfman McInnes         for ( i=0; i<nzB; i++ ) {
1127154123eaSLois Curfman McInnes           if (cworkB[i] < cstart)   (*idx)[i] = cworkB[i];
1128154123eaSLois Curfman McInnes           else break;
1129154123eaSLois Curfman McInnes         }
1130154123eaSLois Curfman McInnes         imark = i;
1131154123eaSLois Curfman McInnes         for ( i=0; i<nzA; i++ )     (*idx)[imark+i] = cworkA[i];
1132299609e3SLois Curfman McInnes         for ( i=imark; i<nzB; i++ ) (*idx)[nzA+i] = cworkB[i];
113339e00950SLois Curfman McInnes       }
113439e00950SLois Curfman McInnes     }
113539e00950SLois Curfman McInnes     else {*idx = 0; *v=0;}
1136154123eaSLois Curfman McInnes   }
113739e00950SLois Curfman McInnes   *nz = nztot;
113878b31e54SBarry Smith   ierr = MatRestoreRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
113978b31e54SBarry Smith   ierr = MatRestoreRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
114039e00950SLois Curfman McInnes   return 0;
114139e00950SLois Curfman McInnes }
114239e00950SLois Curfman McInnes 
114339e00950SLois Curfman McInnes static int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
114439e00950SLois Curfman McInnes {
11450452661fSBarry Smith   if (idx) PetscFree(*idx);
11460452661fSBarry Smith   if (v) PetscFree(*v);
114739e00950SLois Curfman McInnes   return 0;
114839e00950SLois Curfman McInnes }
114939e00950SLois Curfman McInnes 
1150cddf8d76SBarry Smith static int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
1151855ac2c5SLois Curfman McInnes {
1152855ac2c5SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1153ec8511deSBarry Smith   Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
1154416022c9SBarry Smith   int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
1155416022c9SBarry Smith   double     sum = 0.0;
115604ca555eSLois Curfman McInnes   Scalar     *v;
115704ca555eSLois Curfman McInnes 
1158416022c9SBarry Smith   if (!aij->assembled) SETERRQ(1,"MatNorm_MPIAIJ:Must assemble matrix");
115917699dbbSLois Curfman McInnes   if (aij->size == 1) {
116014183eadSLois Curfman McInnes     ierr =  MatNorm(aij->A,type,norm); CHKERRQ(ierr);
116137fa93a5SLois Curfman McInnes   } else {
116204ca555eSLois Curfman McInnes     if (type == NORM_FROBENIUS) {
116304ca555eSLois Curfman McInnes       v = amat->a;
116404ca555eSLois Curfman McInnes       for (i=0; i<amat->nz; i++ ) {
116504ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX)
116604ca555eSLois Curfman McInnes         sum += real(conj(*v)*(*v)); v++;
116704ca555eSLois Curfman McInnes #else
116804ca555eSLois Curfman McInnes         sum += (*v)*(*v); v++;
116904ca555eSLois Curfman McInnes #endif
117004ca555eSLois Curfman McInnes       }
117104ca555eSLois Curfman McInnes       v = bmat->a;
117204ca555eSLois Curfman McInnes       for (i=0; i<bmat->nz; i++ ) {
117304ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX)
117404ca555eSLois Curfman McInnes         sum += real(conj(*v)*(*v)); v++;
117504ca555eSLois Curfman McInnes #else
117604ca555eSLois Curfman McInnes         sum += (*v)*(*v); v++;
117704ca555eSLois Curfman McInnes #endif
117804ca555eSLois Curfman McInnes       }
117904ca555eSLois Curfman McInnes       MPI_Allreduce((void*)&sum,(void*)norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
118004ca555eSLois Curfman McInnes       *norm = sqrt(*norm);
118104ca555eSLois Curfman McInnes     }
118204ca555eSLois Curfman McInnes     else if (type == NORM_1) { /* max column norm */
118304ca555eSLois Curfman McInnes       double *tmp, *tmp2;
118404ca555eSLois Curfman McInnes       int    *jj, *garray = aij->garray;
11850452661fSBarry Smith       tmp  = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp);
11860452661fSBarry Smith       tmp2 = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp2);
1187cddf8d76SBarry Smith       PetscMemzero(tmp,aij->N*sizeof(double));
118804ca555eSLois Curfman McInnes       *norm = 0.0;
118904ca555eSLois Curfman McInnes       v = amat->a; jj = amat->j;
119004ca555eSLois Curfman McInnes       for ( j=0; j<amat->nz; j++ ) {
1191579c6b6fSBarry Smith         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
119204ca555eSLois Curfman McInnes       }
119304ca555eSLois Curfman McInnes       v = bmat->a; jj = bmat->j;
119404ca555eSLois Curfman McInnes       for ( j=0; j<bmat->nz; j++ ) {
1195579c6b6fSBarry Smith         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
119604ca555eSLois Curfman McInnes       }
119704ca555eSLois Curfman McInnes       MPI_Allreduce((void*)tmp,(void*)tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);
119804ca555eSLois Curfman McInnes       for ( j=0; j<aij->N; j++ ) {
119904ca555eSLois Curfman McInnes         if (tmp2[j] > *norm) *norm = tmp2[j];
120004ca555eSLois Curfman McInnes       }
12010452661fSBarry Smith       PetscFree(tmp); PetscFree(tmp2);
120204ca555eSLois Curfman McInnes     }
120304ca555eSLois Curfman McInnes     else if (type == NORM_INFINITY) { /* max row norm */
1204515d9167SLois Curfman McInnes       double ntemp = 0.0;
120504ca555eSLois Curfman McInnes       for ( j=0; j<amat->m; j++ ) {
1206dbb450caSBarry Smith         v = amat->a + amat->i[j] + shift;
120704ca555eSLois Curfman McInnes         sum = 0.0;
120804ca555eSLois Curfman McInnes         for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
1209cddf8d76SBarry Smith           sum += PetscAbsScalar(*v); v++;
121004ca555eSLois Curfman McInnes         }
1211dbb450caSBarry Smith         v = bmat->a + bmat->i[j] + shift;
121204ca555eSLois Curfman McInnes         for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
1213cddf8d76SBarry Smith           sum += PetscAbsScalar(*v); v++;
121404ca555eSLois Curfman McInnes         }
1215515d9167SLois Curfman McInnes         if (sum > ntemp) ntemp = sum;
121604ca555eSLois Curfman McInnes       }
1217515d9167SLois Curfman McInnes       MPI_Allreduce((void*)&ntemp,(void*)norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);
121804ca555eSLois Curfman McInnes     }
121904ca555eSLois Curfman McInnes     else {
1220515d9167SLois Curfman McInnes       SETERRQ(1,"MatNorm_MPIAIJ:No support for two norm");
122104ca555eSLois Curfman McInnes     }
122237fa93a5SLois Curfman McInnes   }
1223855ac2c5SLois Curfman McInnes   return 0;
1224855ac2c5SLois Curfman McInnes }
1225855ac2c5SLois Curfman McInnes 
12260de55854SLois Curfman McInnes static int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1227b7c46309SBarry Smith {
1228b7c46309SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1229dbb450caSBarry Smith   Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
1230416022c9SBarry Smith   int        ierr,shift = Aloc->indexshift;
1231416022c9SBarry Smith   Mat        B;
1232b7c46309SBarry Smith   int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
1233b7c46309SBarry Smith   Scalar     *array;
1234b7c46309SBarry Smith 
1235416022c9SBarry Smith   if (!matout && M != N) SETERRQ(1,"MatTranspose_MPIAIJ:Square only in-place");
1236b4fd4287SBarry Smith   ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,
1237b4fd4287SBarry Smith          PETSC_NULL,&B); CHKERRQ(ierr);
1238b7c46309SBarry Smith 
1239b7c46309SBarry Smith   /* copy over the A part */
1240ec8511deSBarry Smith   Aloc = (Mat_SeqAIJ*) a->A->data;
1241b7c46309SBarry Smith   m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1242b7c46309SBarry Smith   row = a->rstart;
1243dbb450caSBarry Smith   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
1244b7c46309SBarry Smith   for ( i=0; i<m; i++ ) {
1245416022c9SBarry Smith     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1246b7c46309SBarry Smith     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1247b7c46309SBarry Smith   }
1248b7c46309SBarry Smith   aj = Aloc->j;
1249dbb450caSBarry Smith   for ( i=0; i<ai[m]|+shift; i++ ) {aj[i] -= a->cstart + shift;}
1250b7c46309SBarry Smith 
1251b7c46309SBarry Smith   /* copy over the B part */
1252ec8511deSBarry Smith   Aloc = (Mat_SeqAIJ*) a->B->data;
1253b7c46309SBarry Smith   m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1254b7c46309SBarry Smith   row = a->rstart;
12550452661fSBarry Smith   ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
1256dbb450caSBarry Smith   for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
1257b7c46309SBarry Smith   for ( i=0; i<m; i++ ) {
1258416022c9SBarry Smith     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1259b7c46309SBarry Smith     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1260b7c46309SBarry Smith   }
12610452661fSBarry Smith   PetscFree(ct);
1262b7c46309SBarry Smith   ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
1263b7c46309SBarry Smith   ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
12640de55854SLois Curfman McInnes   if (matout) {
12650de55854SLois Curfman McInnes     *matout = B;
12660de55854SLois Curfman McInnes   } else {
12670de55854SLois Curfman McInnes     /* This isn't really an in-place transpose .... but free data structures from a */
12680452661fSBarry Smith     PetscFree(a->rowners);
12690de55854SLois Curfman McInnes     ierr = MatDestroy(a->A); CHKERRQ(ierr);
12700de55854SLois Curfman McInnes     ierr = MatDestroy(a->B); CHKERRQ(ierr);
12710452661fSBarry Smith     if (a->colmap) PetscFree(a->colmap);
12720452661fSBarry Smith     if (a->garray) PetscFree(a->garray);
12730de55854SLois Curfman McInnes     if (a->lvec) VecDestroy(a->lvec);
1274a56f8943SBarry Smith     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
12750452661fSBarry Smith     PetscFree(a);
1276416022c9SBarry Smith     PetscMemcpy(A,B,sizeof(struct _Mat));
12770452661fSBarry Smith     PetscHeaderDestroy(B);
12780de55854SLois Curfman McInnes   }
1279b7c46309SBarry Smith   return 0;
1280b7c46309SBarry Smith }
1281b7c46309SBarry Smith 
1282682d7d0cSBarry Smith extern int MatPrintHelp_SeqAIJ(Mat);
1283682d7d0cSBarry Smith static int MatPrintHelp_MPIAIJ(Mat A)
1284682d7d0cSBarry Smith {
1285682d7d0cSBarry Smith   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1286682d7d0cSBarry Smith 
1287682d7d0cSBarry Smith   if (!a->rank) return MatPrintHelp_SeqAIJ(a->A);
1288682d7d0cSBarry Smith   else return 0;
1289682d7d0cSBarry Smith }
1290682d7d0cSBarry Smith 
1291fc76ce87SLois Curfman McInnes extern int MatConvert_MPIAIJ(Mat,MatType,Mat *);
12923d1612f7SBarry Smith static int MatConvertSameType_MPIAIJ(Mat,Mat *,int);
1293d6dfbf8fSBarry Smith 
12948a729477SBarry Smith /* -------------------------------------------------------------------*/
12952ee70a88SLois Curfman McInnes static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
129639e00950SLois Curfman McInnes        MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
129744a69424SLois Curfman McInnes        MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
129844a69424SLois Curfman McInnes        MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
1299299609e3SLois Curfman McInnes        MatSolve_MPIAIJ,MatSolveAdd_MPIAIJ,
1300299609e3SLois Curfman McInnes        MatSolveTrans_MPIAIJ,MatSolveTransAdd_MPIAIJ,
1301299609e3SLois Curfman McInnes        MatLUFactor_MPIAIJ,0,
130244a69424SLois Curfman McInnes        MatRelax_MPIAIJ,
1303b7c46309SBarry Smith        MatTranspose_MPIAIJ,
1304a66be287SLois Curfman McInnes        MatGetInfo_MPIAIJ,0,
1305855ac2c5SLois Curfman McInnes        MatGetDiagonal_MPIAIJ,0,MatNorm_MPIAIJ,
1306ee50ffe9SBarry Smith        MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
13071eb62cbbSBarry Smith        0,
1308299609e3SLois Curfman McInnes        MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
1309299609e3SLois Curfman McInnes        MatGetReordering_MPIAIJ,
1310299609e3SLois Curfman McInnes        MatLUFactorSymbolic_MPIAIJ,MatLUFactorNumeric_MPIAIJ,0,0,
1311d1710a03SLois Curfman McInnes        MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
1312299609e3SLois Curfman McInnes        MatILUFactorSymbolic_MPIAIJ,0,
13133d1612f7SBarry Smith        0,0,MatConvert_MPIAIJ,0,0,MatConvertSameType_MPIAIJ,0,0,
1314b49de8d1SLois Curfman McInnes        0,0,0,
1315682d7d0cSBarry Smith        0,0,MatGetValues_MPIAIJ,0,
1316682d7d0cSBarry Smith        MatPrintHelp_MPIAIJ};
13178a729477SBarry Smith 
13181987afe7SBarry Smith /*@C
1319ff756334SLois Curfman McInnes    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
1320ff756334SLois Curfman McInnes    (the default parallel PETSc format).
13218a729477SBarry Smith 
13228a729477SBarry Smith    Input Parameters:
13231eb62cbbSBarry Smith .  comm - MPI communicator
13247d3e4905SLois Curfman McInnes .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
13257d3e4905SLois Curfman McInnes .  n - number of local columns (or PETSC_DECIDE to have calculated
13267d3e4905SLois Curfman McInnes            if N is given)
13277d3e4905SLois Curfman McInnes .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
13287d3e4905SLois Curfman McInnes .  N - number of global columns (or PETSC_DECIDE to have calculated
13297d3e4905SLois Curfman McInnes            if n is given)
1330ab693e5aSLois Curfman McInnes .  d_nz - number of nonzeros per row in diagonal portion of local submatrix
1331ff756334SLois Curfman McInnes            (same for all local rows)
1332ab693e5aSLois Curfman McInnes .  d_nzz - number of nonzeros per row in diagonal portion of local submatrix
1333ab693e5aSLois Curfman McInnes            or null (possibly different for each row).  You must leave room
1334ab693e5aSLois Curfman McInnes            for the diagonal entry even if it is zero.
1335ab693e5aSLois Curfman McInnes .  o_nz - number of nonzeros per row in off-diagonal portion of local
1336ab693e5aSLois Curfman McInnes            submatrix (same for all local rows).
1337ab693e5aSLois Curfman McInnes .  o_nzz - number of nonzeros per row in off-diagonal portion of local
1338ab693e5aSLois Curfman McInnes            submatrix or null (possibly different for each row).
13398a729477SBarry Smith 
1340ff756334SLois Curfman McInnes    Output Parameter:
13418a729477SBarry Smith .  newmat - the matrix
13428a729477SBarry Smith 
1343ff756334SLois Curfman McInnes    Notes:
1344ff756334SLois Curfman McInnes    The AIJ format (also called the Yale sparse matrix format or
1345ff756334SLois Curfman McInnes    compressed row storage), is fully compatible with standard Fortran 77
13460002213bSLois Curfman McInnes    storage.  That is, the stored row and column indices can begin at
13470002213bSLois Curfman McInnes    either one (as in Fortran) or zero.  See the users manual for details.
1348ff756334SLois Curfman McInnes 
1349ff756334SLois Curfman McInnes    The user MUST specify either the local or global matrix dimensions
1350ff756334SLois Curfman McInnes    (possibly both).
1351ff756334SLois Curfman McInnes 
1352e0245417SLois Curfman McInnes    Storage Information:
1353e0245417SLois Curfman McInnes    For a square global matrix we define each processor's diagonal portion
1354e0245417SLois Curfman McInnes    to be its local rows and the corresponding columns (a square submatrix);
1355e0245417SLois Curfman McInnes    each processor's off-diagonal portion encompasses the remainder of the
1356e0245417SLois Curfman McInnes    local matrix (a rectangular submatrix).
1357e0245417SLois Curfman McInnes 
1358e0245417SLois Curfman McInnes    The user can specify preallocated storage for the diagonal part of
13595ace5be8SLois Curfman McInnes    the local submatrix with either d_nz or d_nnz (not both).  Set
13605ace5be8SLois Curfman McInnes    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
13615ace5be8SLois Curfman McInnes    memory allocation.  Likewise, specify preallocated storage for the
13625ace5be8SLois Curfman McInnes    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1363ff756334SLois Curfman McInnes 
1364c451ab25SLois Curfman McInnes    By default, this format uses inodes (identical nodes) when possible.
1365c451ab25SLois Curfman McInnes    We search for consecutive rows with the same nonzero structure, thereby
1366c451ab25SLois Curfman McInnes    reusing matrix information to achieve increased efficiency.
1367c451ab25SLois Curfman McInnes 
1368c451ab25SLois Curfman McInnes    Options Database Keys:
1369c451ab25SLois Curfman McInnes $    -mat_aij_no_inode  - Do not use inodes
1370c451ab25SLois Curfman McInnes $    -mat_aij_inode_limit <limit> - Set inode limit (max limit=5)
1371c451ab25SLois Curfman McInnes 
1372dbd7a890SLois Curfman McInnes .keywords: matrix, aij, compressed row, sparse, parallel
1373ff756334SLois Curfman McInnes 
1374fafbff53SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
13758a729477SBarry Smith @*/
13761eb62cbbSBarry Smith int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
13771eb62cbbSBarry Smith                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *newmat)
13788a729477SBarry Smith {
13798a729477SBarry Smith   Mat          mat;
1380416022c9SBarry Smith   Mat_MPIAIJ   *a;
13816abc6512SBarry Smith   int          ierr, i,sum[2],work[2];
1382416022c9SBarry Smith 
13838a729477SBarry Smith   *newmat         = 0;
13840452661fSBarry Smith   PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,comm);
1385a5a9c739SBarry Smith   PLogObjectCreate(mat);
13860452661fSBarry Smith   mat->data       = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
1387416022c9SBarry Smith   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
138844a69424SLois Curfman McInnes   mat->destroy    = MatDestroy_MPIAIJ;
138944a69424SLois Curfman McInnes   mat->view       = MatView_MPIAIJ;
13908a729477SBarry Smith   mat->factor     = 0;
1391d6dfbf8fSBarry Smith 
139264119d58SLois Curfman McInnes   a->insertmode = NOT_SET_VALUES;
139317699dbbSLois Curfman McInnes   MPI_Comm_rank(comm,&a->rank);
139417699dbbSLois Curfman McInnes   MPI_Comm_size(comm,&a->size);
13951eb62cbbSBarry Smith 
1396b4fd4287SBarry Smith   if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
13971987afe7SBarry Smith     SETERRQ(1,"MatCreateMPIAIJ:Cannot have PETSc decide rows but set d_nnz or o_nnz");
13981987afe7SBarry Smith 
1399dbd7a890SLois Curfman McInnes   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
14001eb62cbbSBarry Smith     work[0] = m; work[1] = n;
1401d65a2f8fSBarry Smith     MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
1402dbd7a890SLois Curfman McInnes     if (M == PETSC_DECIDE) M = sum[0];
1403dbd7a890SLois Curfman McInnes     if (N == PETSC_DECIDE) N = sum[1];
14041eb62cbbSBarry Smith   }
140517699dbbSLois Curfman McInnes   if (m == PETSC_DECIDE) {m = M/a->size + ((M % a->size) > a->rank);}
140617699dbbSLois Curfman McInnes   if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);}
1407416022c9SBarry Smith   a->m = m;
1408416022c9SBarry Smith   a->n = n;
1409416022c9SBarry Smith   a->N = N;
1410416022c9SBarry Smith   a->M = M;
14111eb62cbbSBarry Smith 
14121eb62cbbSBarry Smith   /* build local table of row and column ownerships */
14130452661fSBarry Smith   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
1414579c6b6fSBarry Smith   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
141517699dbbSLois Curfman McInnes   a->cowners = a->rowners + a->size + 1;
1416416022c9SBarry Smith   MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);
1417416022c9SBarry Smith   a->rowners[0] = 0;
141817699dbbSLois Curfman McInnes   for ( i=2; i<=a->size; i++ ) {
1419416022c9SBarry Smith     a->rowners[i] += a->rowners[i-1];
14208a729477SBarry Smith   }
142117699dbbSLois Curfman McInnes   a->rstart = a->rowners[a->rank];
142217699dbbSLois Curfman McInnes   a->rend   = a->rowners[a->rank+1];
1423416022c9SBarry Smith   MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);
1424416022c9SBarry Smith   a->cowners[0] = 0;
142517699dbbSLois Curfman McInnes   for ( i=2; i<=a->size; i++ ) {
1426416022c9SBarry Smith     a->cowners[i] += a->cowners[i-1];
14278a729477SBarry Smith   }
142817699dbbSLois Curfman McInnes   a->cstart = a->cowners[a->rank];
142917699dbbSLois Curfman McInnes   a->cend   = a->cowners[a->rank+1];
14308a729477SBarry Smith 
14315ace5be8SLois Curfman McInnes   if (d_nz == PETSC_DEFAULT) d_nz = 5;
1432416022c9SBarry Smith   ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&a->A); CHKERRQ(ierr);
1433416022c9SBarry Smith   PLogObjectParent(mat,a->A);
14347b8455f0SLois Curfman McInnes   if (o_nz == PETSC_DEFAULT) o_nz = 0;
1435416022c9SBarry Smith   ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&a->B); CHKERRQ(ierr);
1436416022c9SBarry Smith   PLogObjectParent(mat,a->B);
14378a729477SBarry Smith 
14381eb62cbbSBarry Smith   /* build cache for off array entries formed */
1439416022c9SBarry Smith   ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr);
1440416022c9SBarry Smith   a->colmap      = 0;
1441416022c9SBarry Smith   a->garray      = 0;
14424b0e389bSBarry Smith   a->roworiented = 1;
14438a729477SBarry Smith 
14441eb62cbbSBarry Smith   /* stuff used for matrix vector multiply */
1445416022c9SBarry Smith   a->lvec      = 0;
1446416022c9SBarry Smith   a->Mvctx     = 0;
1447416022c9SBarry Smith   a->assembled = 0;
14488a729477SBarry Smith 
1449d6dfbf8fSBarry Smith   *newmat = mat;
1450d6dfbf8fSBarry Smith   return 0;
1451d6dfbf8fSBarry Smith }
1452c74985f6SBarry Smith 
14533d1612f7SBarry Smith static int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues)
1454d6dfbf8fSBarry Smith {
1455d6dfbf8fSBarry Smith   Mat        mat;
1456416022c9SBarry Smith   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
1457*25cdf11fSBarry Smith   int        ierr, len,flg;
1458d6dfbf8fSBarry Smith 
14593d1612f7SBarry Smith   if (!oldmat->assembled) SETERRQ(1,"MatConvertSameType_MPIAIJ:Must assemble matrix");
1460416022c9SBarry Smith   *newmat       = 0;
14610452661fSBarry Smith   PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm);
1462a5a9c739SBarry Smith   PLogObjectCreate(mat);
14630452661fSBarry Smith   mat->data     = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
1464416022c9SBarry Smith   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
146544a69424SLois Curfman McInnes   mat->destroy  = MatDestroy_MPIAIJ;
146644a69424SLois Curfman McInnes   mat->view     = MatView_MPIAIJ;
1467d6dfbf8fSBarry Smith   mat->factor   = matin->factor;
1468d6dfbf8fSBarry Smith 
1469416022c9SBarry Smith   a->m          = oldmat->m;
1470416022c9SBarry Smith   a->n          = oldmat->n;
1471416022c9SBarry Smith   a->M          = oldmat->M;
1472416022c9SBarry Smith   a->N          = oldmat->N;
1473d6dfbf8fSBarry Smith 
1474416022c9SBarry Smith   a->assembled  = 1;
1475416022c9SBarry Smith   a->rstart     = oldmat->rstart;
1476416022c9SBarry Smith   a->rend       = oldmat->rend;
1477416022c9SBarry Smith   a->cstart     = oldmat->cstart;
1478416022c9SBarry Smith   a->cend       = oldmat->cend;
147917699dbbSLois Curfman McInnes   a->size       = oldmat->size;
148017699dbbSLois Curfman McInnes   a->rank       = oldmat->rank;
148164119d58SLois Curfman McInnes   a->insertmode = NOT_SET_VALUES;
1482d6dfbf8fSBarry Smith 
14830452661fSBarry Smith   a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners);
148417699dbbSLois Curfman McInnes   PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
148517699dbbSLois Curfman McInnes   PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));
1486416022c9SBarry Smith   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
14872ee70a88SLois Curfman McInnes   if (oldmat->colmap) {
14880452661fSBarry Smith     a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
1489416022c9SBarry Smith     PLogObjectMemory(mat,(a->N)*sizeof(int));
1490416022c9SBarry Smith     PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
1491416022c9SBarry Smith   } else a->colmap = 0;
1492ec8511deSBarry Smith   if (oldmat->garray && (len = ((Mat_SeqAIJ *) (oldmat->B->data))->n)) {
14930452661fSBarry Smith     a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray);
1494464493b3SBarry Smith     PLogObjectMemory(mat,len*sizeof(int));
1495416022c9SBarry Smith     PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
1496416022c9SBarry Smith   } else a->garray = 0;
1497d6dfbf8fSBarry Smith 
1498416022c9SBarry Smith   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
1499416022c9SBarry Smith   PLogObjectParent(mat,a->lvec);
1500a56f8943SBarry Smith   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
1501416022c9SBarry Smith   PLogObjectParent(mat,a->Mvctx);
1502416022c9SBarry Smith   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
1503416022c9SBarry Smith   PLogObjectParent(mat,a->A);
1504416022c9SBarry Smith   ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
1505416022c9SBarry Smith   PLogObjectParent(mat,a->B);
1506*25cdf11fSBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-help",&flg) CHKERRQ(ierr);
1507*25cdf11fSBarry Smith   if (flg) {
1508682d7d0cSBarry Smith     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
1509682d7d0cSBarry Smith   }
15108a729477SBarry Smith   *newmat = mat;
15118a729477SBarry Smith   return 0;
15128a729477SBarry Smith }
1513416022c9SBarry Smith 
1514416022c9SBarry Smith #include "sysio.h"
1515416022c9SBarry Smith 
151602834360SBarry Smith int MatLoad_MPIAIJorMPIRow(Viewer bview,MatType type,Mat *newmat)
1517416022c9SBarry Smith {
1518d65a2f8fSBarry Smith   Mat          A;
1519416022c9SBarry Smith   int          i, nz, ierr, j,rstart, rend, fd;
1520d65a2f8fSBarry Smith   Scalar       *vals,*svals;
1521416022c9SBarry Smith   PetscObject  vobj = (PetscObject) bview;
1522416022c9SBarry Smith   MPI_Comm     comm = vobj->comm;
1523416022c9SBarry Smith   MPI_Status   status;
152417699dbbSLois Curfman McInnes   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1525d65a2f8fSBarry Smith   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
1526d65a2f8fSBarry Smith   int          tag = ((PetscObject)bview)->tag;
1527416022c9SBarry Smith 
152817699dbbSLois Curfman McInnes   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
152917699dbbSLois Curfman McInnes   if (!rank) {
1530416022c9SBarry Smith     ierr = ViewerFileGetDescriptor_Private(bview,&fd); CHKERRQ(ierr);
1531416022c9SBarry Smith     ierr = SYRead(fd,(char *)header,4,SYINT); CHKERRQ(ierr);
153202834360SBarry Smith     if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIAIJorMPIRow:not matrix object");
1533416022c9SBarry Smith   }
1534416022c9SBarry Smith 
1535416022c9SBarry Smith   MPI_Bcast(header+1,3,MPI_INT,0,comm);
1536416022c9SBarry Smith   M = header[1]; N = header[2];
1537416022c9SBarry Smith   /* determine ownership of all rows */
153817699dbbSLois Curfman McInnes   m = M/size + ((M % size) > rank);
15390452661fSBarry Smith   rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
1540416022c9SBarry Smith   MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1541416022c9SBarry Smith   rowners[0] = 0;
154217699dbbSLois Curfman McInnes   for ( i=2; i<=size; i++ ) {
1543416022c9SBarry Smith     rowners[i] += rowners[i-1];
1544416022c9SBarry Smith   }
154517699dbbSLois Curfman McInnes   rstart = rowners[rank];
154617699dbbSLois Curfman McInnes   rend   = rowners[rank+1];
1547416022c9SBarry Smith 
1548416022c9SBarry Smith   /* distribute row lengths to all processors */
15490452661fSBarry Smith   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
1550416022c9SBarry Smith   offlens = ourlens + (rend-rstart);
155117699dbbSLois Curfman McInnes   if (!rank) {
15520452661fSBarry Smith     rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
1553416022c9SBarry Smith     ierr = SYRead(fd,rowlengths,M,SYINT); CHKERRQ(ierr);
15540452661fSBarry Smith     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
155517699dbbSLois Curfman McInnes     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
1556d65a2f8fSBarry Smith     MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
15570452661fSBarry Smith     PetscFree(sndcounts);
1558416022c9SBarry Smith   }
1559416022c9SBarry Smith   else {
1560416022c9SBarry Smith     MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
1561416022c9SBarry Smith   }
1562416022c9SBarry Smith 
156317699dbbSLois Curfman McInnes   if (!rank) {
1564416022c9SBarry Smith     /* calculate the number of nonzeros on each processor */
15650452661fSBarry Smith     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
1566cddf8d76SBarry Smith     PetscMemzero(procsnz,size*sizeof(int));
156717699dbbSLois Curfman McInnes     for ( i=0; i<size; i++ ) {
1568416022c9SBarry Smith       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
1569416022c9SBarry Smith         procsnz[i] += rowlengths[j];
1570416022c9SBarry Smith       }
1571416022c9SBarry Smith     }
15720452661fSBarry Smith     PetscFree(rowlengths);
1573416022c9SBarry Smith 
1574416022c9SBarry Smith     /* determine max buffer needed and allocate it */
1575416022c9SBarry Smith     maxnz = 0;
157617699dbbSLois Curfman McInnes     for ( i=0; i<size; i++ ) {
15770452661fSBarry Smith       maxnz = PetscMax(maxnz,procsnz[i]);
1578416022c9SBarry Smith     }
15790452661fSBarry Smith     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
1580416022c9SBarry Smith 
1581416022c9SBarry Smith     /* read in my part of the matrix column indices  */
1582416022c9SBarry Smith     nz = procsnz[0];
15830452661fSBarry Smith     mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1584d65a2f8fSBarry Smith     ierr = SYRead(fd,mycols,nz,SYINT); CHKERRQ(ierr);
1585d65a2f8fSBarry Smith 
1586d65a2f8fSBarry Smith     /* read in every one elses and ship off */
158717699dbbSLois Curfman McInnes     for ( i=1; i<size; i++ ) {
1588d65a2f8fSBarry Smith       nz = procsnz[i];
1589416022c9SBarry Smith       ierr = SYRead(fd,cols,nz,SYINT); CHKERRQ(ierr);
1590d65a2f8fSBarry Smith       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
1591d65a2f8fSBarry Smith     }
15920452661fSBarry Smith     PetscFree(cols);
1593416022c9SBarry Smith   }
1594416022c9SBarry Smith   else {
1595416022c9SBarry Smith     /* determine buffer space needed for message */
1596416022c9SBarry Smith     nz = 0;
1597416022c9SBarry Smith     for ( i=0; i<m; i++ ) {
1598416022c9SBarry Smith       nz += ourlens[i];
1599416022c9SBarry Smith     }
16000452661fSBarry Smith     mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1601416022c9SBarry Smith 
1602416022c9SBarry Smith     /* receive message of column indices*/
1603d65a2f8fSBarry Smith     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
1604416022c9SBarry Smith     MPI_Get_count(&status,MPI_INT,&maxnz);
160502834360SBarry Smith     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJorMPIRow:something is wrong with file");
1606416022c9SBarry Smith   }
1607416022c9SBarry Smith 
1608416022c9SBarry Smith   /* loop over local rows, determining number of off diagonal entries */
1609cddf8d76SBarry Smith   PetscMemzero(offlens,m*sizeof(int));
1610416022c9SBarry Smith   jj = 0;
1611416022c9SBarry Smith   for ( i=0; i<m; i++ ) {
1612416022c9SBarry Smith     for ( j=0; j<ourlens[i]; j++ ) {
1613d65a2f8fSBarry Smith       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1614416022c9SBarry Smith       jj++;
1615416022c9SBarry Smith     }
1616416022c9SBarry Smith   }
1617d65a2f8fSBarry Smith 
1618d65a2f8fSBarry Smith   /* create our matrix */
1619416022c9SBarry Smith   for ( i=0; i<m; i++ ) {
1620416022c9SBarry Smith     ourlens[i] -= offlens[i];
1621416022c9SBarry Smith   }
1622d65a2f8fSBarry Smith   if (type == MATMPIAIJ) {
1623d65a2f8fSBarry Smith     ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1624d65a2f8fSBarry Smith   }
1625d65a2f8fSBarry Smith   else if (type == MATMPIROW) {
1626d65a2f8fSBarry Smith     ierr = MatCreateMPIRow(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1627d65a2f8fSBarry Smith   }
1628d65a2f8fSBarry Smith   A = *newmat;
1629d65a2f8fSBarry Smith   MatSetOption(A,COLUMNS_SORTED);
1630d65a2f8fSBarry Smith   for ( i=0; i<m; i++ ) {
1631d65a2f8fSBarry Smith     ourlens[i] += offlens[i];
1632d65a2f8fSBarry Smith   }
1633416022c9SBarry Smith 
163417699dbbSLois Curfman McInnes   if (!rank) {
16350452661fSBarry Smith     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);
1636416022c9SBarry Smith 
1637416022c9SBarry Smith     /* read in my part of the matrix numerical values  */
1638416022c9SBarry Smith     nz = procsnz[0];
1639416022c9SBarry Smith     ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr);
1640d65a2f8fSBarry Smith 
1641d65a2f8fSBarry Smith     /* insert into matrix */
1642d65a2f8fSBarry Smith     jj      = rstart;
1643d65a2f8fSBarry Smith     smycols = mycols;
1644d65a2f8fSBarry Smith     svals   = vals;
1645d65a2f8fSBarry Smith     for ( i=0; i<m; i++ ) {
1646d65a2f8fSBarry Smith       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1647d65a2f8fSBarry Smith       smycols += ourlens[i];
1648d65a2f8fSBarry Smith       svals   += ourlens[i];
1649d65a2f8fSBarry Smith       jj++;
1650416022c9SBarry Smith     }
1651416022c9SBarry Smith 
1652d65a2f8fSBarry Smith     /* read in other processors and ship out */
165317699dbbSLois Curfman McInnes     for ( i=1; i<size; i++ ) {
1654416022c9SBarry Smith       nz = procsnz[i];
1655416022c9SBarry Smith       ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr);
1656d65a2f8fSBarry Smith       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
1657416022c9SBarry Smith     }
16580452661fSBarry Smith     PetscFree(procsnz);
1659416022c9SBarry Smith   }
1660d65a2f8fSBarry Smith   else {
1661d65a2f8fSBarry Smith     /* receive numeric values */
16620452661fSBarry Smith     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);
1663416022c9SBarry Smith 
1664d65a2f8fSBarry Smith     /* receive message of values*/
1665d65a2f8fSBarry Smith     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
1666d65a2f8fSBarry Smith     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
166702834360SBarry Smith     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJorMPIRow:something is wrong with file");
1668d65a2f8fSBarry Smith 
1669d65a2f8fSBarry Smith     /* insert into matrix */
1670d65a2f8fSBarry Smith     jj      = rstart;
1671d65a2f8fSBarry Smith     smycols = mycols;
1672d65a2f8fSBarry Smith     svals   = vals;
1673d65a2f8fSBarry Smith     for ( i=0; i<m; i++ ) {
1674d65a2f8fSBarry Smith       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1675d65a2f8fSBarry Smith       smycols += ourlens[i];
1676d65a2f8fSBarry Smith       svals   += ourlens[i];
1677d65a2f8fSBarry Smith       jj++;
1678d65a2f8fSBarry Smith     }
1679d65a2f8fSBarry Smith   }
16800452661fSBarry Smith   PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);
1681d65a2f8fSBarry Smith 
1682d65a2f8fSBarry Smith   ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
1683d65a2f8fSBarry Smith   ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
1684416022c9SBarry Smith   return 0;
1685416022c9SBarry Smith }
1686