xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 44cd7ae7fed5c695df79898b8d65e4c9e6fe6909)
1cb512458SBarry Smith #ifndef lint
2*44cd7ae7SLois Curfman McInnes static char vcid[] = "$Id: mpiaij.c,v 1.137 1996/04/03 14:39:22 curfman Exp curfman $";
3cb512458SBarry Smith #endif
48a729477SBarry Smith 
51eb62cbbSBarry Smith #include "mpiaij.h"
68a729477SBarry Smith #include "vec/vecimpl.h"
7d6dfbf8fSBarry Smith #include "inline/spops.h"
88a729477SBarry Smith 
99e25ed09SBarry Smith /* local utility routine that creates a mapping from the global column
109e25ed09SBarry Smith number to the local number in the off-diagonal part of the local
119e25ed09SBarry Smith storage of the matrix.  This is done in a non scable way since the
129e25ed09SBarry Smith length of colmap equals the global matrix length.
139e25ed09SBarry Smith */
14b7c46309SBarry Smith static int CreateColmap_Private(Mat mat)
159e25ed09SBarry Smith {
1644a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
17ec8511deSBarry Smith   Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
18416022c9SBarry Smith   int        n = B->n,i,shift = B->indexshift;
19dbb450caSBarry Smith 
200452661fSBarry Smith   aij->colmap = (int *) PetscMalloc(aij->N*sizeof(int));CHKPTRQ(aij->colmap);
21464493b3SBarry Smith   PLogObjectMemory(mat,aij->N*sizeof(int));
22cddf8d76SBarry Smith   PetscMemzero(aij->colmap,aij->N*sizeof(int));
23dbb450caSBarry Smith   for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i-shift;
249e25ed09SBarry Smith   return 0;
259e25ed09SBarry Smith }
269e25ed09SBarry Smith 
272493cbb0SBarry Smith extern int DisAssemble_MPIAIJ(Mat);
282493cbb0SBarry Smith 
2951c98ccdSLois Curfman McInnes static int MatGetReordering_MPIAIJ(Mat mat,MatOrdering type,IS *rperm,IS *cperm)
30299609e3SLois Curfman McInnes {
31299609e3SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
32299609e3SLois Curfman McInnes   int        ierr;
3317699dbbSLois Curfman McInnes   if (aij->size == 1) {
3451c98ccdSLois Curfman McInnes     ierr = MatGetReordering(aij->A,type,rperm,cperm); CHKERRQ(ierr);
3548d91487SBarry Smith   } else SETERRQ(1,"MatGetReordering_MPIAIJ:not supported in parallel");
36299609e3SLois Curfman McInnes   return 0;
37299609e3SLois Curfman McInnes }
38299609e3SLois Curfman McInnes 
394b0e389bSBarry Smith static int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
408a729477SBarry Smith {
4144a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
42dbb450caSBarry Smith   Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data;
434b0e389bSBarry Smith   Scalar     value;
441eb62cbbSBarry Smith   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
451eb62cbbSBarry Smith   int        cstart = aij->cstart, cend = aij->cend,row,col;
464b0e389bSBarry Smith   int        shift = C->indexshift,roworiented = aij->roworiented;
478a729477SBarry Smith 
4864119d58SLois Curfman McInnes   if (aij->insertmode != NOT_SET_VALUES && aij->insertmode != addv) {
4948d91487SBarry Smith     SETERRQ(1,"MatSetValues_MPIAIJ:Cannot mix inserts and adds");
508a729477SBarry Smith   }
511eb62cbbSBarry Smith   aij->insertmode = addv;
528a729477SBarry Smith   for ( i=0; i<m; i++ ) {
534b0e389bSBarry Smith     if (im[i] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative row");
544b0e389bSBarry Smith     if (im[i] >= aij->M) SETERRQ(1,"MatSetValues_MPIAIJ:Row too large");
554b0e389bSBarry Smith     if (im[i] >= rstart && im[i] < rend) {
564b0e389bSBarry Smith       row = im[i] - rstart;
571eb62cbbSBarry Smith       for ( j=0; j<n; j++ ) {
584b0e389bSBarry Smith         if (in[j] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative column");
594b0e389bSBarry Smith         if (in[j] >= aij->N) SETERRQ(1,"MatSetValues_MPIAIJ:Col too large");
604b0e389bSBarry Smith         if (in[j] >= cstart && in[j] < cend){
614b0e389bSBarry Smith           col = in[j] - cstart;
624b0e389bSBarry Smith           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
634b0e389bSBarry Smith           ierr = MatSetValues(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
641eb62cbbSBarry Smith         }
651eb62cbbSBarry Smith         else {
66227d817aSBarry Smith           if (mat->was_assembled) {
67b7c46309SBarry Smith             if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
684b0e389bSBarry Smith             col = aij->colmap[in[j]] + shift;
69ec8511deSBarry Smith             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
702493cbb0SBarry Smith               ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
714b0e389bSBarry Smith               col =  in[j];
72d6dfbf8fSBarry Smith             }
739e25ed09SBarry Smith           }
744b0e389bSBarry Smith           else col = in[j];
754b0e389bSBarry Smith           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
764b0e389bSBarry Smith           ierr = MatSetValues(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
771eb62cbbSBarry Smith         }
781eb62cbbSBarry Smith       }
791eb62cbbSBarry Smith     }
801eb62cbbSBarry Smith     else {
814b0e389bSBarry Smith       if (roworiented) {
824b0e389bSBarry Smith         ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
834b0e389bSBarry Smith       }
844b0e389bSBarry Smith       else {
854b0e389bSBarry Smith         row = im[i];
864b0e389bSBarry Smith         for ( j=0; j<n; j++ ) {
874b0e389bSBarry Smith           ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
884b0e389bSBarry Smith         }
894b0e389bSBarry Smith       }
901eb62cbbSBarry Smith     }
918a729477SBarry Smith   }
928a729477SBarry Smith   return 0;
938a729477SBarry Smith }
948a729477SBarry Smith 
95b49de8d1SLois Curfman McInnes static int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
96b49de8d1SLois Curfman McInnes {
97b49de8d1SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
98b49de8d1SLois Curfman McInnes   Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data;
99b49de8d1SLois Curfman McInnes   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
100b49de8d1SLois Curfman McInnes   int        cstart = aij->cstart, cend = aij->cend,row,col;
101b49de8d1SLois Curfman McInnes   int        shift = C->indexshift;
102b49de8d1SLois Curfman McInnes 
103b49de8d1SLois Curfman McInnes   for ( i=0; i<m; i++ ) {
104b49de8d1SLois Curfman McInnes     if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative row");
105b49de8d1SLois Curfman McInnes     if (idxm[i] >= aij->M) SETERRQ(1,"MatGetValues_MPIAIJ:Row too large");
106b49de8d1SLois Curfman McInnes     if (idxm[i] >= rstart && idxm[i] < rend) {
107b49de8d1SLois Curfman McInnes       row = idxm[i] - rstart;
108b49de8d1SLois Curfman McInnes       for ( j=0; j<n; j++ ) {
109b49de8d1SLois Curfman McInnes         if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative column");
110b49de8d1SLois Curfman McInnes         if (idxn[j] >= aij->N) SETERRQ(1,"MatGetValues_MPIAIJ:Col too large");
111b49de8d1SLois Curfman McInnes         if (idxn[j] >= cstart && idxn[j] < cend){
112b49de8d1SLois Curfman McInnes           col = idxn[j] - cstart;
113b49de8d1SLois Curfman McInnes           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
114b49de8d1SLois Curfman McInnes         }
115b49de8d1SLois Curfman McInnes         else {
116b49de8d1SLois Curfman McInnes           col = aij->colmap[idxn[j]] + shift;
117b49de8d1SLois Curfman McInnes           ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
118b49de8d1SLois Curfman McInnes         }
119b49de8d1SLois Curfman McInnes       }
120b49de8d1SLois Curfman McInnes     }
121b49de8d1SLois Curfman McInnes     else {
122b49de8d1SLois Curfman McInnes       SETERRQ(1,"MatGetValues_MPIAIJ:Only local values currently supported");
123b49de8d1SLois Curfman McInnes     }
124b49de8d1SLois Curfman McInnes   }
125b49de8d1SLois Curfman McInnes   return 0;
126b49de8d1SLois Curfman McInnes }
127b49de8d1SLois Curfman McInnes 
128fc76ce87SLois Curfman McInnes static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
1298a729477SBarry Smith {
13044a69424SLois Curfman McInnes   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
131d6dfbf8fSBarry Smith   MPI_Comm    comm = mat->comm;
13217699dbbSLois Curfman McInnes   int         size = aij->size, *owners = aij->rowners;
13317699dbbSLois Curfman McInnes   int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
1341eb62cbbSBarry Smith   MPI_Request *send_waits,*recv_waits;
1356abc6512SBarry Smith   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
1361eb62cbbSBarry Smith   InsertMode  addv;
1371eb62cbbSBarry Smith   Scalar      *rvalues,*svalues;
1381eb62cbbSBarry Smith 
1391eb62cbbSBarry Smith   /* make sure all processors are either in INSERTMODE or ADDMODE */
140d65a2f8fSBarry Smith   MPI_Allreduce(&aij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
141dbb450caSBarry Smith   if (addv == (ADD_VALUES|INSERT_VALUES)) {
142bbb6d6a8SBarry Smith     SETERRQ(1,"MatAssemblyBegin_MPIAIJ:Some processors inserted others added");
1431eb62cbbSBarry Smith   }
1441eb62cbbSBarry Smith   aij->insertmode = addv; /* in case this processor had no cache */
1451eb62cbbSBarry Smith 
1461eb62cbbSBarry Smith   /*  first count number of contributors to each processor */
1470452661fSBarry Smith   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
148cddf8d76SBarry Smith   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
1490452661fSBarry Smith   owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
1501eb62cbbSBarry Smith   for ( i=0; i<aij->stash.n; i++ ) {
1511eb62cbbSBarry Smith     idx = aij->stash.idx[i];
15217699dbbSLois Curfman McInnes     for ( j=0; j<size; j++ ) {
1531eb62cbbSBarry Smith       if (idx >= owners[j] && idx < owners[j+1]) {
1541eb62cbbSBarry Smith         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
1558a729477SBarry Smith       }
1568a729477SBarry Smith     }
1578a729477SBarry Smith   }
15817699dbbSLois Curfman McInnes   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
1591eb62cbbSBarry Smith 
1601eb62cbbSBarry Smith   /* inform other processors of number of messages and max length*/
1610452661fSBarry Smith   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
16217699dbbSLois Curfman McInnes   MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);
16317699dbbSLois Curfman McInnes   nreceives = work[rank];
16417699dbbSLois Curfman McInnes   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
16517699dbbSLois Curfman McInnes   nmax = work[rank];
1660452661fSBarry Smith   PetscFree(work);
1671eb62cbbSBarry Smith 
1681eb62cbbSBarry Smith   /* post receives:
1691eb62cbbSBarry Smith        1) each message will consist of ordered pairs
1701eb62cbbSBarry Smith      (global index,value) we store the global index as a double
171d6dfbf8fSBarry Smith      to simplify the message passing.
1721eb62cbbSBarry Smith        2) since we don't know how long each individual message is we
1731eb62cbbSBarry Smith      allocate the largest needed buffer for each receive. Potentially
1741eb62cbbSBarry Smith      this is a lot of wasted space.
1751eb62cbbSBarry Smith 
1761eb62cbbSBarry Smith 
1771eb62cbbSBarry Smith        This could be done better.
1781eb62cbbSBarry Smith   */
1790452661fSBarry Smith   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
18078b31e54SBarry Smith   CHKPTRQ(rvalues);
1810452661fSBarry Smith   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
18278b31e54SBarry Smith   CHKPTRQ(recv_waits);
1831eb62cbbSBarry Smith   for ( i=0; i<nreceives; i++ ) {
184416022c9SBarry Smith     MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
1851eb62cbbSBarry Smith               comm,recv_waits+i);
1861eb62cbbSBarry Smith   }
1871eb62cbbSBarry Smith 
1881eb62cbbSBarry Smith   /* do sends:
1891eb62cbbSBarry Smith       1) starts[i] gives the starting index in svalues for stuff going to
1901eb62cbbSBarry Smith          the ith processor
1911eb62cbbSBarry Smith   */
1920452661fSBarry Smith   svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
1930452661fSBarry Smith   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
19478b31e54SBarry Smith   CHKPTRQ(send_waits);
1950452661fSBarry Smith   starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
1961eb62cbbSBarry Smith   starts[0] = 0;
19717699dbbSLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
1981eb62cbbSBarry Smith   for ( i=0; i<aij->stash.n; i++ ) {
1991eb62cbbSBarry Smith     svalues[3*starts[owner[i]]]       = (Scalar)  aij->stash.idx[i];
2001eb62cbbSBarry Smith     svalues[3*starts[owner[i]]+1]     = (Scalar)  aij->stash.idy[i];
2011eb62cbbSBarry Smith     svalues[3*(starts[owner[i]]++)+2] =  aij->stash.array[i];
2021eb62cbbSBarry Smith   }
2030452661fSBarry Smith   PetscFree(owner);
2041eb62cbbSBarry Smith   starts[0] = 0;
20517699dbbSLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
2061eb62cbbSBarry Smith   count = 0;
20717699dbbSLois Curfman McInnes   for ( i=0; i<size; i++ ) {
2081eb62cbbSBarry Smith     if (procs[i]) {
209416022c9SBarry Smith       MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
2101eb62cbbSBarry Smith                 comm,send_waits+count++);
2111eb62cbbSBarry Smith     }
2121eb62cbbSBarry Smith   }
2130452661fSBarry Smith   PetscFree(starts); PetscFree(nprocs);
2141eb62cbbSBarry Smith 
2151eb62cbbSBarry Smith   /* Free cache space */
2162191d07cSBarry Smith   PLogInfo(0,"[%d]MatAssemblyBegin_MPIAIJ:Number of off processor values %d\n",rank,aij->stash.n);
21778b31e54SBarry Smith   ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
2181eb62cbbSBarry Smith 
2191eb62cbbSBarry Smith   aij->svalues    = svalues;    aij->rvalues    = rvalues;
2201eb62cbbSBarry Smith   aij->nsends     = nsends;     aij->nrecvs     = nreceives;
2211eb62cbbSBarry Smith   aij->send_waits = send_waits; aij->recv_waits = recv_waits;
2221eb62cbbSBarry Smith   aij->rmax       = nmax;
2231eb62cbbSBarry Smith 
2241eb62cbbSBarry Smith   return 0;
2251eb62cbbSBarry Smith }
22644a69424SLois Curfman McInnes extern int MatSetUpMultiply_MPIAIJ(Mat);
2271eb62cbbSBarry Smith 
228fc76ce87SLois Curfman McInnes static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
2291eb62cbbSBarry Smith {
23044a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
231dbb450caSBarry Smith   Mat_SeqAIJ *C = (Mat_SeqAIJ *) aij->A->data;
2321eb62cbbSBarry Smith   MPI_Status  *send_status,recv_status;
233416022c9SBarry Smith   int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
234416022c9SBarry Smith   int         row,col,other_disassembled,shift = C->indexshift;
2351eb62cbbSBarry Smith   Scalar      *values,val;
2361eb62cbbSBarry Smith   InsertMode  addv = aij->insertmode;
2371eb62cbbSBarry Smith 
2381eb62cbbSBarry Smith   /*  wait on receives */
2391eb62cbbSBarry Smith   while (count) {
240d6dfbf8fSBarry Smith     MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
2411eb62cbbSBarry Smith     /* unpack receives into our local space */
242d6dfbf8fSBarry Smith     values = aij->rvalues + 3*imdex*aij->rmax;
243c60448a5SBarry Smith     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
2441eb62cbbSBarry Smith     n = n/3;
2451eb62cbbSBarry Smith     for ( i=0; i<n; i++ ) {
246227d817aSBarry Smith       row = (int) PetscReal(values[3*i]) - aij->rstart;
247227d817aSBarry Smith       col = (int) PetscReal(values[3*i+1]);
2481eb62cbbSBarry Smith       val = values[3*i+2];
2491eb62cbbSBarry Smith       if (col >= aij->cstart && col < aij->cend) {
2501eb62cbbSBarry Smith         col -= aij->cstart;
2511eb62cbbSBarry Smith         MatSetValues(aij->A,1,&row,1,&col,&val,addv);
2521eb62cbbSBarry Smith       }
2531eb62cbbSBarry Smith       else {
254227d817aSBarry Smith         if (mat->was_assembled) {
255b7c46309SBarry Smith           if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
256dbb450caSBarry Smith           col = aij->colmap[col] + shift;
257ec8511deSBarry Smith           if (col < 0  && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
2582493cbb0SBarry Smith             ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
259227d817aSBarry Smith             col = (int) PetscReal(values[3*i+1]);
260d6dfbf8fSBarry Smith           }
2619e25ed09SBarry Smith         }
2621eb62cbbSBarry Smith         MatSetValues(aij->B,1,&row,1,&col,&val,addv);
2631eb62cbbSBarry Smith       }
2641eb62cbbSBarry Smith     }
2651eb62cbbSBarry Smith     count--;
2661eb62cbbSBarry Smith   }
2670452661fSBarry Smith   PetscFree(aij->recv_waits); PetscFree(aij->rvalues);
2681eb62cbbSBarry Smith 
2691eb62cbbSBarry Smith   /* wait on sends */
2701eb62cbbSBarry Smith   if (aij->nsends) {
2710452661fSBarry Smith     send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));
27278b31e54SBarry Smith     CHKPTRQ(send_status);
2731eb62cbbSBarry Smith     MPI_Waitall(aij->nsends,aij->send_waits,send_status);
2740452661fSBarry Smith     PetscFree(send_status);
2751eb62cbbSBarry Smith   }
2760452661fSBarry Smith   PetscFree(aij->send_waits); PetscFree(aij->svalues);
2771eb62cbbSBarry Smith 
27864119d58SLois Curfman McInnes   aij->insertmode = NOT_SET_VALUES;
27978b31e54SBarry Smith   ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
28078b31e54SBarry Smith   ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);
2811eb62cbbSBarry Smith 
2822493cbb0SBarry Smith   /* determine if any processor has disassembled, if so we must
2832493cbb0SBarry Smith      also disassemble ourselfs, in order that we may reassemble. */
284227d817aSBarry Smith   MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
285227d817aSBarry Smith   if (mat->was_assembled && !other_disassembled) {
2862493cbb0SBarry Smith     ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
2872493cbb0SBarry Smith   }
2882493cbb0SBarry Smith 
289227d817aSBarry Smith   if (!mat->was_assembled && mode == FINAL_ASSEMBLY) {
29078b31e54SBarry Smith     ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
2915e42470aSBarry Smith   }
29278b31e54SBarry Smith   ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
29378b31e54SBarry Smith   ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);
2945e42470aSBarry Smith 
2957a0afa10SBarry Smith   if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;}
2968a729477SBarry Smith   return 0;
2978a729477SBarry Smith }
2988a729477SBarry Smith 
2992ee70a88SLois Curfman McInnes static int MatZeroEntries_MPIAIJ(Mat A)
3001eb62cbbSBarry Smith {
30144a69424SLois Curfman McInnes   Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
302dbd7a890SLois Curfman McInnes   int        ierr;
30378b31e54SBarry Smith   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
30478b31e54SBarry Smith   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
3051eb62cbbSBarry Smith   return 0;
3061eb62cbbSBarry Smith }
3071eb62cbbSBarry Smith 
3081eb62cbbSBarry Smith /* the code does not do the diagonal entries correctly unless the
3091eb62cbbSBarry Smith    matrix is square and the column and row owerships are identical.
3101eb62cbbSBarry Smith    This is a BUG. The only way to fix it seems to be to access
3111eb62cbbSBarry Smith    aij->A and aij->B directly and not through the MatZeroRows()
3121eb62cbbSBarry Smith    routine.
3131eb62cbbSBarry Smith */
31444a69424SLois Curfman McInnes static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
3151eb62cbbSBarry Smith {
31644a69424SLois Curfman McInnes   Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
31717699dbbSLois Curfman McInnes   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
3186abc6512SBarry Smith   int            *procs,*nprocs,j,found,idx,nsends,*work;
31917699dbbSLois Curfman McInnes   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
3205392566eSBarry Smith   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
321d6dfbf8fSBarry Smith   int            *lens,imdex,*lrows,*values;
322d6dfbf8fSBarry Smith   MPI_Comm       comm = A->comm;
3231eb62cbbSBarry Smith   MPI_Request    *send_waits,*recv_waits;
3241eb62cbbSBarry Smith   MPI_Status     recv_status,*send_status;
3251eb62cbbSBarry Smith   IS             istmp;
3261eb62cbbSBarry Smith 
32777c4ece6SBarry Smith   ierr = ISGetSize(is,&N); CHKERRQ(ierr);
32878b31e54SBarry Smith   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
3291eb62cbbSBarry Smith 
3301eb62cbbSBarry Smith   /*  first count number of contributors to each processor */
3310452661fSBarry Smith   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
332cddf8d76SBarry Smith   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
3330452661fSBarry Smith   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
3341eb62cbbSBarry Smith   for ( i=0; i<N; i++ ) {
3351eb62cbbSBarry Smith     idx = rows[i];
3361eb62cbbSBarry Smith     found = 0;
33717699dbbSLois Curfman McInnes     for ( j=0; j<size; j++ ) {
3381eb62cbbSBarry Smith       if (idx >= owners[j] && idx < owners[j+1]) {
3391eb62cbbSBarry Smith         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
3401eb62cbbSBarry Smith       }
3411eb62cbbSBarry Smith     }
342bbb6d6a8SBarry Smith     if (!found) SETERRQ(1,"MatZeroRows_MPIAIJ:Index out of range");
3431eb62cbbSBarry Smith   }
34417699dbbSLois Curfman McInnes   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
3451eb62cbbSBarry Smith 
3461eb62cbbSBarry Smith   /* inform other processors of number of messages and max length*/
3470452661fSBarry Smith   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
34817699dbbSLois Curfman McInnes   MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
34917699dbbSLois Curfman McInnes   nrecvs = work[rank];
35017699dbbSLois Curfman McInnes   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
35117699dbbSLois Curfman McInnes   nmax = work[rank];
3520452661fSBarry Smith   PetscFree(work);
3531eb62cbbSBarry Smith 
3541eb62cbbSBarry Smith   /* post receives:   */
3550452661fSBarry Smith   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
35678b31e54SBarry Smith   CHKPTRQ(rvalues);
3570452661fSBarry Smith   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
35878b31e54SBarry Smith   CHKPTRQ(recv_waits);
3591eb62cbbSBarry Smith   for ( i=0; i<nrecvs; i++ ) {
360416022c9SBarry Smith     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
3611eb62cbbSBarry Smith   }
3621eb62cbbSBarry Smith 
3631eb62cbbSBarry Smith   /* do sends:
3641eb62cbbSBarry Smith       1) starts[i] gives the starting index in svalues for stuff going to
3651eb62cbbSBarry Smith          the ith processor
3661eb62cbbSBarry Smith   */
3670452661fSBarry Smith   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
3680452661fSBarry Smith   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
36978b31e54SBarry Smith   CHKPTRQ(send_waits);
3700452661fSBarry Smith   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
3711eb62cbbSBarry Smith   starts[0] = 0;
37217699dbbSLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
3731eb62cbbSBarry Smith   for ( i=0; i<N; i++ ) {
3741eb62cbbSBarry Smith     svalues[starts[owner[i]]++] = rows[i];
3751eb62cbbSBarry Smith   }
3761eb62cbbSBarry Smith   ISRestoreIndices(is,&rows);
3771eb62cbbSBarry Smith 
3781eb62cbbSBarry Smith   starts[0] = 0;
37917699dbbSLois Curfman McInnes   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
3801eb62cbbSBarry Smith   count = 0;
38117699dbbSLois Curfman McInnes   for ( i=0; i<size; i++ ) {
3821eb62cbbSBarry Smith     if (procs[i]) {
383416022c9SBarry Smith       MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
3841eb62cbbSBarry Smith     }
3851eb62cbbSBarry Smith   }
3860452661fSBarry Smith   PetscFree(starts);
3871eb62cbbSBarry Smith 
38817699dbbSLois Curfman McInnes   base = owners[rank];
3891eb62cbbSBarry Smith 
3901eb62cbbSBarry Smith   /*  wait on receives */
3910452661fSBarry Smith   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
3921eb62cbbSBarry Smith   source = lens + nrecvs;
3931eb62cbbSBarry Smith   count  = nrecvs; slen = 0;
3941eb62cbbSBarry Smith   while (count) {
395d6dfbf8fSBarry Smith     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
3961eb62cbbSBarry Smith     /* unpack receives into our local space */
3971eb62cbbSBarry Smith     MPI_Get_count(&recv_status,MPI_INT,&n);
398d6dfbf8fSBarry Smith     source[imdex]  = recv_status.MPI_SOURCE;
399d6dfbf8fSBarry Smith     lens[imdex]  = n;
4001eb62cbbSBarry Smith     slen += n;
4011eb62cbbSBarry Smith     count--;
4021eb62cbbSBarry Smith   }
4030452661fSBarry Smith   PetscFree(recv_waits);
4041eb62cbbSBarry Smith 
4051eb62cbbSBarry Smith   /* move the data into the send scatter */
4060452661fSBarry Smith   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
4071eb62cbbSBarry Smith   count = 0;
4081eb62cbbSBarry Smith   for ( i=0; i<nrecvs; i++ ) {
4091eb62cbbSBarry Smith     values = rvalues + i*nmax;
4101eb62cbbSBarry Smith     for ( j=0; j<lens[i]; j++ ) {
4111eb62cbbSBarry Smith       lrows[count++] = values[j] - base;
4121eb62cbbSBarry Smith     }
4131eb62cbbSBarry Smith   }
4140452661fSBarry Smith   PetscFree(rvalues); PetscFree(lens);
4150452661fSBarry Smith   PetscFree(owner); PetscFree(nprocs);
4161eb62cbbSBarry Smith 
4171eb62cbbSBarry Smith   /* actually zap the local rows */
418416022c9SBarry Smith   ierr = ISCreateSeq(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
419464493b3SBarry Smith   PLogObjectParent(A,istmp);
4200452661fSBarry Smith   PetscFree(lrows);
42178b31e54SBarry Smith   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
42278b31e54SBarry Smith   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
42378b31e54SBarry Smith   ierr = ISDestroy(istmp); CHKERRQ(ierr);
4241eb62cbbSBarry Smith 
4251eb62cbbSBarry Smith   /* wait on sends */
4261eb62cbbSBarry Smith   if (nsends) {
4270452661fSBarry Smith     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
42878b31e54SBarry Smith     CHKPTRQ(send_status);
4291eb62cbbSBarry Smith     MPI_Waitall(nsends,send_waits,send_status);
4300452661fSBarry Smith     PetscFree(send_status);
4311eb62cbbSBarry Smith   }
4320452661fSBarry Smith   PetscFree(send_waits); PetscFree(svalues);
4331eb62cbbSBarry Smith 
4341eb62cbbSBarry Smith   return 0;
4351eb62cbbSBarry Smith }
4361eb62cbbSBarry Smith 
437*44cd7ae7SLois Curfman McInnes extern int MatMult_SeqAIJ(Mat A,Vec,Vec);
438*44cd7ae7SLois Curfman McInnes extern int MatMultAdd_SeqAIJ(Mat A,Vec,Vec,Vec);
439*44cd7ae7SLois Curfman McInnes extern int MatMultTrans_SeqAIJ(Mat A,Vec,Vec);
440*44cd7ae7SLois Curfman McInnes extern int MatMultTransAdd_SeqAIJ(Mat A,Vec,Vec,Vec);
441416022c9SBarry Smith static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
4421eb62cbbSBarry Smith {
443416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
4441eb62cbbSBarry Smith   int        ierr;
445416022c9SBarry Smith 
44664119d58SLois Curfman McInnes   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx); CHKERRQ(ierr);
447*44cd7ae7SLois Curfman McInnes   ierr = MatMult_SeqAIJ(a->A,xx,yy); CHKERRQ(ierr);
44864119d58SLois Curfman McInnes   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx); CHKERRQ(ierr);
449*44cd7ae7SLois Curfman McInnes   ierr = MatMultAdd_SeqAIJ(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
4501eb62cbbSBarry Smith   return 0;
4511eb62cbbSBarry Smith }
4521eb62cbbSBarry Smith 
453416022c9SBarry Smith static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
454da3a660dSBarry Smith {
455416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
456da3a660dSBarry Smith   int        ierr;
45764119d58SLois Curfman McInnes   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
458*44cd7ae7SLois Curfman McInnes   ierr = MatMultAdd_SeqAIJ(a->A,xx,yy,zz); CHKERRQ(ierr);
45964119d58SLois Curfman McInnes   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
460*44cd7ae7SLois Curfman McInnes   ierr = MatMultAdd_SeqAIJ(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
461da3a660dSBarry Smith   return 0;
462da3a660dSBarry Smith }
463da3a660dSBarry Smith 
464416022c9SBarry Smith static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
465da3a660dSBarry Smith {
466416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
467da3a660dSBarry Smith   int        ierr;
468da3a660dSBarry Smith 
469da3a660dSBarry Smith   /* do nondiagonal part */
470*44cd7ae7SLois Curfman McInnes   ierr = MatMultTrans_SeqAIJ(a->B,xx,a->lvec); CHKERRQ(ierr);
471da3a660dSBarry Smith   /* send it on its way */
472416022c9SBarry Smith   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,
47364119d58SLois Curfman McInnes                 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
474da3a660dSBarry Smith   /* do local part */
475*44cd7ae7SLois Curfman McInnes   ierr = MatMultTrans_SeqAIJ(a->A,xx,yy); CHKERRQ(ierr);
476da3a660dSBarry Smith   /* receive remote parts: note this assumes the values are not actually */
477da3a660dSBarry Smith   /* inserted in yy until the next line, which is true for my implementation*/
478da3a660dSBarry Smith   /* but is not perhaps always true. */
479416022c9SBarry Smith   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,
48064119d58SLois Curfman McInnes                   (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
481da3a660dSBarry Smith   return 0;
482da3a660dSBarry Smith }
483da3a660dSBarry Smith 
484416022c9SBarry Smith static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
485da3a660dSBarry Smith {
486416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
487da3a660dSBarry Smith   int        ierr;
488da3a660dSBarry Smith 
489da3a660dSBarry Smith   /* do nondiagonal part */
490*44cd7ae7SLois Curfman McInnes   ierr = MatMultTrans_SeqAIJ(a->B,xx,a->lvec); CHKERRQ(ierr);
491da3a660dSBarry Smith   /* send it on its way */
492416022c9SBarry Smith   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,
49364119d58SLois Curfman McInnes                  (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
494da3a660dSBarry Smith   /* do local part */
495*44cd7ae7SLois Curfman McInnes   ierr = MatMultTransAdd_SeqAIJ(a->A,xx,yy,zz); CHKERRQ(ierr);
496da3a660dSBarry Smith   /* receive remote parts: note this assumes the values are not actually */
497da3a660dSBarry Smith   /* inserted in yy until the next line, which is true for my implementation*/
498da3a660dSBarry Smith   /* but is not perhaps always true. */
499416022c9SBarry Smith   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,
50064119d58SLois Curfman McInnes                   (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
501da3a660dSBarry Smith   return 0;
502da3a660dSBarry Smith }
503da3a660dSBarry Smith 
5041eb62cbbSBarry Smith /*
5051eb62cbbSBarry Smith   This only works correctly for square matrices where the subblock A->A is the
5061eb62cbbSBarry Smith    diagonal block
5071eb62cbbSBarry Smith */
508416022c9SBarry Smith static int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
5091eb62cbbSBarry Smith {
510416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
511*44cd7ae7SLois Curfman McInnes   if (a->M != a->N)
512*44cd7ae7SLois Curfman McInnes     SETERRQ(1,"MatGetDiagonal_MPIAIJ:Supports only square matrix where A->A is diag block");
513416022c9SBarry Smith   return MatGetDiagonal(a->A,v);
5141eb62cbbSBarry Smith }
5151eb62cbbSBarry Smith 
516052efed2SBarry Smith static int MatScale_MPIAIJ(Scalar *aa,Mat A)
517052efed2SBarry Smith {
518052efed2SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
519052efed2SBarry Smith   int        ierr;
520052efed2SBarry Smith   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
521052efed2SBarry Smith   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
522052efed2SBarry Smith   return 0;
523052efed2SBarry Smith }
524052efed2SBarry Smith 
52544a69424SLois Curfman McInnes static int MatDestroy_MPIAIJ(PetscObject obj)
5261eb62cbbSBarry Smith {
5271eb62cbbSBarry Smith   Mat        mat = (Mat) obj;
52844a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
5291eb62cbbSBarry Smith   int        ierr;
530a5a9c739SBarry Smith #if defined(PETSC_LOG)
5316e35fa57SLois Curfman McInnes   PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N);
532a5a9c739SBarry Smith #endif
5330452661fSBarry Smith   PetscFree(aij->rowners);
53478b31e54SBarry Smith   ierr = MatDestroy(aij->A); CHKERRQ(ierr);
53578b31e54SBarry Smith   ierr = MatDestroy(aij->B); CHKERRQ(ierr);
5360452661fSBarry Smith   if (aij->colmap) PetscFree(aij->colmap);
5370452661fSBarry Smith   if (aij->garray) PetscFree(aij->garray);
5381eb62cbbSBarry Smith   if (aij->lvec)   VecDestroy(aij->lvec);
539a56f8943SBarry Smith   if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
5407a0afa10SBarry Smith   if (aij->rowvalues) PetscFree(aij->rowvalues);
5410452661fSBarry Smith   PetscFree(aij);
542a5a9c739SBarry Smith   PLogObjectDestroy(mat);
5430452661fSBarry Smith   PetscHeaderDestroy(mat);
5441eb62cbbSBarry Smith   return 0;
5451eb62cbbSBarry Smith }
5467857610eSBarry Smith #include "draw.h"
547b16a3bb1SBarry Smith #include "pinclude/pviewer.h"
548ee50ffe9SBarry Smith 
549416022c9SBarry Smith static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
5501eb62cbbSBarry Smith {
551416022c9SBarry Smith   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
552416022c9SBarry Smith   int         ierr;
553416022c9SBarry Smith 
55417699dbbSLois Curfman McInnes   if (aij->size == 1) {
555416022c9SBarry Smith     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
556416022c9SBarry Smith   }
557a523beb4SLois Curfman McInnes   else SETERRQ(1,"MatView_MPIAIJ_Binary:Only uniprocessor output supported");
558416022c9SBarry Smith   return 0;
559416022c9SBarry Smith }
560416022c9SBarry Smith 
561d7e8b826SBarry Smith static int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
562416022c9SBarry Smith {
56344a69424SLois Curfman McInnes   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
564dbb450caSBarry Smith   Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
565a56f8943SBarry Smith   int         ierr, format,shift = C->indexshift,rank;
566d636dbe3SBarry Smith   FILE        *fd;
56719bcc07fSBarry Smith   ViewerType  vtype;
568416022c9SBarry Smith 
56919bcc07fSBarry Smith   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
57019bcc07fSBarry Smith   if (vtype  == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
57190ace30eSBarry Smith     ierr = ViewerGetFormat(viewer,&format);
57290ace30eSBarry Smith     if (format == ASCII_FORMAT_INFO_DETAILED) {
57395e01e2fSLois Curfman McInnes       int nz, nzalloc, mem, flg;
574a56f8943SBarry Smith       MPI_Comm_rank(mat->comm,&rank);
57590ace30eSBarry Smith       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
576a56f8943SBarry Smith       ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem);
57795e01e2fSLois Curfman McInnes       ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr);
57877c4ece6SBarry Smith       PetscSequentialPhaseBegin(mat->comm,1);
57995e01e2fSLois Curfman McInnes       if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
58095e01e2fSLois Curfman McInnes          rank,aij->m,nz,nzalloc,mem);
58195e01e2fSLois Curfman McInnes       else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
58295e01e2fSLois Curfman McInnes          rank,aij->m,nz,nzalloc,mem);
58308480c60SBarry Smith       ierr = MatGetInfo(aij->A,MAT_LOCAL,&nz,&nzalloc,&mem);
58495e01e2fSLois Curfman McInnes       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,nz);
58508480c60SBarry Smith       ierr = MatGetInfo(aij->B,MAT_LOCAL,&nz,&nzalloc,&mem);
58695e01e2fSLois Curfman McInnes       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,nz);
587a56f8943SBarry Smith       fflush(fd);
58877c4ece6SBarry Smith       PetscSequentialPhaseEnd(mat->comm,1);
589a40aa06bSLois Curfman McInnes       ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr);
590416022c9SBarry Smith       return 0;
591416022c9SBarry Smith     }
59290ace30eSBarry Smith     else if (format == ASCII_FORMAT_INFO) {
59308480c60SBarry Smith       return 0;
59408480c60SBarry Smith     }
595416022c9SBarry Smith   }
596416022c9SBarry Smith 
59719bcc07fSBarry Smith   if (vtype == DRAW_VIEWER) {
59819bcc07fSBarry Smith     Draw       draw;
59919bcc07fSBarry Smith     PetscTruth isnull;
60019bcc07fSBarry Smith     ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
60119bcc07fSBarry Smith     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
60219bcc07fSBarry Smith   }
60319bcc07fSBarry Smith 
60419bcc07fSBarry Smith   if (vtype == ASCII_FILE_VIEWER) {
60590ace30eSBarry Smith     ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
60677c4ece6SBarry Smith     PetscSequentialPhaseBegin(mat->comm,1);
607d13ab20cSBarry Smith     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
60817699dbbSLois Curfman McInnes            aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
6091eb62cbbSBarry Smith            aij->cend);
61078b31e54SBarry Smith     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
61178b31e54SBarry Smith     ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
612d13ab20cSBarry Smith     fflush(fd);
61377c4ece6SBarry Smith     PetscSequentialPhaseEnd(mat->comm,1);
614d13ab20cSBarry Smith   }
615416022c9SBarry Smith   else {
616a56f8943SBarry Smith     int size = aij->size;
617a56f8943SBarry Smith     rank = aij->rank;
61817699dbbSLois Curfman McInnes     if (size == 1) {
61978b31e54SBarry Smith       ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
62095373324SBarry Smith     }
62195373324SBarry Smith     else {
62295373324SBarry Smith       /* assemble the entire matrix onto first processor. */
62395373324SBarry Smith       Mat         A;
624ec8511deSBarry Smith       Mat_SeqAIJ *Aloc;
6252eb8c8abSBarry Smith       int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
62695373324SBarry Smith       Scalar      *a;
6272ee70a88SLois Curfman McInnes 
62817699dbbSLois Curfman McInnes       if (!rank) {
629b4fd4287SBarry Smith         ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
630c451ab25SLois Curfman McInnes                CHKERRQ(ierr);
63195373324SBarry Smith       }
63295373324SBarry Smith       else {
633b4fd4287SBarry Smith         ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
634c451ab25SLois Curfman McInnes                CHKERRQ(ierr);
63595373324SBarry Smith       }
636464493b3SBarry Smith       PLogObjectParent(mat,A);
637416022c9SBarry Smith 
63895373324SBarry Smith       /* copy over the A part */
639ec8511deSBarry Smith       Aloc = (Mat_SeqAIJ*) aij->A->data;
6402ee70a88SLois Curfman McInnes       m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
64195373324SBarry Smith       row = aij->rstart;
642dbb450caSBarry Smith       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
64395373324SBarry Smith       for ( i=0; i<m; i++ ) {
644416022c9SBarry Smith         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
64595373324SBarry Smith         row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
64695373324SBarry Smith       }
6472ee70a88SLois Curfman McInnes       aj = Aloc->j;
648dbb450caSBarry Smith       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}
64995373324SBarry Smith 
65095373324SBarry Smith       /* copy over the B part */
651ec8511deSBarry Smith       Aloc = (Mat_SeqAIJ*) aij->B->data;
6522ee70a88SLois Curfman McInnes       m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
65395373324SBarry Smith       row = aij->rstart;
6540452661fSBarry Smith       ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
655dbb450caSBarry Smith       for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
65695373324SBarry Smith       for ( i=0; i<m; i++ ) {
657416022c9SBarry Smith         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
65895373324SBarry Smith         row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
65995373324SBarry Smith       }
6600452661fSBarry Smith       PetscFree(ct);
66178b31e54SBarry Smith       ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
66278b31e54SBarry Smith       ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
66317699dbbSLois Curfman McInnes       if (!rank) {
66478b31e54SBarry Smith         ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
66595373324SBarry Smith       }
66678b31e54SBarry Smith       ierr = MatDestroy(A); CHKERRQ(ierr);
66795373324SBarry Smith     }
66895373324SBarry Smith   }
6691eb62cbbSBarry Smith   return 0;
6701eb62cbbSBarry Smith }
6711eb62cbbSBarry Smith 
672416022c9SBarry Smith static int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
673416022c9SBarry Smith {
674416022c9SBarry Smith   Mat         mat = (Mat) obj;
675416022c9SBarry Smith   int         ierr;
67619bcc07fSBarry Smith   ViewerType  vtype;
677416022c9SBarry Smith 
678416022c9SBarry Smith   if (!viewer) {
67919bcc07fSBarry Smith     viewer = STDOUT_VIEWER_SELF;
680416022c9SBarry Smith   }
68119bcc07fSBarry Smith   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
68219bcc07fSBarry Smith   if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
68319bcc07fSBarry Smith       vtype == DRAW_VIEWER       || vtype == MATLAB_VIEWER) {
684d7e8b826SBarry Smith     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
685416022c9SBarry Smith   }
68619bcc07fSBarry Smith   else if (vtype == BINARY_FILE_VIEWER) {
687416022c9SBarry Smith     return MatView_MPIAIJ_Binary(mat,viewer);
688416022c9SBarry Smith   }
689416022c9SBarry Smith   return 0;
690416022c9SBarry Smith }
691416022c9SBarry Smith 
692ec8511deSBarry Smith extern int MatMarkDiag_SeqAIJ(Mat);
693*44cd7ae7SLois Curfman McInnes extern int MatRelax_SeqAIJ(Mat,Vec,double,MatSORType,double,int,Vec);
6941eb62cbbSBarry Smith /*
6951eb62cbbSBarry Smith     This has to provide several versions.
6961eb62cbbSBarry Smith 
6971eb62cbbSBarry Smith      1) per sequential
6981eb62cbbSBarry Smith      2) a) use only local smoothing updating outer values only once.
6991eb62cbbSBarry Smith         b) local smoothing updating outer values each inner iteration
700d6dfbf8fSBarry Smith      3) color updating out values betwen colors.
7011eb62cbbSBarry Smith */
702fc76ce87SLois Curfman McInnes static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
703dbb450caSBarry Smith                            double fshift,int its,Vec xx)
7048a729477SBarry Smith {
70544a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
706d6dfbf8fSBarry Smith   Mat        AA = mat->A, BB = mat->B;
707ec8511deSBarry Smith   Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
7086abc6512SBarry Smith   Scalar     zero = 0.0,*b,*x,*xs,*ls,d,*v,sum,scale,*t,*ts;
7096abc6512SBarry Smith   int        ierr,*idx, *diag;
710416022c9SBarry Smith   int        n = mat->n, m = mat->m, i,shift = A->indexshift;
711da3a660dSBarry Smith   Vec        tt;
7128a729477SBarry Smith 
713d6dfbf8fSBarry Smith   VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
714dbb450caSBarry Smith   xs = x + shift; /* shift by one for index start of 1 */
715dbb450caSBarry Smith   ls = ls + shift;
716ec8511deSBarry Smith   if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;}
717d6dfbf8fSBarry Smith   diag = A->diag;
718acb40c82SBarry Smith   if (flag == SOR_APPLY_UPPER || flag == SOR_APPLY_LOWER) {
71948d91487SBarry Smith     SETERRQ(1,"MatRelax_MPIAIJ:Option not supported");
720acb40c82SBarry Smith   }
721da3a660dSBarry Smith   if (flag & SOR_EISENSTAT) {
722da3a660dSBarry Smith     /* Let  A = L + U + D; where L is lower trianglar,
723da3a660dSBarry Smith     U is upper triangular, E is diagonal; This routine applies
724da3a660dSBarry Smith 
725da3a660dSBarry Smith             (L + E)^{-1} A (U + E)^{-1}
726da3a660dSBarry Smith 
727da3a660dSBarry Smith     to a vector efficiently using Eisenstat's trick. This is for
728da3a660dSBarry Smith     the case of SSOR preconditioner, so E is D/omega where omega
729da3a660dSBarry Smith     is the relaxation factor.
730da3a660dSBarry Smith     */
73178b31e54SBarry Smith     ierr = VecDuplicate(xx,&tt); CHKERRQ(ierr);
732464493b3SBarry Smith     PLogObjectParent(matin,tt);
733da3a660dSBarry Smith     VecGetArray(tt,&t);
734da3a660dSBarry Smith     scale = (2.0/omega) - 1.0;
735da3a660dSBarry Smith     /*  x = (E + U)^{-1} b */
736da3a660dSBarry Smith     VecSet(&zero,mat->lvec);
73764119d58SLois Curfman McInnes     ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
73878b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
739da3a660dSBarry Smith     for ( i=m-1; i>-1; i-- ) {
740da3a660dSBarry Smith       n    = A->i[i+1] - diag[i] - 1;
741dbb450caSBarry Smith       idx  = A->j + diag[i] + !shift;
742dbb450caSBarry Smith       v    = A->a + diag[i] + !shift;
743da3a660dSBarry Smith       sum  = b[i];
744da3a660dSBarry Smith       SPARSEDENSEMDOT(sum,xs,v,idx,n);
745dbb450caSBarry Smith       d    = fshift + A->a[diag[i]+shift];
746da3a660dSBarry Smith       n    = B->i[i+1] - B->i[i];
747dbb450caSBarry Smith       idx  = B->j + B->i[i] + shift;
748dbb450caSBarry Smith       v    = B->a + B->i[i] + shift;
749da3a660dSBarry Smith       SPARSEDENSEMDOT(sum,ls,v,idx,n);
750da3a660dSBarry Smith       x[i] = omega*(sum/d);
751da3a660dSBarry Smith     }
75264119d58SLois Curfman McInnes     ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
75378b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
754da3a660dSBarry Smith 
755da3a660dSBarry Smith     /*  t = b - (2*E - D)x */
756da3a660dSBarry Smith     v = A->a;
757dbb450caSBarry Smith     for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ + shift])*x[i]; }
758da3a660dSBarry Smith 
759da3a660dSBarry Smith     /*  t = (E + L)^{-1}t */
760dbb450caSBarry Smith     ts = t + shift; /* shifted by one for index start of a or mat->j*/
761da3a660dSBarry Smith     diag = A->diag;
762da3a660dSBarry Smith     VecSet(&zero,mat->lvec);
76364119d58SLois Curfman McInnes     ierr = VecPipelineBegin(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
76478b31e54SBarry Smith                                                  mat->Mvctx); CHKERRQ(ierr);
765da3a660dSBarry Smith     for ( i=0; i<m; i++ ) {
766da3a660dSBarry Smith       n    = diag[i] - A->i[i];
767dbb450caSBarry Smith       idx  = A->j + A->i[i] + shift;
768dbb450caSBarry Smith       v    = A->a + A->i[i] + shift;
769da3a660dSBarry Smith       sum  = t[i];
770da3a660dSBarry Smith       SPARSEDENSEMDOT(sum,ts,v,idx,n);
771dbb450caSBarry Smith       d    = fshift + A->a[diag[i]+shift];
772da3a660dSBarry Smith       n    = B->i[i+1] - B->i[i];
773dbb450caSBarry Smith       idx  = B->j + B->i[i] + shift;
774dbb450caSBarry Smith       v    = B->a + B->i[i] + shift;
775da3a660dSBarry Smith       SPARSEDENSEMDOT(sum,ls,v,idx,n);
776da3a660dSBarry Smith       t[i] = omega*(sum/d);
777da3a660dSBarry Smith     }
77864119d58SLois Curfman McInnes     ierr = VecPipelineEnd(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
77978b31e54SBarry Smith                                                     mat->Mvctx); CHKERRQ(ierr);
780da3a660dSBarry Smith     /*  x = x + t */
781da3a660dSBarry Smith     for ( i=0; i<m; i++ ) { x[i] += t[i]; }
782da3a660dSBarry Smith     VecDestroy(tt);
783da3a660dSBarry Smith     return 0;
784da3a660dSBarry Smith   }
785da3a660dSBarry Smith 
7861eb62cbbSBarry Smith 
787d6dfbf8fSBarry Smith   if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){
788da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
789da3a660dSBarry Smith       VecSet(&zero,mat->lvec); VecSet(&zero,xx);
790da3a660dSBarry Smith     }
791da3a660dSBarry Smith     else {
79264119d58SLois Curfman McInnes       ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
79378b31e54SBarry Smith       CHKERRQ(ierr);
79464119d58SLois Curfman McInnes       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
79578b31e54SBarry Smith       CHKERRQ(ierr);
796da3a660dSBarry Smith     }
797d6dfbf8fSBarry Smith     while (its--) {
798d6dfbf8fSBarry Smith       /* go down through the rows */
79964119d58SLois Curfman McInnes       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
80078b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
801d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
802d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
803dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
804dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
805d6dfbf8fSBarry Smith         sum  = b[i];
806d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
807dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
808d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
809dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
810dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
811d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
812d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
813d6dfbf8fSBarry Smith       }
81464119d58SLois Curfman McInnes       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
81578b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
816d6dfbf8fSBarry Smith       /* come up through the rows */
81764119d58SLois Curfman McInnes       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
81878b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
819d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
820d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
821dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
822dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
823d6dfbf8fSBarry Smith         sum  = b[i];
824d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
825dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
826d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
827dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
828dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
829d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
830d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
831d6dfbf8fSBarry Smith       }
83264119d58SLois Curfman McInnes       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
83378b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
834d6dfbf8fSBarry Smith     }
835d6dfbf8fSBarry Smith   }
836d6dfbf8fSBarry Smith   else if (flag & SOR_FORWARD_SWEEP){
837da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
838da3a660dSBarry Smith       VecSet(&zero,mat->lvec);
83964119d58SLois Curfman McInnes       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
84078b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
841da3a660dSBarry Smith       for ( i=0; i<m; i++ ) {
842da3a660dSBarry Smith         n    = diag[i] - A->i[i];
843dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
844dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
845da3a660dSBarry Smith         sum  = b[i];
846da3a660dSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
847dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
848da3a660dSBarry Smith         n    = B->i[i+1] - B->i[i];
849dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
850dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
851da3a660dSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
852da3a660dSBarry Smith         x[i] = omega*(sum/d);
853da3a660dSBarry Smith       }
85464119d58SLois Curfman McInnes       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
85578b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
856da3a660dSBarry Smith       its--;
857da3a660dSBarry Smith     }
858d6dfbf8fSBarry Smith     while (its--) {
85964119d58SLois Curfman McInnes       ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
86078b31e54SBarry Smith       CHKERRQ(ierr);
86164119d58SLois Curfman McInnes       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
86278b31e54SBarry Smith       CHKERRQ(ierr);
86364119d58SLois Curfman McInnes       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
86478b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
865d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
866d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
867dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
868dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
869d6dfbf8fSBarry Smith         sum  = b[i];
870d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
871dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
872d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
873dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
874dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
875d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
876d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
877d6dfbf8fSBarry Smith       }
87864119d58SLois Curfman McInnes       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
87978b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
880d6dfbf8fSBarry Smith     }
881d6dfbf8fSBarry Smith   }
882d6dfbf8fSBarry Smith   else if (flag & SOR_BACKWARD_SWEEP){
883da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
884da3a660dSBarry Smith       VecSet(&zero,mat->lvec);
88564119d58SLois Curfman McInnes       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
88678b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
887da3a660dSBarry Smith       for ( i=m-1; i>-1; i-- ) {
888da3a660dSBarry Smith         n    = A->i[i+1] - diag[i] - 1;
889dbb450caSBarry Smith         idx  = A->j + diag[i] + !shift;
890dbb450caSBarry Smith         v    = A->a + diag[i] + !shift;
891da3a660dSBarry Smith         sum  = b[i];
892da3a660dSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
893dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
894da3a660dSBarry Smith         n    = B->i[i+1] - B->i[i];
895dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
896dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
897da3a660dSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
898da3a660dSBarry Smith         x[i] = omega*(sum/d);
899da3a660dSBarry Smith       }
90064119d58SLois Curfman McInnes       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
90178b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
902da3a660dSBarry Smith       its--;
903da3a660dSBarry Smith     }
904d6dfbf8fSBarry Smith     while (its--) {
90564119d58SLois Curfman McInnes       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN,
90678b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
90764119d58SLois Curfman McInnes       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN,
90878b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
90964119d58SLois Curfman McInnes       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
91078b31e54SBarry Smith                               mat->Mvctx); CHKERRQ(ierr);
911d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
912d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
913dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
914dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
915d6dfbf8fSBarry Smith         sum  = b[i];
916d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
917dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
918d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
919dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
920dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
921d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
922d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
923d6dfbf8fSBarry Smith       }
92464119d58SLois Curfman McInnes       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
92578b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
926d6dfbf8fSBarry Smith     }
927d6dfbf8fSBarry Smith   }
928d6dfbf8fSBarry Smith   else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
929da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
930*44cd7ae7SLois Curfman McInnes       return MatRelax_SeqAIJ(mat->A,bb,omega,flag,fshift,its,xx);
931da3a660dSBarry Smith     }
93264119d58SLois Curfman McInnes     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
93378b31e54SBarry Smith     CHKERRQ(ierr);
93464119d58SLois Curfman McInnes     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
93578b31e54SBarry Smith     CHKERRQ(ierr);
936d6dfbf8fSBarry Smith     while (its--) {
937d6dfbf8fSBarry Smith       /* go down through the rows */
938d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
939d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
940dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
941dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
942d6dfbf8fSBarry Smith         sum  = b[i];
943d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
944dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
945d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
946dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
947dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
948d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
949d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
950d6dfbf8fSBarry Smith       }
951d6dfbf8fSBarry Smith       /* come up through the rows */
952d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
953d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
954dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
955dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
956d6dfbf8fSBarry Smith         sum  = b[i];
957d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
958dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
959d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
960dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
961dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
962d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
963d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
964d6dfbf8fSBarry Smith       }
965d6dfbf8fSBarry Smith     }
966d6dfbf8fSBarry Smith   }
967d6dfbf8fSBarry Smith   else if (flag & SOR_LOCAL_FORWARD_SWEEP){
968da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
969*44cd7ae7SLois Curfman McInnes       return MatRelax_SeqAIJ(mat->A,bb,omega,flag,fshift,its,xx);
970da3a660dSBarry Smith     }
97164119d58SLois Curfman McInnes     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
97278b31e54SBarry Smith     CHKERRQ(ierr);
97364119d58SLois Curfman McInnes     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
97478b31e54SBarry Smith     CHKERRQ(ierr);
975d6dfbf8fSBarry Smith     while (its--) {
976d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
977d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
978dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
979dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
980d6dfbf8fSBarry Smith         sum  = b[i];
981d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
982dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
983d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
984dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
985dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
986d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
987d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
988d6dfbf8fSBarry Smith       }
989d6dfbf8fSBarry Smith     }
990d6dfbf8fSBarry Smith   }
991d6dfbf8fSBarry Smith   else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
992da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
993*44cd7ae7SLois Curfman McInnes       return MatRelax_SeqAIJ(mat->A,bb,omega,flag,fshift,its,xx);
994da3a660dSBarry Smith     }
99564119d58SLois Curfman McInnes     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,
99678b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
99764119d58SLois Curfman McInnes     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,
99878b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
999d6dfbf8fSBarry Smith     while (its--) {
1000d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
1001d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
1002dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
1003dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
1004d6dfbf8fSBarry Smith         sum  = b[i];
1005d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
1006dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
1007d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
1008dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
1009dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
1010d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
1011d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
1012d6dfbf8fSBarry Smith       }
1013d6dfbf8fSBarry Smith     }
1014d6dfbf8fSBarry Smith   }
10158a729477SBarry Smith   return 0;
10168a729477SBarry Smith }
1017a66be287SLois Curfman McInnes 
1018fc76ce87SLois Curfman McInnes static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,int *nz,
1019fc76ce87SLois Curfman McInnes                              int *nzalloc,int *mem)
1020a66be287SLois Curfman McInnes {
1021a66be287SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1022a66be287SLois Curfman McInnes   Mat        A = mat->A, B = mat->B;
1023a66be287SLois Curfman McInnes   int        ierr, isend[3], irecv[3], nzA, nzallocA, memA;
1024a66be287SLois Curfman McInnes 
102578b31e54SBarry Smith   ierr = MatGetInfo(A,MAT_LOCAL,&nzA,&nzallocA,&memA); CHKERRQ(ierr);
102678b31e54SBarry Smith   ierr = MatGetInfo(B,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr);
1027a66be287SLois Curfman McInnes   isend[0] += nzA; isend[1] += nzallocA; isend[2] += memA;
1028a66be287SLois Curfman McInnes   if (flag == MAT_LOCAL) {
1029bcd2baecSBarry Smith     if (nz)       *nz      = isend[0];
1030bcd2baecSBarry Smith     if (nzalloc)  *nzalloc = isend[1];
1031bcd2baecSBarry Smith     if (mem)      *mem     = isend[2];
1032a66be287SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_MAX) {
1033d65a2f8fSBarry Smith     MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_MAX,matin->comm);
1034bcd2baecSBarry Smith     if (nz)      *nz      = irecv[0];
1035bcd2baecSBarry Smith     if (nzalloc) *nzalloc = irecv[1];
1036bcd2baecSBarry Smith     if (mem)     *mem     = irecv[2];
1037a66be287SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_SUM) {
1038d65a2f8fSBarry Smith     MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_SUM,matin->comm);
1039bcd2baecSBarry Smith     if (nz)      *nz      = irecv[0];
1040bcd2baecSBarry Smith     if (nzalloc) *nzalloc = irecv[1];
1041bcd2baecSBarry Smith     if (mem)     *mem     = irecv[2];
1042a66be287SLois Curfman McInnes   }
1043a66be287SLois Curfman McInnes   return 0;
1044a66be287SLois Curfman McInnes }
1045a66be287SLois Curfman McInnes 
1046299609e3SLois Curfman McInnes extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*);
1047299609e3SLois Curfman McInnes extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*);
1048299609e3SLois Curfman McInnes extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double);
1049299609e3SLois Curfman McInnes extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *);
1050299609e3SLois Curfman McInnes extern int MatSolve_MPIAIJ(Mat,Vec,Vec);
1051299609e3SLois Curfman McInnes extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec);
1052299609e3SLois Curfman McInnes extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec);
1053299609e3SLois Curfman McInnes extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec);
1054299609e3SLois Curfman McInnes 
1055416022c9SBarry Smith static int MatSetOption_MPIAIJ(Mat A,MatOption op)
1056c74985f6SBarry Smith {
1057c0bbcb79SLois Curfman McInnes   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1058c74985f6SBarry Smith 
1059c0bbcb79SLois Curfman McInnes   if (op == NO_NEW_NONZERO_LOCATIONS ||
1060c0bbcb79SLois Curfman McInnes       op == YES_NEW_NONZERO_LOCATIONS ||
1061c0bbcb79SLois Curfman McInnes       op == COLUMNS_SORTED ||
1062c0bbcb79SLois Curfman McInnes       op == ROW_ORIENTED) {
1063c0bbcb79SLois Curfman McInnes         MatSetOption(a->A,op);
1064c0bbcb79SLois Curfman McInnes         MatSetOption(a->B,op);
1065c74985f6SBarry Smith   }
1066c0bbcb79SLois Curfman McInnes   else if (op == ROWS_SORTED ||
1067c0bbcb79SLois Curfman McInnes            op == SYMMETRIC_MATRIX ||
1068c0bbcb79SLois Curfman McInnes            op == STRUCTURALLY_SYMMETRIC_MATRIX ||
1069c0bbcb79SLois Curfman McInnes            op == YES_NEW_DIAGONALS)
107094a424c1SBarry Smith     PLogInfo(A,"Info:MatSetOption_MPIAIJ:Option ignored\n");
10714b0e389bSBarry Smith   else if (op == COLUMN_ORIENTED) {
10724b0e389bSBarry Smith     a->roworiented = 0;
10734b0e389bSBarry Smith     MatSetOption(a->A,op);
10744b0e389bSBarry Smith     MatSetOption(a->B,op);
10754b0e389bSBarry Smith   }
1076c0bbcb79SLois Curfman McInnes   else if (op == NO_NEW_DIAGONALS)
10774d39ef84SLois Curfman McInnes     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:NO_NEW_DIAGONALS");}
1078c0bbcb79SLois Curfman McInnes   else
10794d39ef84SLois Curfman McInnes     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:unknown option");}
1080c74985f6SBarry Smith   return 0;
1081c74985f6SBarry Smith }
1082c74985f6SBarry Smith 
1083d1710a03SLois Curfman McInnes static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
1084c74985f6SBarry Smith {
108544a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1086c74985f6SBarry Smith   *m = mat->M; *n = mat->N;
1087c74985f6SBarry Smith   return 0;
1088c74985f6SBarry Smith }
1089c74985f6SBarry Smith 
1090d1710a03SLois Curfman McInnes static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
1091c74985f6SBarry Smith {
109244a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1093b7c46309SBarry Smith   *m = mat->m; *n = mat->N;
1094c74985f6SBarry Smith   return 0;
1095c74985f6SBarry Smith }
1096c74985f6SBarry Smith 
1097d1710a03SLois Curfman McInnes static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
1098c74985f6SBarry Smith {
109944a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1100c74985f6SBarry Smith   *m = mat->rstart; *n = mat->rend;
1101c74985f6SBarry Smith   return 0;
1102c74985f6SBarry Smith }
1103c74985f6SBarry Smith 
11046d84be18SBarry Smith extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
11056d84be18SBarry Smith extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
11066d84be18SBarry Smith 
11076d84be18SBarry Smith int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
110839e00950SLois Curfman McInnes {
1109154123eaSLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
111070f0671dSBarry Smith   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
1111154123eaSLois Curfman McInnes   int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
1112154123eaSLois Curfman McInnes   int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
111370f0671dSBarry Smith   int        *cmap, *idx_p;
111439e00950SLois Curfman McInnes 
11157a0afa10SBarry Smith   if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,"MatGetRow_MPIAIJ:Already active");
11167a0afa10SBarry Smith   mat->getrowactive = PETSC_TRUE;
11177a0afa10SBarry Smith 
111870f0671dSBarry Smith   if (!mat->rowvalues && (idx || v)) {
11197a0afa10SBarry Smith     /*
11207a0afa10SBarry Smith         allocate enough space to hold information from the longest row.
11217a0afa10SBarry Smith     */
11227a0afa10SBarry Smith     Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data;
11237a0afa10SBarry Smith     int     max = 1,n = mat->n,tmp;
11247a0afa10SBarry Smith     for ( i=0; i<n; i++ ) {
11257a0afa10SBarry Smith       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
11267a0afa10SBarry Smith       if (max < tmp) { max = tmp; }
11277a0afa10SBarry Smith     }
11287a0afa10SBarry Smith     mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar)));
11297a0afa10SBarry Smith     CHKPTRQ(mat->rowvalues);
11307a0afa10SBarry Smith     mat->rowindices = (int *) (mat->rowvalues + max);
11317a0afa10SBarry Smith   }
11327a0afa10SBarry Smith 
11337a0afa10SBarry Smith 
1134b49de8d1SLois Curfman McInnes   if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIAIJ:Only local rows")
1135abc0e9e4SLois Curfman McInnes   lrow = row - rstart;
113639e00950SLois Curfman McInnes 
1137154123eaSLois Curfman McInnes   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1138154123eaSLois Curfman McInnes   if (!v)   {pvA = 0; pvB = 0;}
1139154123eaSLois Curfman McInnes   if (!idx) {pcA = 0; if (!v) pcB = 0;}
11406d84be18SBarry Smith   ierr = MatGetRow_SeqAIJ(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
11416d84be18SBarry Smith   ierr = MatGetRow_SeqAIJ(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1142154123eaSLois Curfman McInnes   nztot = nzA + nzB;
1143154123eaSLois Curfman McInnes 
114470f0671dSBarry Smith   cmap  = mat->garray;
1145154123eaSLois Curfman McInnes   if (v  || idx) {
1146154123eaSLois Curfman McInnes     if (nztot) {
1147154123eaSLois Curfman McInnes       /* Sort by increasing column numbers, assuming A and B already sorted */
114870f0671dSBarry Smith       int imark = -1;
1149154123eaSLois Curfman McInnes       if (v) {
115070f0671dSBarry Smith         *v = v_p = mat->rowvalues;
115139e00950SLois Curfman McInnes         for ( i=0; i<nzB; i++ ) {
115270f0671dSBarry Smith           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1153154123eaSLois Curfman McInnes           else break;
1154154123eaSLois Curfman McInnes         }
1155154123eaSLois Curfman McInnes         imark = i;
115670f0671dSBarry Smith         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
115770f0671dSBarry Smith         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
1158154123eaSLois Curfman McInnes       }
1159154123eaSLois Curfman McInnes       if (idx) {
116070f0671dSBarry Smith         *idx = idx_p = mat->rowindices;
116170f0671dSBarry Smith         if (imark > -1) {
116270f0671dSBarry Smith           for ( i=0; i<imark; i++ ) {
116370f0671dSBarry Smith             idx_p[i] = cmap[cworkB[i]];
116470f0671dSBarry Smith           }
116570f0671dSBarry Smith         } else {
1166154123eaSLois Curfman McInnes           for ( i=0; i<nzB; i++ ) {
116770f0671dSBarry Smith             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1168154123eaSLois Curfman McInnes             else break;
1169154123eaSLois Curfman McInnes           }
1170154123eaSLois Curfman McInnes           imark = i;
117170f0671dSBarry Smith         }
117270f0671dSBarry Smith         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart + cworkA[i];
117370f0671dSBarry Smith         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]];
117439e00950SLois Curfman McInnes       }
117539e00950SLois Curfman McInnes     }
117639e00950SLois Curfman McInnes     else {*idx = 0; *v=0;}
1177154123eaSLois Curfman McInnes   }
117839e00950SLois Curfman McInnes   *nz = nztot;
11796d84be18SBarry Smith   ierr = MatRestoreRow_SeqAIJ(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
11806d84be18SBarry Smith   ierr = MatRestoreRow_SeqAIJ(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
118139e00950SLois Curfman McInnes   return 0;
118239e00950SLois Curfman McInnes }
118339e00950SLois Curfman McInnes 
11846d84be18SBarry Smith int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
118539e00950SLois Curfman McInnes {
11867a0afa10SBarry Smith   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
11877a0afa10SBarry Smith   if (aij->getrowactive == PETSC_FALSE) {
11887a0afa10SBarry Smith     SETERRQ(1,"MatRestoreRow_MPIAIJ:MatGetRow not called");
11897a0afa10SBarry Smith   }
11907a0afa10SBarry Smith   aij->getrowactive = PETSC_FALSE;
119139e00950SLois Curfman McInnes   return 0;
119239e00950SLois Curfman McInnes }
119339e00950SLois Curfman McInnes 
1194cddf8d76SBarry Smith static int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
1195855ac2c5SLois Curfman McInnes {
1196855ac2c5SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1197ec8511deSBarry Smith   Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
1198416022c9SBarry Smith   int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
1199416022c9SBarry Smith   double     sum = 0.0;
120004ca555eSLois Curfman McInnes   Scalar     *v;
120104ca555eSLois Curfman McInnes 
120217699dbbSLois Curfman McInnes   if (aij->size == 1) {
120314183eadSLois Curfman McInnes     ierr =  MatNorm(aij->A,type,norm); CHKERRQ(ierr);
120437fa93a5SLois Curfman McInnes   } else {
120504ca555eSLois Curfman McInnes     if (type == NORM_FROBENIUS) {
120604ca555eSLois Curfman McInnes       v = amat->a;
120704ca555eSLois Curfman McInnes       for (i=0; i<amat->nz; i++ ) {
120804ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX)
120904ca555eSLois Curfman McInnes         sum += real(conj(*v)*(*v)); v++;
121004ca555eSLois Curfman McInnes #else
121104ca555eSLois Curfman McInnes         sum += (*v)*(*v); v++;
121204ca555eSLois Curfman McInnes #endif
121304ca555eSLois Curfman McInnes       }
121404ca555eSLois Curfman McInnes       v = bmat->a;
121504ca555eSLois Curfman McInnes       for (i=0; i<bmat->nz; i++ ) {
121604ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX)
121704ca555eSLois Curfman McInnes         sum += real(conj(*v)*(*v)); v++;
121804ca555eSLois Curfman McInnes #else
121904ca555eSLois Curfman McInnes         sum += (*v)*(*v); v++;
122004ca555eSLois Curfman McInnes #endif
122104ca555eSLois Curfman McInnes       }
12226d84be18SBarry Smith       MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
122304ca555eSLois Curfman McInnes       *norm = sqrt(*norm);
122404ca555eSLois Curfman McInnes     }
122504ca555eSLois Curfman McInnes     else if (type == NORM_1) { /* max column norm */
122604ca555eSLois Curfman McInnes       double *tmp, *tmp2;
122704ca555eSLois Curfman McInnes       int    *jj, *garray = aij->garray;
12280452661fSBarry Smith       tmp  = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp);
12290452661fSBarry Smith       tmp2 = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp2);
1230cddf8d76SBarry Smith       PetscMemzero(tmp,aij->N*sizeof(double));
123104ca555eSLois Curfman McInnes       *norm = 0.0;
123204ca555eSLois Curfman McInnes       v = amat->a; jj = amat->j;
123304ca555eSLois Curfman McInnes       for ( j=0; j<amat->nz; j++ ) {
1234579c6b6fSBarry Smith         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
123504ca555eSLois Curfman McInnes       }
123604ca555eSLois Curfman McInnes       v = bmat->a; jj = bmat->j;
123704ca555eSLois Curfman McInnes       for ( j=0; j<bmat->nz; j++ ) {
1238579c6b6fSBarry Smith         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
123904ca555eSLois Curfman McInnes       }
12406d84be18SBarry Smith       MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);
124104ca555eSLois Curfman McInnes       for ( j=0; j<aij->N; j++ ) {
124204ca555eSLois Curfman McInnes         if (tmp2[j] > *norm) *norm = tmp2[j];
124304ca555eSLois Curfman McInnes       }
12440452661fSBarry Smith       PetscFree(tmp); PetscFree(tmp2);
124504ca555eSLois Curfman McInnes     }
124604ca555eSLois Curfman McInnes     else if (type == NORM_INFINITY) { /* max row norm */
1247515d9167SLois Curfman McInnes       double ntemp = 0.0;
124804ca555eSLois Curfman McInnes       for ( j=0; j<amat->m; j++ ) {
1249dbb450caSBarry Smith         v = amat->a + amat->i[j] + shift;
125004ca555eSLois Curfman McInnes         sum = 0.0;
125104ca555eSLois Curfman McInnes         for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
1252cddf8d76SBarry Smith           sum += PetscAbsScalar(*v); v++;
125304ca555eSLois Curfman McInnes         }
1254dbb450caSBarry Smith         v = bmat->a + bmat->i[j] + shift;
125504ca555eSLois Curfman McInnes         for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
1256cddf8d76SBarry Smith           sum += PetscAbsScalar(*v); v++;
125704ca555eSLois Curfman McInnes         }
1258515d9167SLois Curfman McInnes         if (sum > ntemp) ntemp = sum;
125904ca555eSLois Curfman McInnes       }
12606d84be18SBarry Smith       MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);
126104ca555eSLois Curfman McInnes     }
126204ca555eSLois Curfman McInnes     else {
1263515d9167SLois Curfman McInnes       SETERRQ(1,"MatNorm_MPIAIJ:No support for two norm");
126404ca555eSLois Curfman McInnes     }
126537fa93a5SLois Curfman McInnes   }
1266855ac2c5SLois Curfman McInnes   return 0;
1267855ac2c5SLois Curfman McInnes }
1268855ac2c5SLois Curfman McInnes 
12690de55854SLois Curfman McInnes static int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1270b7c46309SBarry Smith {
1271b7c46309SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1272dbb450caSBarry Smith   Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
1273416022c9SBarry Smith   int        ierr,shift = Aloc->indexshift;
1274416022c9SBarry Smith   Mat        B;
1275b7c46309SBarry Smith   int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
1276b7c46309SBarry Smith   Scalar     *array;
1277b7c46309SBarry Smith 
12783638b69dSLois Curfman McInnes   if (matout == PETSC_NULL && M != N)
12793638b69dSLois Curfman McInnes     SETERRQ(1,"MatTranspose_MPIAIJ:Square matrix only for in-place");
1280b4fd4287SBarry Smith   ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,
1281b4fd4287SBarry Smith          PETSC_NULL,&B); CHKERRQ(ierr);
1282b7c46309SBarry Smith 
1283b7c46309SBarry Smith   /* copy over the A part */
1284ec8511deSBarry Smith   Aloc = (Mat_SeqAIJ*) a->A->data;
1285b7c46309SBarry Smith   m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1286b7c46309SBarry Smith   row = a->rstart;
1287dbb450caSBarry Smith   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
1288b7c46309SBarry Smith   for ( i=0; i<m; i++ ) {
1289416022c9SBarry Smith     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1290b7c46309SBarry Smith     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1291b7c46309SBarry Smith   }
1292b7c46309SBarry Smith   aj = Aloc->j;
12934af08d9eSBarry Smith   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}
1294b7c46309SBarry Smith 
1295b7c46309SBarry Smith   /* copy over the B part */
1296ec8511deSBarry Smith   Aloc = (Mat_SeqAIJ*) a->B->data;
1297b7c46309SBarry Smith   m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1298b7c46309SBarry Smith   row = a->rstart;
12990452661fSBarry Smith   ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
1300dbb450caSBarry Smith   for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
1301b7c46309SBarry Smith   for ( i=0; i<m; i++ ) {
1302416022c9SBarry Smith     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1303b7c46309SBarry Smith     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1304b7c46309SBarry Smith   }
13050452661fSBarry Smith   PetscFree(ct);
1306b7c46309SBarry Smith   ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
1307b7c46309SBarry Smith   ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
13083638b69dSLois Curfman McInnes   if (matout != PETSC_NULL) {
13090de55854SLois Curfman McInnes     *matout = B;
13100de55854SLois Curfman McInnes   } else {
13110de55854SLois Curfman McInnes     /* This isn't really an in-place transpose .... but free data structures from a */
13120452661fSBarry Smith     PetscFree(a->rowners);
13130de55854SLois Curfman McInnes     ierr = MatDestroy(a->A); CHKERRQ(ierr);
13140de55854SLois Curfman McInnes     ierr = MatDestroy(a->B); CHKERRQ(ierr);
13150452661fSBarry Smith     if (a->colmap) PetscFree(a->colmap);
13160452661fSBarry Smith     if (a->garray) PetscFree(a->garray);
13170de55854SLois Curfman McInnes     if (a->lvec) VecDestroy(a->lvec);
1318a56f8943SBarry Smith     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
13190452661fSBarry Smith     PetscFree(a);
1320416022c9SBarry Smith     PetscMemcpy(A,B,sizeof(struct _Mat));
13210452661fSBarry Smith     PetscHeaderDestroy(B);
13220de55854SLois Curfman McInnes   }
1323b7c46309SBarry Smith   return 0;
1324b7c46309SBarry Smith }
1325b7c46309SBarry Smith 
1326682d7d0cSBarry Smith extern int MatPrintHelp_SeqAIJ(Mat);
1327682d7d0cSBarry Smith static int MatPrintHelp_MPIAIJ(Mat A)
1328682d7d0cSBarry Smith {
1329682d7d0cSBarry Smith   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1330682d7d0cSBarry Smith 
1331682d7d0cSBarry Smith   if (!a->rank) return MatPrintHelp_SeqAIJ(a->A);
1332682d7d0cSBarry Smith   else return 0;
1333682d7d0cSBarry Smith }
1334682d7d0cSBarry Smith 
1335fc76ce87SLois Curfman McInnes extern int MatConvert_MPIAIJ(Mat,MatType,Mat *);
13363d1612f7SBarry Smith static int MatConvertSameType_MPIAIJ(Mat,Mat *,int);
13372f86bd48SSatish Balay extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int);
1338598137ffSSatish Balay int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **);
13398a729477SBarry Smith /* -------------------------------------------------------------------*/
13402ee70a88SLois Curfman McInnes static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
134139e00950SLois Curfman McInnes        MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
134244a69424SLois Curfman McInnes        MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
134344a69424SLois Curfman McInnes        MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
1344299609e3SLois Curfman McInnes        MatSolve_MPIAIJ,MatSolveAdd_MPIAIJ,
1345299609e3SLois Curfman McInnes        MatSolveTrans_MPIAIJ,MatSolveTransAdd_MPIAIJ,
1346299609e3SLois Curfman McInnes        MatLUFactor_MPIAIJ,0,
134744a69424SLois Curfman McInnes        MatRelax_MPIAIJ,
1348b7c46309SBarry Smith        MatTranspose_MPIAIJ,
1349a66be287SLois Curfman McInnes        MatGetInfo_MPIAIJ,0,
1350855ac2c5SLois Curfman McInnes        MatGetDiagonal_MPIAIJ,0,MatNorm_MPIAIJ,
1351ee50ffe9SBarry Smith        MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
13521eb62cbbSBarry Smith        0,
1353299609e3SLois Curfman McInnes        MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
1354299609e3SLois Curfman McInnes        MatGetReordering_MPIAIJ,
1355299609e3SLois Curfman McInnes        MatLUFactorSymbolic_MPIAIJ,MatLUFactorNumeric_MPIAIJ,0,0,
1356d1710a03SLois Curfman McInnes        MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
1357299609e3SLois Curfman McInnes        MatILUFactorSymbolic_MPIAIJ,0,
13583d1612f7SBarry Smith        0,0,MatConvert_MPIAIJ,0,0,MatConvertSameType_MPIAIJ,0,0,
1359b49de8d1SLois Curfman McInnes        0,0,0,
1360598137ffSSatish Balay        MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0,
1361052efed2SBarry Smith        MatPrintHelp_MPIAIJ,
1362052efed2SBarry Smith        MatScale_MPIAIJ};
13638a729477SBarry Smith 
13641987afe7SBarry Smith /*@C
1365ff756334SLois Curfman McInnes    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
13663a511b96SLois Curfman McInnes    (the default parallel PETSc format).  For good matrix assembly performance
13673a511b96SLois Curfman McInnes    the user should preallocate the matrix storage by setting the parameters
13683a511b96SLois Curfman McInnes    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
13693a511b96SLois Curfman McInnes    performance can be increased by more than a factor of 50.
13708a729477SBarry Smith 
13718a729477SBarry Smith    Input Parameters:
13721eb62cbbSBarry Smith .  comm - MPI communicator
13737d3e4905SLois Curfman McInnes .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
13747d3e4905SLois Curfman McInnes .  n - number of local columns (or PETSC_DECIDE to have calculated
13757d3e4905SLois Curfman McInnes            if N is given)
13767d3e4905SLois Curfman McInnes .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
13777d3e4905SLois Curfman McInnes .  N - number of global columns (or PETSC_DECIDE to have calculated
13787d3e4905SLois Curfman McInnes            if n is given)
1379ab693e5aSLois Curfman McInnes .  d_nz - number of nonzeros per row in diagonal portion of local submatrix
1380ff756334SLois Curfman McInnes            (same for all local rows)
1381ab693e5aSLois Curfman McInnes .  d_nzz - number of nonzeros per row in diagonal portion of local submatrix
1382ab693e5aSLois Curfman McInnes            or null (possibly different for each row).  You must leave room
1383ab693e5aSLois Curfman McInnes            for the diagonal entry even if it is zero.
1384ab693e5aSLois Curfman McInnes .  o_nz - number of nonzeros per row in off-diagonal portion of local
1385ab693e5aSLois Curfman McInnes            submatrix (same for all local rows).
1386ab693e5aSLois Curfman McInnes .  o_nzz - number of nonzeros per row in off-diagonal portion of local
1387ab693e5aSLois Curfman McInnes            submatrix or null (possibly different for each row).
13888a729477SBarry Smith 
1389ff756334SLois Curfman McInnes    Output Parameter:
1390*44cd7ae7SLois Curfman McInnes .  A - the matrix
13918a729477SBarry Smith 
1392ff756334SLois Curfman McInnes    Notes:
1393ff756334SLois Curfman McInnes    The AIJ format (also called the Yale sparse matrix format or
1394ff756334SLois Curfman McInnes    compressed row storage), is fully compatible with standard Fortran 77
13950002213bSLois Curfman McInnes    storage.  That is, the stored row and column indices can begin at
13960002213bSLois Curfman McInnes    either one (as in Fortran) or zero.  See the users manual for details.
1397ff756334SLois Curfman McInnes 
1398ff756334SLois Curfman McInnes    The user MUST specify either the local or global matrix dimensions
1399ff756334SLois Curfman McInnes    (possibly both).
1400ff756334SLois Curfman McInnes 
14015511cfe3SLois Curfman McInnes    By default, this format uses inodes (identical nodes) when possible.
14025511cfe3SLois Curfman McInnes    We search for consecutive rows with the same nonzero structure, thereby
14035511cfe3SLois Curfman McInnes    reusing matrix information to achieve increased efficiency.
14045511cfe3SLois Curfman McInnes 
14055511cfe3SLois Curfman McInnes    Options Database Keys:
14065511cfe3SLois Curfman McInnes $    -mat_aij_no_inode  - Do not use inodes
14076e62573dSLois Curfman McInnes $    -mat_aij_inode_limit <limit> - Set inode limit.
14086e62573dSLois Curfman McInnes $        (max limit=5)
14096e62573dSLois Curfman McInnes $    -mat_aij_oneindex - Internally use indexing starting at 1
14106e62573dSLois Curfman McInnes $        rather than 0.  Note: When calling MatSetValues(),
14116e62573dSLois Curfman McInnes $        the user still MUST index entries starting at 0!
14125511cfe3SLois Curfman McInnes 
1413e0245417SLois Curfman McInnes    Storage Information:
1414e0245417SLois Curfman McInnes    For a square global matrix we define each processor's diagonal portion
1415e0245417SLois Curfman McInnes    to be its local rows and the corresponding columns (a square submatrix);
1416e0245417SLois Curfman McInnes    each processor's off-diagonal portion encompasses the remainder of the
1417e0245417SLois Curfman McInnes    local matrix (a rectangular submatrix).
1418e0245417SLois Curfman McInnes 
1419e0245417SLois Curfman McInnes    The user can specify preallocated storage for the diagonal part of
14205ace5be8SLois Curfman McInnes    the local submatrix with either d_nz or d_nnz (not both).  Set
14215ace5be8SLois Curfman McInnes    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
14225ace5be8SLois Curfman McInnes    memory allocation.  Likewise, specify preallocated storage for the
14235ace5be8SLois Curfman McInnes    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1424ff756334SLois Curfman McInnes 
14255511cfe3SLois Curfman McInnes    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
14265511cfe3SLois Curfman McInnes    the figure below we depict these three local rows and all columns (0-11).
14272191d07cSBarry Smith 
1428b810aeb4SBarry Smith $          0 1 2 3 4 5 6 7 8 9 10 11
1429b810aeb4SBarry Smith $         -------------------
14305511cfe3SLois Curfman McInnes $  row 3  |  o o o d d d o o o o o o
14315511cfe3SLois Curfman McInnes $  row 4  |  o o o d d d o o o o o o
14325511cfe3SLois Curfman McInnes $  row 5  |  o o o d d d o o o o o o
14335511cfe3SLois Curfman McInnes $         -------------------
1434b810aeb4SBarry Smith $
1435b810aeb4SBarry Smith 
14365511cfe3SLois Curfman McInnes    Thus, any entries in the d locations are stored in the d (diagonal)
14375511cfe3SLois Curfman McInnes    submatrix, and any entries in the o locations are stored in the
14385511cfe3SLois Curfman McInnes    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
14395511cfe3SLois Curfman McInnes    stored simply in the MATSEQAIJ format for compressed row storage.
14405511cfe3SLois Curfman McInnes 
14415511cfe3SLois Curfman McInnes    Now d_nz should indicate the number of nonzeros per row in the d matrix,
14425511cfe3SLois Curfman McInnes    and o_nz should indicate the number of nonzeros per row in the o matrix.
14435511cfe3SLois Curfman McInnes    In general, for PDE problems in which most nonzeros are near the diagonal,
14443a511b96SLois Curfman McInnes    one expects d_nz >> o_nz.   For additional details, see the users manual
14453a511b96SLois Curfman McInnes    chapter on matrices and the file $(PETSC_DIR)/Performance.
14463a511b96SLois Curfman McInnes 
1447dbd7a890SLois Curfman McInnes .keywords: matrix, aij, compressed row, sparse, parallel
1448ff756334SLois Curfman McInnes 
1449fafbff53SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
14508a729477SBarry Smith @*/
14511eb62cbbSBarry Smith int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
1452*44cd7ae7SLois Curfman McInnes                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
14538a729477SBarry Smith {
1454*44cd7ae7SLois Curfman McInnes   Mat          B;
1455*44cd7ae7SLois Curfman McInnes   Mat_MPIAIJ   *b;
14566abc6512SBarry Smith   int          ierr, i,sum[2],work[2];
1457416022c9SBarry Smith 
1458*44cd7ae7SLois Curfman McInnes   *A = 0;
1459*44cd7ae7SLois Curfman McInnes   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIAIJ,comm);
1460*44cd7ae7SLois Curfman McInnes   PLogObjectCreate(B);
1461*44cd7ae7SLois Curfman McInnes   B->data       = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b);
1462*44cd7ae7SLois Curfman McInnes   PetscMemzero(b,sizeof(Mat_MPIAIJ));
1463*44cd7ae7SLois Curfman McInnes   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
1464*44cd7ae7SLois Curfman McInnes   B->destroy    = MatDestroy_MPIAIJ;
1465*44cd7ae7SLois Curfman McInnes   B->view       = MatView_MPIAIJ;
1466*44cd7ae7SLois Curfman McInnes   B->factor     = 0;
1467*44cd7ae7SLois Curfman McInnes   B->assembled  = PETSC_FALSE;
1468d6dfbf8fSBarry Smith 
1469*44cd7ae7SLois Curfman McInnes   b->insertmode = NOT_SET_VALUES;
1470*44cd7ae7SLois Curfman McInnes   MPI_Comm_rank(comm,&b->rank);
1471*44cd7ae7SLois Curfman McInnes   MPI_Comm_size(comm,&b->size);
14721eb62cbbSBarry Smith 
1473b4fd4287SBarry Smith   if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
14742e0155e0SLois Curfman McInnes     SETERRQ(1,"MatCreateMPIAIJ:Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
14751987afe7SBarry Smith 
1476dbd7a890SLois Curfman McInnes   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
14771eb62cbbSBarry Smith     work[0] = m; work[1] = n;
1478d65a2f8fSBarry Smith     MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
1479dbd7a890SLois Curfman McInnes     if (M == PETSC_DECIDE) M = sum[0];
1480dbd7a890SLois Curfman McInnes     if (N == PETSC_DECIDE) N = sum[1];
14811eb62cbbSBarry Smith   }
1482*44cd7ae7SLois Curfman McInnes   if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);}
1483*44cd7ae7SLois Curfman McInnes   if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);}
1484*44cd7ae7SLois Curfman McInnes   b->m = m; B->m = m;
1485*44cd7ae7SLois Curfman McInnes   b->n = n; B->n = n;
1486*44cd7ae7SLois Curfman McInnes   b->N = N; B->N = N;
1487*44cd7ae7SLois Curfman McInnes   b->M = M; B->M = M;
14881eb62cbbSBarry Smith 
14891eb62cbbSBarry Smith   /* build local table of row and column ownerships */
1490*44cd7ae7SLois Curfman McInnes   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
1491*44cd7ae7SLois Curfman McInnes   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
1492*44cd7ae7SLois Curfman McInnes   b->cowners = b->rowners + b->size + 1;
1493*44cd7ae7SLois Curfman McInnes   MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);
1494*44cd7ae7SLois Curfman McInnes   b->rowners[0] = 0;
1495*44cd7ae7SLois Curfman McInnes   for ( i=2; i<=b->size; i++ ) {
1496*44cd7ae7SLois Curfman McInnes     b->rowners[i] += b->rowners[i-1];
14978a729477SBarry Smith   }
1498*44cd7ae7SLois Curfman McInnes   b->rstart = b->rowners[b->rank];
1499*44cd7ae7SLois Curfman McInnes   b->rend   = b->rowners[b->rank+1];
1500*44cd7ae7SLois Curfman McInnes   MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);
1501*44cd7ae7SLois Curfman McInnes   b->cowners[0] = 0;
1502*44cd7ae7SLois Curfman McInnes   for ( i=2; i<=b->size; i++ ) {
1503*44cd7ae7SLois Curfman McInnes     b->cowners[i] += b->cowners[i-1];
15048a729477SBarry Smith   }
1505*44cd7ae7SLois Curfman McInnes   b->cstart = b->cowners[b->rank];
1506*44cd7ae7SLois Curfman McInnes   b->cend   = b->cowners[b->rank+1];
15078a729477SBarry Smith 
15085ace5be8SLois Curfman McInnes   if (d_nz == PETSC_DEFAULT) d_nz = 5;
1509*44cd7ae7SLois Curfman McInnes   ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
1510*44cd7ae7SLois Curfman McInnes   PLogObjectParent(B,b->A);
15117b8455f0SLois Curfman McInnes   if (o_nz == PETSC_DEFAULT) o_nz = 0;
1512*44cd7ae7SLois Curfman McInnes   ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
1513*44cd7ae7SLois Curfman McInnes   PLogObjectParent(B,b->B);
15148a729477SBarry Smith 
15151eb62cbbSBarry Smith   /* build cache for off array entries formed */
1516*44cd7ae7SLois Curfman McInnes   ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
1517*44cd7ae7SLois Curfman McInnes   b->colmap      = 0;
1518*44cd7ae7SLois Curfman McInnes   b->garray      = 0;
1519*44cd7ae7SLois Curfman McInnes   b->roworiented = 1;
15208a729477SBarry Smith 
15211eb62cbbSBarry Smith   /* stuff used for matrix vector multiply */
1522*44cd7ae7SLois Curfman McInnes   b->lvec      = 0;
1523*44cd7ae7SLois Curfman McInnes   b->Mvctx     = 0;
15248a729477SBarry Smith 
15257a0afa10SBarry Smith   /* stuff for MatGetRow() */
1526*44cd7ae7SLois Curfman McInnes   b->rowindices   = 0;
1527*44cd7ae7SLois Curfman McInnes   b->rowvalues    = 0;
1528*44cd7ae7SLois Curfman McInnes   b->getrowactive = PETSC_FALSE;
15297a0afa10SBarry Smith 
1530*44cd7ae7SLois Curfman McInnes   *A = B;
1531d6dfbf8fSBarry Smith   return 0;
1532d6dfbf8fSBarry Smith }
1533c74985f6SBarry Smith 
15343d1612f7SBarry Smith static int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues)
1535d6dfbf8fSBarry Smith {
1536d6dfbf8fSBarry Smith   Mat        mat;
1537416022c9SBarry Smith   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
153825cdf11fSBarry Smith   int        ierr, len,flg;
1539d6dfbf8fSBarry Smith 
1540416022c9SBarry Smith   *newmat       = 0;
15410452661fSBarry Smith   PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm);
1542a5a9c739SBarry Smith   PLogObjectCreate(mat);
15430452661fSBarry Smith   mat->data       = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
1544416022c9SBarry Smith   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
154544a69424SLois Curfman McInnes   mat->destroy    = MatDestroy_MPIAIJ;
154644a69424SLois Curfman McInnes   mat->view       = MatView_MPIAIJ;
1547d6dfbf8fSBarry Smith   mat->factor     = matin->factor;
1548c456f294SBarry Smith   mat->assembled  = PETSC_TRUE;
1549d6dfbf8fSBarry Smith 
1550*44cd7ae7SLois Curfman McInnes   a->m = mat->m   = oldmat->m;
1551*44cd7ae7SLois Curfman McInnes   a->n = mat->n   = oldmat->n;
1552*44cd7ae7SLois Curfman McInnes   a->M = mat->M   = oldmat->M;
1553*44cd7ae7SLois Curfman McInnes   a->N = mat->N   = oldmat->N;
1554d6dfbf8fSBarry Smith 
1555416022c9SBarry Smith   a->rstart       = oldmat->rstart;
1556416022c9SBarry Smith   a->rend         = oldmat->rend;
1557416022c9SBarry Smith   a->cstart       = oldmat->cstart;
1558416022c9SBarry Smith   a->cend         = oldmat->cend;
155917699dbbSLois Curfman McInnes   a->size         = oldmat->size;
156017699dbbSLois Curfman McInnes   a->rank         = oldmat->rank;
156164119d58SLois Curfman McInnes   a->insertmode   = NOT_SET_VALUES;
1562bcd2baecSBarry Smith   a->rowvalues    = 0;
1563bcd2baecSBarry Smith   a->getrowactive = PETSC_FALSE;
1564d6dfbf8fSBarry Smith 
15650452661fSBarry Smith   a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners);
156617699dbbSLois Curfman McInnes   PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
156717699dbbSLois Curfman McInnes   PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));
1568416022c9SBarry Smith   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
15692ee70a88SLois Curfman McInnes   if (oldmat->colmap) {
15700452661fSBarry Smith     a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
1571416022c9SBarry Smith     PLogObjectMemory(mat,(a->N)*sizeof(int));
1572416022c9SBarry Smith     PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
1573416022c9SBarry Smith   } else a->colmap = 0;
1574ec8511deSBarry Smith   if (oldmat->garray && (len = ((Mat_SeqAIJ *) (oldmat->B->data))->n)) {
15750452661fSBarry Smith     a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray);
1576464493b3SBarry Smith     PLogObjectMemory(mat,len*sizeof(int));
1577416022c9SBarry Smith     PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
1578416022c9SBarry Smith   } else a->garray = 0;
1579d6dfbf8fSBarry Smith 
1580416022c9SBarry Smith   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
1581416022c9SBarry Smith   PLogObjectParent(mat,a->lvec);
1582a56f8943SBarry Smith   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
1583416022c9SBarry Smith   PLogObjectParent(mat,a->Mvctx);
1584416022c9SBarry Smith   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
1585416022c9SBarry Smith   PLogObjectParent(mat,a->A);
1586416022c9SBarry Smith   ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
1587416022c9SBarry Smith   PLogObjectParent(mat,a->B);
15885dd7a6c7SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
158925cdf11fSBarry Smith   if (flg) {
1590682d7d0cSBarry Smith     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
1591682d7d0cSBarry Smith   }
15928a729477SBarry Smith   *newmat = mat;
15938a729477SBarry Smith   return 0;
15948a729477SBarry Smith }
1595416022c9SBarry Smith 
159677c4ece6SBarry Smith #include "sys.h"
1597416022c9SBarry Smith 
159819bcc07fSBarry Smith int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
1599416022c9SBarry Smith {
1600d65a2f8fSBarry Smith   Mat          A;
1601416022c9SBarry Smith   int          i, nz, ierr, j,rstart, rend, fd;
1602d65a2f8fSBarry Smith   Scalar       *vals,*svals;
160319bcc07fSBarry Smith   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1604416022c9SBarry Smith   MPI_Status   status;
160517699dbbSLois Curfman McInnes   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1606d65a2f8fSBarry Smith   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
160719bcc07fSBarry Smith   int          tag = ((PetscObject)viewer)->tag;
1608416022c9SBarry Smith 
160917699dbbSLois Curfman McInnes   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
161017699dbbSLois Curfman McInnes   if (!rank) {
161190ace30eSBarry Smith     ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
161277c4ece6SBarry Smith     ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr);
1613c456f294SBarry Smith     if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIAIJ:not matrix object");
1614416022c9SBarry Smith   }
1615416022c9SBarry Smith 
1616416022c9SBarry Smith   MPI_Bcast(header+1,3,MPI_INT,0,comm);
1617416022c9SBarry Smith   M = header[1]; N = header[2];
1618416022c9SBarry Smith   /* determine ownership of all rows */
161917699dbbSLois Curfman McInnes   m = M/size + ((M % size) > rank);
16200452661fSBarry Smith   rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
1621416022c9SBarry Smith   MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1622416022c9SBarry Smith   rowners[0] = 0;
162317699dbbSLois Curfman McInnes   for ( i=2; i<=size; i++ ) {
1624416022c9SBarry Smith     rowners[i] += rowners[i-1];
1625416022c9SBarry Smith   }
162617699dbbSLois Curfman McInnes   rstart = rowners[rank];
162717699dbbSLois Curfman McInnes   rend   = rowners[rank+1];
1628416022c9SBarry Smith 
1629416022c9SBarry Smith   /* distribute row lengths to all processors */
16300452661fSBarry Smith   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
1631416022c9SBarry Smith   offlens = ourlens + (rend-rstart);
163217699dbbSLois Curfman McInnes   if (!rank) {
16330452661fSBarry Smith     rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
163477c4ece6SBarry Smith     ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
16350452661fSBarry Smith     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
163617699dbbSLois Curfman McInnes     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
1637d65a2f8fSBarry Smith     MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
16380452661fSBarry Smith     PetscFree(sndcounts);
1639416022c9SBarry Smith   }
1640416022c9SBarry Smith   else {
1641416022c9SBarry Smith     MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
1642416022c9SBarry Smith   }
1643416022c9SBarry Smith 
164417699dbbSLois Curfman McInnes   if (!rank) {
1645416022c9SBarry Smith     /* calculate the number of nonzeros on each processor */
16460452661fSBarry Smith     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
1647cddf8d76SBarry Smith     PetscMemzero(procsnz,size*sizeof(int));
164817699dbbSLois Curfman McInnes     for ( i=0; i<size; i++ ) {
1649416022c9SBarry Smith       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
1650416022c9SBarry Smith         procsnz[i] += rowlengths[j];
1651416022c9SBarry Smith       }
1652416022c9SBarry Smith     }
16530452661fSBarry Smith     PetscFree(rowlengths);
1654416022c9SBarry Smith 
1655416022c9SBarry Smith     /* determine max buffer needed and allocate it */
1656416022c9SBarry Smith     maxnz = 0;
165717699dbbSLois Curfman McInnes     for ( i=0; i<size; i++ ) {
16580452661fSBarry Smith       maxnz = PetscMax(maxnz,procsnz[i]);
1659416022c9SBarry Smith     }
16600452661fSBarry Smith     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
1661416022c9SBarry Smith 
1662416022c9SBarry Smith     /* read in my part of the matrix column indices  */
1663416022c9SBarry Smith     nz = procsnz[0];
16640452661fSBarry Smith     mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
166577c4ece6SBarry Smith     ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr);
1666d65a2f8fSBarry Smith 
1667d65a2f8fSBarry Smith     /* read in every one elses and ship off */
166817699dbbSLois Curfman McInnes     for ( i=1; i<size; i++ ) {
1669d65a2f8fSBarry Smith       nz = procsnz[i];
167077c4ece6SBarry Smith       ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
1671d65a2f8fSBarry Smith       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
1672d65a2f8fSBarry Smith     }
16730452661fSBarry Smith     PetscFree(cols);
1674416022c9SBarry Smith   }
1675416022c9SBarry Smith   else {
1676416022c9SBarry Smith     /* determine buffer space needed for message */
1677416022c9SBarry Smith     nz = 0;
1678416022c9SBarry Smith     for ( i=0; i<m; i++ ) {
1679416022c9SBarry Smith       nz += ourlens[i];
1680416022c9SBarry Smith     }
16810452661fSBarry Smith     mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1682416022c9SBarry Smith 
1683416022c9SBarry Smith     /* receive message of column indices*/
1684d65a2f8fSBarry Smith     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
1685416022c9SBarry Smith     MPI_Get_count(&status,MPI_INT,&maxnz);
1686c456f294SBarry Smith     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file");
1687416022c9SBarry Smith   }
1688416022c9SBarry Smith 
1689416022c9SBarry Smith   /* loop over local rows, determining number of off diagonal entries */
1690cddf8d76SBarry Smith   PetscMemzero(offlens,m*sizeof(int));
1691416022c9SBarry Smith   jj = 0;
1692416022c9SBarry Smith   for ( i=0; i<m; i++ ) {
1693416022c9SBarry Smith     for ( j=0; j<ourlens[i]; j++ ) {
1694d65a2f8fSBarry Smith       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1695416022c9SBarry Smith       jj++;
1696416022c9SBarry Smith     }
1697416022c9SBarry Smith   }
1698d65a2f8fSBarry Smith 
1699d65a2f8fSBarry Smith   /* create our matrix */
1700416022c9SBarry Smith   for ( i=0; i<m; i++ ) {
1701416022c9SBarry Smith     ourlens[i] -= offlens[i];
1702416022c9SBarry Smith   }
1703d65a2f8fSBarry Smith   ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1704d65a2f8fSBarry Smith   A = *newmat;
1705d65a2f8fSBarry Smith   MatSetOption(A,COLUMNS_SORTED);
1706d65a2f8fSBarry Smith   for ( i=0; i<m; i++ ) {
1707d65a2f8fSBarry Smith     ourlens[i] += offlens[i];
1708d65a2f8fSBarry Smith   }
1709416022c9SBarry Smith 
171017699dbbSLois Curfman McInnes   if (!rank) {
17110452661fSBarry Smith     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);
1712416022c9SBarry Smith 
1713416022c9SBarry Smith     /* read in my part of the matrix numerical values  */
1714416022c9SBarry Smith     nz = procsnz[0];
171577c4ece6SBarry Smith     ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
1716d65a2f8fSBarry Smith 
1717d65a2f8fSBarry Smith     /* insert into matrix */
1718d65a2f8fSBarry Smith     jj      = rstart;
1719d65a2f8fSBarry Smith     smycols = mycols;
1720d65a2f8fSBarry Smith     svals   = vals;
1721d65a2f8fSBarry Smith     for ( i=0; i<m; i++ ) {
1722d65a2f8fSBarry Smith       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1723d65a2f8fSBarry Smith       smycols += ourlens[i];
1724d65a2f8fSBarry Smith       svals   += ourlens[i];
1725d65a2f8fSBarry Smith       jj++;
1726416022c9SBarry Smith     }
1727416022c9SBarry Smith 
1728d65a2f8fSBarry Smith     /* read in other processors and ship out */
172917699dbbSLois Curfman McInnes     for ( i=1; i<size; i++ ) {
1730416022c9SBarry Smith       nz = procsnz[i];
173177c4ece6SBarry Smith       ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
1732d65a2f8fSBarry Smith       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
1733416022c9SBarry Smith     }
17340452661fSBarry Smith     PetscFree(procsnz);
1735416022c9SBarry Smith   }
1736d65a2f8fSBarry Smith   else {
1737d65a2f8fSBarry Smith     /* receive numeric values */
17380452661fSBarry Smith     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);
1739416022c9SBarry Smith 
1740d65a2f8fSBarry Smith     /* receive message of values*/
1741d65a2f8fSBarry Smith     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
1742d65a2f8fSBarry Smith     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1743c456f294SBarry Smith     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file");
1744d65a2f8fSBarry Smith 
1745d65a2f8fSBarry Smith     /* insert into matrix */
1746d65a2f8fSBarry Smith     jj      = rstart;
1747d65a2f8fSBarry Smith     smycols = mycols;
1748d65a2f8fSBarry Smith     svals   = vals;
1749d65a2f8fSBarry Smith     for ( i=0; i<m; i++ ) {
1750d65a2f8fSBarry Smith       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1751d65a2f8fSBarry Smith       smycols += ourlens[i];
1752d65a2f8fSBarry Smith       svals   += ourlens[i];
1753d65a2f8fSBarry Smith       jj++;
1754d65a2f8fSBarry Smith     }
1755d65a2f8fSBarry Smith   }
17560452661fSBarry Smith   PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);
1757d65a2f8fSBarry Smith 
1758d65a2f8fSBarry Smith   ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
1759d65a2f8fSBarry Smith   ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
1760416022c9SBarry Smith   return 0;
1761416022c9SBarry Smith }
1762