xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 39ddd567ee2cd5629f9b2e9aadacf11d968d0842)
18965ea79SLois Curfman McInnes #ifndef lint
2*39ddd567SLois Curfman McInnes static char vcid[] = "$Id: mpidense.c,v 1.1 1995/10/19 22:14:22 curfman Exp curfman $";
38965ea79SLois Curfman McInnes #endif
48965ea79SLois Curfman McInnes 
58965ea79SLois Curfman McInnes #include "mpidense.h"
68965ea79SLois Curfman McInnes #include "vec/vecimpl.h"
78965ea79SLois Curfman McInnes #include "inline/spops.h"
88965ea79SLois Curfman McInnes 
9*39ddd567SLois Curfman McInnes static int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,
108965ea79SLois Curfman McInnes                                int *idxn,Scalar *v,InsertMode addv)
118965ea79SLois Curfman McInnes {
12*39ddd567SLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
13*39ddd567SLois Curfman McInnes   int          ierr, i, j, rstart = mdn->rstart, rend = mdn->rend, row;
148965ea79SLois Curfman McInnes 
15*39ddd567SLois Curfman McInnes   if (mdn->insertmode != NOT_SET_VALUES && mdn->insertmode != addv) {
16*39ddd567SLois Curfman McInnes     SETERRQ(1,"MatSetValues_MPIDense:Cannot mix inserts and adds");
178965ea79SLois Curfman McInnes   }
18*39ddd567SLois Curfman McInnes   mdn->insertmode = addv;
198965ea79SLois Curfman McInnes   for ( i=0; i<m; i++ ) {
20*39ddd567SLois Curfman McInnes     if (idxm[i] < 0) SETERRQ(1,"MatSetValues_MPIDense:Negative row");
21*39ddd567SLois Curfman McInnes     if (idxm[i] >= mdn->M) SETERRQ(1,"MatSetValues_MPIDense:Row too large");
228965ea79SLois Curfman McInnes     if (idxm[i] >= rstart && idxm[i] < rend) {
238965ea79SLois Curfman McInnes       row = idxm[i] - rstart;
248965ea79SLois Curfman McInnes       for ( j=0; j<n; j++ ) {
25*39ddd567SLois Curfman McInnes         if (idxn[j] < 0) SETERRQ(1,"MatSetValues_MPIDense:Negative column");
26*39ddd567SLois Curfman McInnes         if (idxn[j] >= mdn->N)
27*39ddd567SLois Curfman McInnes           SETERRQ(1,"MatSetValues_MPIDense:Column too large");
28*39ddd567SLois Curfman McInnes         ierr = MatSetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j,addv);
29*39ddd567SLois Curfman McInnes         CHKERRQ(ierr);
308965ea79SLois Curfman McInnes       }
318965ea79SLois Curfman McInnes     }
328965ea79SLois Curfman McInnes     else {
33*39ddd567SLois Curfman McInnes       ierr = StashValues_Private(&mdn->stash,idxm[i],n,idxn,v+i*n,addv);
34*39ddd567SLois Curfman McInnes       CHKERRQ(ierr);
358965ea79SLois Curfman McInnes     }
368965ea79SLois Curfman McInnes   }
378965ea79SLois Curfman McInnes   return 0;
388965ea79SLois Curfman McInnes }
398965ea79SLois Curfman McInnes 
40*39ddd567SLois Curfman McInnes static int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
418965ea79SLois Curfman McInnes {
42*39ddd567SLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
438965ea79SLois Curfman McInnes   MPI_Comm     comm = mat->comm;
44*39ddd567SLois Curfman McInnes   int          size = mdn->size, *owners = mdn->rowners, rank = mdn->rank;
458965ea79SLois Curfman McInnes   int          *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
46*39ddd567SLois Curfman McInnes   int          tag = mat->tag, *owner,*starts,count,ierr;
478965ea79SLois Curfman McInnes   InsertMode   addv;
48*39ddd567SLois Curfman McInnes   MPI_Request  *send_waits,*recv_waits;
498965ea79SLois Curfman McInnes   Scalar       *rvalues,*svalues;
508965ea79SLois Curfman McInnes 
518965ea79SLois Curfman McInnes   /* make sure all processors are either in INSERTMODE or ADDMODE */
52*39ddd567SLois Curfman McInnes   MPI_Allreduce((void *) &mdn->insertmode,(void *) &addv,1,MPI_INT,
53*39ddd567SLois Curfman McInnes                 MPI_BOR,comm);
54*39ddd567SLois Curfman McInnes   if (addv == (ADD_VALUES|INSERT_VALUES)) { SETERRQ(1,
55*39ddd567SLois Curfman McInnes     "MatAssemblyBegin_MPIDense:Cannot mix adds/inserts on different procs");
568965ea79SLois Curfman McInnes     }
57*39ddd567SLois Curfman McInnes   mdn->insertmode = addv; /* in case this processor had no cache */
588965ea79SLois Curfman McInnes 
598965ea79SLois Curfman McInnes   /*  first count number of contributors to each processor */
608965ea79SLois Curfman McInnes   nprocs = (int *) PETSCMALLOC( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
618965ea79SLois Curfman McInnes   PetscZero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
62*39ddd567SLois Curfman McInnes   owner = (int *) PETSCMALLOC( (mdn->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
63*39ddd567SLois Curfman McInnes   for ( i=0; i<mdn->stash.n; i++ ) {
64*39ddd567SLois Curfman McInnes     idx = mdn->stash.idx[i];
658965ea79SLois Curfman McInnes     for ( j=0; j<size; j++ ) {
668965ea79SLois Curfman McInnes       if (idx >= owners[j] && idx < owners[j+1]) {
678965ea79SLois Curfman McInnes         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
688965ea79SLois Curfman McInnes       }
698965ea79SLois Curfman McInnes     }
708965ea79SLois Curfman McInnes   }
718965ea79SLois Curfman McInnes   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
728965ea79SLois Curfman McInnes 
738965ea79SLois Curfman McInnes   /* inform other processors of number of messages and max length*/
748965ea79SLois Curfman McInnes   work = (int *) PETSCMALLOC( size*sizeof(int) ); CHKPTRQ(work);
75*39ddd567SLois Curfman McInnes   MPI_Allreduce((void *) procs,(void *) work,size,MPI_INT,MPI_SUM,comm);
768965ea79SLois Curfman McInnes   nreceives = work[rank];
77*39ddd567SLois Curfman McInnes   MPI_Allreduce((void *) nprocs,(void *) work,size,MPI_INT,MPI_MAX,comm);
788965ea79SLois Curfman McInnes   nmax = work[rank];
798965ea79SLois Curfman McInnes   PETSCFREE(work);
808965ea79SLois Curfman McInnes 
818965ea79SLois Curfman McInnes   /* post receives:
828965ea79SLois Curfman McInnes        1) each message will consist of ordered pairs
838965ea79SLois Curfman McInnes      (global index,value) we store the global index as a double
848965ea79SLois Curfman McInnes      to simplify the message passing.
858965ea79SLois Curfman McInnes        2) since we don't know how long each individual message is we
868965ea79SLois Curfman McInnes      allocate the largest needed buffer for each receive. Potentially
878965ea79SLois Curfman McInnes      this is a lot of wasted space.
888965ea79SLois Curfman McInnes 
898965ea79SLois Curfman McInnes        This could be done better.
908965ea79SLois Curfman McInnes   */
918965ea79SLois Curfman McInnes   rvalues = (Scalar *) PETSCMALLOC(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
928965ea79SLois Curfman McInnes   CHKPTRQ(rvalues);
938965ea79SLois Curfman McInnes   recv_waits = (MPI_Request *) PETSCMALLOC((nreceives+1)*sizeof(MPI_Request));
948965ea79SLois Curfman McInnes   CHKPTRQ(recv_waits);
958965ea79SLois Curfman McInnes   for ( i=0; i<nreceives; i++ ) {
96*39ddd567SLois Curfman McInnes     MPI_Irecv((void *)(rvalues+3*nmax*i),3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
978965ea79SLois Curfman McInnes               comm,recv_waits+i);
988965ea79SLois Curfman McInnes   }
998965ea79SLois Curfman McInnes 
1008965ea79SLois Curfman McInnes   /* do sends:
1018965ea79SLois Curfman McInnes       1) starts[i] gives the starting index in svalues for stuff going to
1028965ea79SLois Curfman McInnes          the ith processor
1038965ea79SLois Curfman McInnes   */
104*39ddd567SLois Curfman McInnes   svalues = (Scalar *) PETSCMALLOC( 3*(mdn->stash.n+1)*sizeof(Scalar) );
105*39ddd567SLois Curfman McInnes   CHKPTRQ(svalues);
1068965ea79SLois Curfman McInnes   send_waits = (MPI_Request *) PETSCMALLOC( (nsends+1)*sizeof(MPI_Request));
1078965ea79SLois Curfman McInnes   CHKPTRQ(send_waits);
1088965ea79SLois Curfman McInnes   starts = (int *) PETSCMALLOC( size*sizeof(int) ); CHKPTRQ(starts);
1098965ea79SLois Curfman McInnes   starts[0] = 0;
1108965ea79SLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
111*39ddd567SLois Curfman McInnes   for ( i=0; i<mdn->stash.n; i++ ) {
112*39ddd567SLois Curfman McInnes     svalues[3*starts[owner[i]]]       = (Scalar)  mdn->stash.idx[i];
113*39ddd567SLois Curfman McInnes     svalues[3*starts[owner[i]]+1]     = (Scalar)  mdn->stash.idy[i];
114*39ddd567SLois Curfman McInnes     svalues[3*(starts[owner[i]]++)+2] =  mdn->stash.array[i];
1158965ea79SLois Curfman McInnes   }
1168965ea79SLois Curfman McInnes   PETSCFREE(owner);
1178965ea79SLois Curfman McInnes   starts[0] = 0;
1188965ea79SLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
1198965ea79SLois Curfman McInnes   count = 0;
1208965ea79SLois Curfman McInnes   for ( i=0; i<size; i++ ) {
1218965ea79SLois Curfman McInnes     if (procs[i]) {
122*39ddd567SLois Curfman McInnes       MPI_Isend((void*)(svalues+3*starts[i]),3*nprocs[i],MPIU_SCALAR,i,tag,
1238965ea79SLois Curfman McInnes                 comm,send_waits+count++);
1248965ea79SLois Curfman McInnes     }
1258965ea79SLois Curfman McInnes   }
1268965ea79SLois Curfman McInnes   PETSCFREE(starts); PETSCFREE(nprocs);
1278965ea79SLois Curfman McInnes 
1288965ea79SLois Curfman McInnes   /* Free cache space */
129*39ddd567SLois Curfman McInnes   ierr = StashDestroy_Private(&mdn->stash); CHKERRQ(ierr);
1308965ea79SLois Curfman McInnes 
131*39ddd567SLois Curfman McInnes   mdn->svalues    = svalues;    mdn->rvalues = rvalues;
132*39ddd567SLois Curfman McInnes   mdn->nsends     = nsends;     mdn->nrecvs = nreceives;
133*39ddd567SLois Curfman McInnes   mdn->send_waits = send_waits; mdn->recv_waits = recv_waits;
134*39ddd567SLois Curfman McInnes   mdn->rmax       = nmax;
1358965ea79SLois Curfman McInnes 
1368965ea79SLois Curfman McInnes   return 0;
1378965ea79SLois Curfman McInnes }
138*39ddd567SLois Curfman McInnes extern int MatSetUpMultiply_MPIDense(Mat);
1398965ea79SLois Curfman McInnes 
140*39ddd567SLois Curfman McInnes static int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
1418965ea79SLois Curfman McInnes {
142*39ddd567SLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
1438965ea79SLois Curfman McInnes   MPI_Status   *send_status,recv_status;
144*39ddd567SLois Curfman McInnes   int          imdex, nrecvs=mdn->nrecvs, count=nrecvs, i, n, ierr, row, col;
1458965ea79SLois Curfman McInnes   Scalar       *values,val;
146*39ddd567SLois Curfman McInnes   InsertMode   addv = mdn->insertmode;
1478965ea79SLois Curfman McInnes 
1488965ea79SLois Curfman McInnes   /*  wait on receives */
1498965ea79SLois Curfman McInnes   while (count) {
150*39ddd567SLois Curfman McInnes     MPI_Waitany(nrecvs,mdn->recv_waits,&imdex,&recv_status);
1518965ea79SLois Curfman McInnes     /* unpack receives into our local space */
152*39ddd567SLois Curfman McInnes     values = mdn->rvalues + 3*imdex*mdn->rmax;
1538965ea79SLois Curfman McInnes     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
1548965ea79SLois Curfman McInnes     n = n/3;
1558965ea79SLois Curfman McInnes     for ( i=0; i<n; i++ ) {
156*39ddd567SLois Curfman McInnes       row = (int) PETSCREAL(values[3*i]) - mdn->rstart;
1578965ea79SLois Curfman McInnes       col = (int) PETSCREAL(values[3*i+1]);
1588965ea79SLois Curfman McInnes       val = values[3*i+2];
159*39ddd567SLois Curfman McInnes       if (col >= 0 && col < mdn->N) {
160*39ddd567SLois Curfman McInnes         MatSetValues(mdn->A,1,&row,1,&col,&val,addv);
1618965ea79SLois Curfman McInnes       }
162*39ddd567SLois Curfman McInnes       else {SETERRQ(1,"MatAssemblyEnd_MPIDense:Invalid column");}
1638965ea79SLois Curfman McInnes     }
1648965ea79SLois Curfman McInnes     count--;
1658965ea79SLois Curfman McInnes   }
166*39ddd567SLois Curfman McInnes   PETSCFREE(mdn->recv_waits); PETSCFREE(mdn->rvalues);
1678965ea79SLois Curfman McInnes 
1688965ea79SLois Curfman McInnes   /* wait on sends */
169*39ddd567SLois Curfman McInnes   if (mdn->nsends) {
170*39ddd567SLois Curfman McInnes     send_status = (MPI_Status *) PETSCMALLOC( mdn->nsends*sizeof(MPI_Status) );
1718965ea79SLois Curfman McInnes     CHKPTRQ(send_status);
172*39ddd567SLois Curfman McInnes     MPI_Waitall(mdn->nsends,mdn->send_waits,send_status);
1738965ea79SLois Curfman McInnes     PETSCFREE(send_status);
1748965ea79SLois Curfman McInnes   }
175*39ddd567SLois Curfman McInnes   PETSCFREE(mdn->send_waits); PETSCFREE(mdn->svalues);
1768965ea79SLois Curfman McInnes 
177*39ddd567SLois Curfman McInnes   mdn->insertmode = NOT_SET_VALUES;
178*39ddd567SLois Curfman McInnes   ierr = MatAssemblyBegin(mdn->A,mode); CHKERRQ(ierr);
179*39ddd567SLois Curfman McInnes   ierr = MatAssemblyEnd(mdn->A,mode); CHKERRQ(ierr);
1808965ea79SLois Curfman McInnes 
181*39ddd567SLois Curfman McInnes   if (!mdn->assembled && mode == FINAL_ASSEMBLY) {
182*39ddd567SLois Curfman McInnes     ierr = MatSetUpMultiply_MPIDense(mat); CHKERRQ(ierr);
1838965ea79SLois Curfman McInnes   }
184*39ddd567SLois Curfman McInnes   mdn->assembled = 1;
1858965ea79SLois Curfman McInnes   return 0;
1868965ea79SLois Curfman McInnes }
1878965ea79SLois Curfman McInnes 
188*39ddd567SLois Curfman McInnes static int MatZeroEntries_MPIDense(Mat A)
1898965ea79SLois Curfman McInnes {
190*39ddd567SLois Curfman McInnes   Mat_MPIDense *l = (Mat_MPIDense *) A->data;
191*39ddd567SLois Curfman McInnes   return MatZeroEntries(l->A);
1928965ea79SLois Curfman McInnes }
1938965ea79SLois Curfman McInnes 
1948965ea79SLois Curfman McInnes /* the code does not do the diagonal entries correctly unless the
1958965ea79SLois Curfman McInnes    matrix is square and the column and row owerships are identical.
1968965ea79SLois Curfman McInnes    This is a BUG. The only way to fix it seems to be to access
1978965ea79SLois Curfman McInnes    aij->A and aij->B directly and not through the MatZeroRows()
1988965ea79SLois Curfman McInnes    routine.
1998965ea79SLois Curfman McInnes */
200*39ddd567SLois Curfman McInnes static int MatZeroRows_MPIDense(Mat A,IS is,Scalar *diag)
2018965ea79SLois Curfman McInnes {
202*39ddd567SLois Curfman McInnes   Mat_MPIDense   *l = (Mat_MPIDense *) A->data;
2038965ea79SLois Curfman McInnes   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
2048965ea79SLois Curfman McInnes   int            *procs,*nprocs,j,found,idx,nsends,*work;
2058965ea79SLois Curfman McInnes   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
2068965ea79SLois Curfman McInnes   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
2078965ea79SLois Curfman McInnes   int            *lens,imdex,*lrows,*values;
2088965ea79SLois Curfman McInnes   MPI_Comm       comm = A->comm;
2098965ea79SLois Curfman McInnes   MPI_Request    *send_waits,*recv_waits;
2108965ea79SLois Curfman McInnes   MPI_Status     recv_status,*send_status;
2118965ea79SLois Curfman McInnes   IS             istmp;
2128965ea79SLois Curfman McInnes 
213*39ddd567SLois Curfman McInnes   if (!l->assembled) SETERRQ(1,"MatZeroRows_MPIDense:Must assemble matrix");
2148965ea79SLois Curfman McInnes   ierr = ISGetLocalSize(is,&N); CHKERRQ(ierr);
2158965ea79SLois Curfman McInnes   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
2168965ea79SLois Curfman McInnes 
2178965ea79SLois Curfman McInnes   /*  first count number of contributors to each processor */
2188965ea79SLois Curfman McInnes   nprocs = (int *) PETSCMALLOC( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
2198965ea79SLois Curfman McInnes   PetscZero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
2208965ea79SLois Curfman McInnes   owner = (int *) PETSCMALLOC((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
2218965ea79SLois Curfman McInnes   for ( i=0; i<N; i++ ) {
2228965ea79SLois Curfman McInnes     idx = rows[i];
2238965ea79SLois Curfman McInnes     found = 0;
2248965ea79SLois Curfman McInnes     for ( j=0; j<size; j++ ) {
2258965ea79SLois Curfman McInnes       if (idx >= owners[j] && idx < owners[j+1]) {
2268965ea79SLois Curfman McInnes         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
2278965ea79SLois Curfman McInnes       }
2288965ea79SLois Curfman McInnes     }
229*39ddd567SLois Curfman McInnes     if (!found) SETERRQ(1,"MatZeroRows_MPIDense:Index out of range");
2308965ea79SLois Curfman McInnes   }
2318965ea79SLois Curfman McInnes   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
2328965ea79SLois Curfman McInnes 
2338965ea79SLois Curfman McInnes   /* inform other processors of number of messages and max length*/
2348965ea79SLois Curfman McInnes   work = (int *) PETSCMALLOC( size*sizeof(int) ); CHKPTRQ(work);
2358965ea79SLois Curfman McInnes   MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
2368965ea79SLois Curfman McInnes   nrecvs = work[rank];
2378965ea79SLois Curfman McInnes   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
2388965ea79SLois Curfman McInnes   nmax = work[rank];
2398965ea79SLois Curfman McInnes   PETSCFREE(work);
2408965ea79SLois Curfman McInnes 
2418965ea79SLois Curfman McInnes   /* post receives:   */
2428965ea79SLois Curfman McInnes   rvalues = (int *) PETSCMALLOC((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
2438965ea79SLois Curfman McInnes   CHKPTRQ(rvalues);
2448965ea79SLois Curfman McInnes   recv_waits = (MPI_Request *) PETSCMALLOC((nrecvs+1)*sizeof(MPI_Request));
2458965ea79SLois Curfman McInnes   CHKPTRQ(recv_waits);
2468965ea79SLois Curfman McInnes   for ( i=0; i<nrecvs; i++ ) {
2478965ea79SLois Curfman McInnes     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
2488965ea79SLois Curfman McInnes   }
2498965ea79SLois Curfman McInnes 
2508965ea79SLois Curfman McInnes   /* do sends:
2518965ea79SLois Curfman McInnes       1) starts[i] gives the starting index in svalues for stuff going to
2528965ea79SLois Curfman McInnes          the ith processor
2538965ea79SLois Curfman McInnes   */
2548965ea79SLois Curfman McInnes   svalues = (int *) PETSCMALLOC( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
2558965ea79SLois Curfman McInnes   send_waits = (MPI_Request *) PETSCMALLOC( (nsends+1)*sizeof(MPI_Request));
2568965ea79SLois Curfman McInnes   CHKPTRQ(send_waits);
2578965ea79SLois Curfman McInnes   starts = (int *) PETSCMALLOC( (size+1)*sizeof(int) ); CHKPTRQ(starts);
2588965ea79SLois Curfman McInnes   starts[0] = 0;
2598965ea79SLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
2608965ea79SLois Curfman McInnes   for ( i=0; i<N; i++ ) {
2618965ea79SLois Curfman McInnes     svalues[starts[owner[i]]++] = rows[i];
2628965ea79SLois Curfman McInnes   }
2638965ea79SLois Curfman McInnes   ISRestoreIndices(is,&rows);
2648965ea79SLois Curfman McInnes 
2658965ea79SLois Curfman McInnes   starts[0] = 0;
2668965ea79SLois Curfman McInnes   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
2678965ea79SLois Curfman McInnes   count = 0;
2688965ea79SLois Curfman McInnes   for ( i=0; i<size; i++ ) {
2698965ea79SLois Curfman McInnes     if (procs[i]) {
2708965ea79SLois Curfman McInnes       MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
2718965ea79SLois Curfman McInnes     }
2728965ea79SLois Curfman McInnes   }
2738965ea79SLois Curfman McInnes   PETSCFREE(starts);
2748965ea79SLois Curfman McInnes 
2758965ea79SLois Curfman McInnes   base = owners[rank];
2768965ea79SLois Curfman McInnes 
2778965ea79SLois Curfman McInnes   /*  wait on receives */
2788965ea79SLois Curfman McInnes   lens   = (int *) PETSCMALLOC( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
2798965ea79SLois Curfman McInnes   source = lens + nrecvs;
2808965ea79SLois Curfman McInnes   count  = nrecvs; slen = 0;
2818965ea79SLois Curfman McInnes   while (count) {
2828965ea79SLois Curfman McInnes     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
2838965ea79SLois Curfman McInnes     /* unpack receives into our local space */
2848965ea79SLois Curfman McInnes     MPI_Get_count(&recv_status,MPI_INT,&n);
2858965ea79SLois Curfman McInnes     source[imdex]  = recv_status.MPI_SOURCE;
2868965ea79SLois Curfman McInnes     lens[imdex]  = n;
2878965ea79SLois Curfman McInnes     slen += n;
2888965ea79SLois Curfman McInnes     count--;
2898965ea79SLois Curfman McInnes   }
2908965ea79SLois Curfman McInnes   PETSCFREE(recv_waits);
2918965ea79SLois Curfman McInnes 
2928965ea79SLois Curfman McInnes   /* move the data into the send scatter */
2938965ea79SLois Curfman McInnes   lrows = (int *) PETSCMALLOC( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
2948965ea79SLois Curfman McInnes   count = 0;
2958965ea79SLois Curfman McInnes   for ( i=0; i<nrecvs; i++ ) {
2968965ea79SLois Curfman McInnes     values = rvalues + i*nmax;
2978965ea79SLois Curfman McInnes     for ( j=0; j<lens[i]; j++ ) {
2988965ea79SLois Curfman McInnes       lrows[count++] = values[j] - base;
2998965ea79SLois Curfman McInnes     }
3008965ea79SLois Curfman McInnes   }
3018965ea79SLois Curfman McInnes   PETSCFREE(rvalues); PETSCFREE(lens);
3028965ea79SLois Curfman McInnes   PETSCFREE(owner); PETSCFREE(nprocs);
3038965ea79SLois Curfman McInnes 
3048965ea79SLois Curfman McInnes   /* actually zap the local rows */
3058965ea79SLois Curfman McInnes   ierr = ISCreateSeq(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
3068965ea79SLois Curfman McInnes   PLogObjectParent(A,istmp);
3078965ea79SLois Curfman McInnes   PETSCFREE(lrows);
3088965ea79SLois Curfman McInnes   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
3098965ea79SLois Curfman McInnes   ierr = ISDestroy(istmp); CHKERRQ(ierr);
3108965ea79SLois Curfman McInnes 
3118965ea79SLois Curfman McInnes   /* wait on sends */
3128965ea79SLois Curfman McInnes   if (nsends) {
3138965ea79SLois Curfman McInnes     send_status = (MPI_Status *) PETSCMALLOC(nsends*sizeof(MPI_Status));
3148965ea79SLois Curfman McInnes     CHKPTRQ(send_status);
3158965ea79SLois Curfman McInnes     MPI_Waitall(nsends,send_waits,send_status);
3168965ea79SLois Curfman McInnes     PETSCFREE(send_status);
3178965ea79SLois Curfman McInnes   }
3188965ea79SLois Curfman McInnes   PETSCFREE(send_waits); PETSCFREE(svalues);
3198965ea79SLois Curfman McInnes 
3208965ea79SLois Curfman McInnes   return 0;
3218965ea79SLois Curfman McInnes }
3228965ea79SLois Curfman McInnes 
323*39ddd567SLois Curfman McInnes static int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
3248965ea79SLois Curfman McInnes {
325*39ddd567SLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
3268965ea79SLois Curfman McInnes   int          ierr;
327*39ddd567SLois Curfman McInnes   if (!mdn->assembled)
328*39ddd567SLois Curfman McInnes     SETERRQ(1,"MatMult_MPIDense:Must assemble matrix first");
329*39ddd567SLois Curfman McInnes   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx);
330*39ddd567SLois Curfman McInnes   CHKERRQ(ierr);
331*39ddd567SLois Curfman McInnes   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx);
332*39ddd567SLois Curfman McInnes   CHKERRQ(ierr);
333*39ddd567SLois Curfman McInnes   ierr = MatMult(mdn->A,mdn->lvec,yy); CHKERRQ(ierr);
3348965ea79SLois Curfman McInnes   return 0;
3358965ea79SLois Curfman McInnes }
3368965ea79SLois Curfman McInnes 
337*39ddd567SLois Curfman McInnes static int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
3388965ea79SLois Curfman McInnes {
339*39ddd567SLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
3408965ea79SLois Curfman McInnes   int          ierr;
341*39ddd567SLois Curfman McInnes   if (!mdn->assembled)
342*39ddd567SLois Curfman McInnes     SETERRQ(1,"MatMultAdd_MPIDense:Must assemble matrix first");
343*39ddd567SLois Curfman McInnes   ierr = VecScatterBegin(xx,mdn->lvec,ADD_VALUES,SCATTER_ALL,mdn->Mvctx);
344*39ddd567SLois Curfman McInnes   CHKERRQ(ierr);
345*39ddd567SLois Curfman McInnes   ierr = VecScatterEnd(xx,mdn->lvec,ADD_VALUES,SCATTER_ALL,mdn->Mvctx);
346*39ddd567SLois Curfman McInnes   CHKERRQ(ierr);
347*39ddd567SLois Curfman McInnes   ierr = MatMultAdd(mdn->A,mdn->lvec,yy,zz); CHKERRQ(ierr);
3488965ea79SLois Curfman McInnes   return 0;
3498965ea79SLois Curfman McInnes }
3508965ea79SLois Curfman McInnes 
3518965ea79SLois Curfman McInnes /*
3528965ea79SLois Curfman McInnes   This only works correctly for square matrices where the subblock A->A is the
3538965ea79SLois Curfman McInnes    diagonal block
3548965ea79SLois Curfman McInnes */
355*39ddd567SLois Curfman McInnes static int MatGetDiagonal_MPIDense(Mat A,Vec v)
3568965ea79SLois Curfman McInnes {
357*39ddd567SLois Curfman McInnes   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
358*39ddd567SLois Curfman McInnes   if (!a->assembled) SETERRQ(1,"MatGetDiag_MPIDense:must assemble matrix");
3598965ea79SLois Curfman McInnes   return MatGetDiagonal(a->A,v);
3608965ea79SLois Curfman McInnes }
3618965ea79SLois Curfman McInnes 
362*39ddd567SLois Curfman McInnes static int MatDestroy_MPIDense(PetscObject obj)
3638965ea79SLois Curfman McInnes {
3648965ea79SLois Curfman McInnes   Mat          mat = (Mat) obj;
365*39ddd567SLois Curfman McInnes   Mat_MPIDense *aij = (Mat_MPIDense *) mat->data;
3668965ea79SLois Curfman McInnes   int          ierr;
3678965ea79SLois Curfman McInnes #if defined(PETSC_LOG)
3688965ea79SLois Curfman McInnes   PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N);
3698965ea79SLois Curfman McInnes #endif
3708965ea79SLois Curfman McInnes   PETSCFREE(aij->rowners);
3718965ea79SLois Curfman McInnes   ierr = MatDestroy(aij->A); CHKERRQ(ierr);
3728965ea79SLois Curfman McInnes   if (aij->lvec)   VecDestroy(aij->lvec);
3738965ea79SLois Curfman McInnes   if (aij->Mvctx)  VecScatterCtxDestroy(aij->Mvctx);
3748965ea79SLois Curfman McInnes   PETSCFREE(aij);
3758965ea79SLois Curfman McInnes   PLogObjectDestroy(mat);
3768965ea79SLois Curfman McInnes   PETSCHEADERDESTROY(mat);
3778965ea79SLois Curfman McInnes   return 0;
3788965ea79SLois Curfman McInnes }
379*39ddd567SLois Curfman McInnes 
3808965ea79SLois Curfman McInnes #include "pinclude/pviewer.h"
3818965ea79SLois Curfman McInnes 
382*39ddd567SLois Curfman McInnes static int MatView_MPIDense_Binary(Mat mat,Viewer viewer)
3838965ea79SLois Curfman McInnes {
384*39ddd567SLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
3858965ea79SLois Curfman McInnes   int          ierr;
386*39ddd567SLois Curfman McInnes   if (mdn->size == 1) {
387*39ddd567SLois Curfman McInnes     ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
3888965ea79SLois Curfman McInnes   }
389*39ddd567SLois Curfman McInnes   else SETERRQ(1,"MatView_MPIDense_Binary:Only uniprocessor output supported");
3908965ea79SLois Curfman McInnes   return 0;
3918965ea79SLois Curfman McInnes }
3928965ea79SLois Curfman McInnes 
393*39ddd567SLois Curfman McInnes static int MatView_MPIDense_ASCII(Mat mat,Viewer viewer)
3948965ea79SLois Curfman McInnes {
395*39ddd567SLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
396*39ddd567SLois Curfman McInnes   int          ierr, format;
3978965ea79SLois Curfman McInnes   PetscObject  vobj = (PetscObject) viewer;
3988965ea79SLois Curfman McInnes   FILE         *fd;
3998965ea79SLois Curfman McInnes 
4008965ea79SLois Curfman McInnes   if (vobj->type == ASCII_FILE_VIEWER || vobj->type == ASCII_FILES_VIEWER) {
4018965ea79SLois Curfman McInnes     ierr = ViewerFileGetFormat_Private(viewer,&format);
4028965ea79SLois Curfman McInnes     if (format == FILE_FORMAT_INFO) {
403*39ddd567SLois Curfman McInnes       ; /* do nothing for now */
4048965ea79SLois Curfman McInnes     }
4058965ea79SLois Curfman McInnes   }
4068965ea79SLois Curfman McInnes 
4078965ea79SLois Curfman McInnes   if (vobj->type == ASCII_FILE_VIEWER) {
4088965ea79SLois Curfman McInnes     ierr = ViewerFileGetPointer_Private(viewer,&fd); CHKERRQ(ierr);
4098965ea79SLois Curfman McInnes     MPIU_Seq_begin(mat->comm,1);
410*39ddd567SLois Curfman McInnes     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d\n",
411*39ddd567SLois Curfman McInnes              mdn->rank,mdn->m,mdn->rstart,mdn->rend,mdn->n);
412*39ddd567SLois Curfman McInnes     ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
4138965ea79SLois Curfman McInnes     fflush(fd);
4148965ea79SLois Curfman McInnes     MPIU_Seq_end(mat->comm,1);
4158965ea79SLois Curfman McInnes   }
4168965ea79SLois Curfman McInnes   else {
417*39ddd567SLois Curfman McInnes     int size = mdn->size, rank = mdn->rank;
4188965ea79SLois Curfman McInnes     if (size == 1) {
419*39ddd567SLois Curfman McInnes       ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
4208965ea79SLois Curfman McInnes     }
4218965ea79SLois Curfman McInnes     else {
4228965ea79SLois Curfman McInnes       /* assemble the entire matrix onto first processor. */
4238965ea79SLois Curfman McInnes       Mat       A;
424*39ddd567SLois Curfman McInnes       int       M = mdn->M, N = mdn->N,m,row,i, nz, *cols;
425*39ddd567SLois Curfman McInnes       Scalar    *vals;
426*39ddd567SLois Curfman McInnes       Mat_SeqDense *Amdn = (Mat_SeqDense*) mdn->A->data;
4278965ea79SLois Curfman McInnes 
4288965ea79SLois Curfman McInnes       if (!rank) {
429*39ddd567SLois Curfman McInnes         ierr = MatCreateMPIDense(mat->comm,M,M,N,N,&A); CHKERRQ(ierr);
4308965ea79SLois Curfman McInnes       }
4318965ea79SLois Curfman McInnes       else {
432*39ddd567SLois Curfman McInnes         ierr = MatCreateMPIDense(mat->comm,0,M,N,N,&A); CHKERRQ(ierr);
4338965ea79SLois Curfman McInnes       }
4348965ea79SLois Curfman McInnes       PLogObjectParent(mat,A);
4358965ea79SLois Curfman McInnes 
436*39ddd567SLois Curfman McInnes       /* Copy the matrix ... This isn't the most efficient means,
437*39ddd567SLois Curfman McInnes          but it's quick for now */
438*39ddd567SLois Curfman McInnes       row = mdn->rstart; m = Amdn->m;
4398965ea79SLois Curfman McInnes       for ( i=0; i<m; i++ ) {
440*39ddd567SLois Curfman McInnes         ierr = MatGetRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
441*39ddd567SLois Curfman McInnes         ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES); CHKERRQ(ierr);
442*39ddd567SLois Curfman McInnes         ierr = MatRestoreRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
443*39ddd567SLois Curfman McInnes         row++;
4448965ea79SLois Curfman McInnes       }
4458965ea79SLois Curfman McInnes 
4468965ea79SLois Curfman McInnes       ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
4478965ea79SLois Curfman McInnes       ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
4488965ea79SLois Curfman McInnes       if (!rank) {
449*39ddd567SLois Curfman McInnes         ierr = MatView(((Mat_MPIDense*)(A->data))->A,viewer); CHKERRQ(ierr);
4508965ea79SLois Curfman McInnes       }
4518965ea79SLois Curfman McInnes       ierr = MatDestroy(A); CHKERRQ(ierr);
4528965ea79SLois Curfman McInnes     }
4538965ea79SLois Curfman McInnes   }
4548965ea79SLois Curfman McInnes   return 0;
4558965ea79SLois Curfman McInnes }
4568965ea79SLois Curfman McInnes 
457*39ddd567SLois Curfman McInnes static int MatView_MPIDense(PetscObject obj,Viewer viewer)
4588965ea79SLois Curfman McInnes {
4598965ea79SLois Curfman McInnes   Mat          mat = (Mat) obj;
460*39ddd567SLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
4618965ea79SLois Curfman McInnes   PetscObject  vobj = (PetscObject) viewer;
462*39ddd567SLois Curfman McInnes   int          ierr;
4638965ea79SLois Curfman McInnes 
464*39ddd567SLois Curfman McInnes   if (!mdn->assembled) SETERRQ(1,"MatView_MPIDense:must assemble matrix");
4658965ea79SLois Curfman McInnes   if (!viewer) {
4668965ea79SLois Curfman McInnes     viewer = STDOUT_VIEWER_SELF; vobj = (PetscObject) viewer;
4678965ea79SLois Curfman McInnes   }
468*39ddd567SLois Curfman McInnes   if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILE_VIEWER) {
469*39ddd567SLois Curfman McInnes     ierr = MatView_MPIDense_ASCII(mat,viewer); CHKERRQ(ierr);
4708965ea79SLois Curfman McInnes   }
4718965ea79SLois Curfman McInnes   else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILES_VIEWER) {
472*39ddd567SLois Curfman McInnes     ierr = MatView_MPIDense_ASCII(mat,viewer); CHKERRQ(ierr);
4738965ea79SLois Curfman McInnes   }
4748965ea79SLois Curfman McInnes   else if (vobj->type == BINARY_FILE_VIEWER) {
475*39ddd567SLois Curfman McInnes     return MatView_MPIDense_Binary(mat,viewer);
4768965ea79SLois Curfman McInnes   }
4778965ea79SLois Curfman McInnes   return 0;
4788965ea79SLois Curfman McInnes }
4798965ea79SLois Curfman McInnes 
480*39ddd567SLois Curfman McInnes static int MatGetInfo_MPIDense(Mat matin,MatInfoType flag,int *nz,
4818965ea79SLois Curfman McInnes                              int *nzalloc,int *mem)
4828965ea79SLois Curfman McInnes {
483*39ddd567SLois Curfman McInnes   Mat_MPIDense *mat = (Mat_MPIDense *) matin->data;
484*39ddd567SLois Curfman McInnes   Mat          A = mat->A;
485*39ddd567SLois Curfman McInnes   int          ierr, isend[3], irecv[3];
4868965ea79SLois Curfman McInnes 
487*39ddd567SLois Curfman McInnes   ierr = MatGetInfo(A,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr);
4888965ea79SLois Curfman McInnes   if (flag == MAT_LOCAL) {
4898965ea79SLois Curfman McInnes     *nz = isend[0]; *nzalloc = isend[1]; *mem = isend[2];
4908965ea79SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_MAX) {
4918965ea79SLois Curfman McInnes     MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_MAX,matin->comm);
4928965ea79SLois Curfman McInnes     *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
4938965ea79SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_SUM) {
4948965ea79SLois Curfman McInnes     MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_SUM,matin->comm);
4958965ea79SLois Curfman McInnes     *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
4968965ea79SLois Curfman McInnes   }
4978965ea79SLois Curfman McInnes   return 0;
4988965ea79SLois Curfman McInnes }
4998965ea79SLois Curfman McInnes 
500*39ddd567SLois Curfman McInnes static int MatSetOption_MPIDense(Mat A,MatOption op)
5018965ea79SLois Curfman McInnes {
502*39ddd567SLois Curfman McInnes   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
5038965ea79SLois Curfman McInnes 
5048965ea79SLois Curfman McInnes   if (op == NO_NEW_NONZERO_LOCATIONS ||
5058965ea79SLois Curfman McInnes       op == YES_NEW_NONZERO_LOCATIONS ||
5068965ea79SLois Curfman McInnes       op == COLUMNS_SORTED ||
5078965ea79SLois Curfman McInnes       op == ROW_ORIENTED) {
5088965ea79SLois Curfman McInnes         MatSetOption(a->A,op);
5098965ea79SLois Curfman McInnes   }
5108965ea79SLois Curfman McInnes   else if (op == ROWS_SORTED ||
5118965ea79SLois Curfman McInnes            op == SYMMETRIC_MATRIX ||
5128965ea79SLois Curfman McInnes            op == STRUCTURALLY_SYMMETRIC_MATRIX ||
5138965ea79SLois Curfman McInnes            op == YES_NEW_DIAGONALS)
514*39ddd567SLois Curfman McInnes     PLogInfo((PetscObject)A,"Info:MatSetOption_MPIDense:Option ignored\n");
5158965ea79SLois Curfman McInnes   else if (op == COLUMN_ORIENTED)
516*39ddd567SLois Curfman McInnes     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIDense:COLUMN_ORIENTED");}
5178965ea79SLois Curfman McInnes   else if (op == NO_NEW_DIAGONALS)
518*39ddd567SLois Curfman McInnes     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIDense:NO_NEW_DIAGONALS");}
5198965ea79SLois Curfman McInnes   else
520*39ddd567SLois Curfman McInnes     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIDense:unknown option");}
5218965ea79SLois Curfman McInnes   return 0;
5228965ea79SLois Curfman McInnes }
5238965ea79SLois Curfman McInnes 
524*39ddd567SLois Curfman McInnes static int MatGetSize_MPIDense(Mat matin,int *m,int *n)
5258965ea79SLois Curfman McInnes {
526*39ddd567SLois Curfman McInnes   Mat_MPIDense *mat = (Mat_MPIDense *) matin->data;
5278965ea79SLois Curfman McInnes   *m = mat->M; *n = mat->N;
5288965ea79SLois Curfman McInnes   return 0;
5298965ea79SLois Curfman McInnes }
5308965ea79SLois Curfman McInnes 
531*39ddd567SLois Curfman McInnes static int MatGetLocalSize_MPIDense(Mat matin,int *m,int *n)
5328965ea79SLois Curfman McInnes {
533*39ddd567SLois Curfman McInnes   Mat_MPIDense *mat = (Mat_MPIDense *) matin->data;
5348965ea79SLois Curfman McInnes   *m = mat->m; *n = mat->N;
5358965ea79SLois Curfman McInnes   return 0;
5368965ea79SLois Curfman McInnes }
5378965ea79SLois Curfman McInnes 
538*39ddd567SLois Curfman McInnes static int MatGetOwnershipRange_MPIDense(Mat matin,int *m,int *n)
5398965ea79SLois Curfman McInnes {
540*39ddd567SLois Curfman McInnes   Mat_MPIDense *mat = (Mat_MPIDense *) matin->data;
5418965ea79SLois Curfman McInnes   *m = mat->rstart; *n = mat->rend;
5428965ea79SLois Curfman McInnes   return 0;
5438965ea79SLois Curfman McInnes }
5448965ea79SLois Curfman McInnes 
545*39ddd567SLois Curfman McInnes static int MatGetRow_MPIDense(Mat matin,int row,int *nz,int **idx,Scalar **v)
5468965ea79SLois Curfman McInnes {
547*39ddd567SLois Curfman McInnes   Mat_MPIDense *mat = (Mat_MPIDense *) matin->data;
548*39ddd567SLois Curfman McInnes   int          lrow, rstart = mat->rstart, rend = mat->rend;
5498965ea79SLois Curfman McInnes 
550*39ddd567SLois Curfman McInnes   if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIDense:only local rows")
5518965ea79SLois Curfman McInnes   lrow = row - rstart;
552*39ddd567SLois Curfman McInnes   return MatGetRow(mat->A,lrow,nz,idx,v);
5538965ea79SLois Curfman McInnes }
5548965ea79SLois Curfman McInnes 
555*39ddd567SLois Curfman McInnes static int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,Scalar **v)
5568965ea79SLois Curfman McInnes {
5578965ea79SLois Curfman McInnes   if (idx) PETSCFREE(*idx);
5588965ea79SLois Curfman McInnes   if (v) PETSCFREE(*v);
5598965ea79SLois Curfman McInnes   return 0;
5608965ea79SLois Curfman McInnes }
5618965ea79SLois Curfman McInnes 
562*39ddd567SLois Curfman McInnes static int MatCopyPrivate_MPIDense(Mat,Mat *);
5638965ea79SLois Curfman McInnes 
5648965ea79SLois Curfman McInnes /* -------------------------------------------------------------------*/
565*39ddd567SLois Curfman McInnes static struct _MatOps MatOps = {MatSetValues_MPIDense,
566*39ddd567SLois Curfman McInnes        MatGetRow_MPIDense,MatRestoreRow_MPIDense,
567*39ddd567SLois Curfman McInnes        MatMult_MPIDense,MatMultAdd_MPIDense,
568*39ddd567SLois Curfman McInnes        0, 0,
569*39ddd567SLois Curfman McInnes        0, 0,
570*39ddd567SLois Curfman McInnes        0, 0, 0,
571*39ddd567SLois Curfman McInnes        0, 0, 0,
572*39ddd567SLois Curfman McInnes        MatGetInfo_MPIDense, 0,
573*39ddd567SLois Curfman McInnes        MatGetDiagonal_MPIDense, 0, 0,
574*39ddd567SLois Curfman McInnes        MatAssemblyBegin_MPIDense,MatAssemblyEnd_MPIDense,
5758965ea79SLois Curfman McInnes        0,
576*39ddd567SLois Curfman McInnes        MatSetOption_MPIDense,MatZeroEntries_MPIDense,MatZeroRows_MPIDense,
577*39ddd567SLois Curfman McInnes        0,
578*39ddd567SLois Curfman McInnes        0, 0, 0, 0,
579*39ddd567SLois Curfman McInnes        MatGetSize_MPIDense,MatGetLocalSize_MPIDense,
580*39ddd567SLois Curfman McInnes        MatGetOwnershipRange_MPIDense,
581*39ddd567SLois Curfman McInnes        0, 0,
582*39ddd567SLois Curfman McInnes        0, 0, 0, 0, 0, MatCopyPrivate_MPIDense};
5838965ea79SLois Curfman McInnes 
5848965ea79SLois Curfman McInnes /*@C
585*39ddd567SLois Curfman McInnes    MatCreateMPIDense - Creates a sparse parallel matrix in dense format.
5868965ea79SLois Curfman McInnes 
5878965ea79SLois Curfman McInnes    Input Parameters:
5888965ea79SLois Curfman McInnes .  comm - MPI communicator
5898965ea79SLois Curfman McInnes .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
5908965ea79SLois Curfman McInnes .  n - number of local columns (or PETSC_DECIDE to have calculated
5918965ea79SLois Curfman McInnes            if N is given)
5928965ea79SLois Curfman McInnes .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
5938965ea79SLois Curfman McInnes .  N - number of global columns (or PETSC_DECIDE to have calculated
5948965ea79SLois Curfman McInnes            if n is given)
5958965ea79SLois Curfman McInnes 
5968965ea79SLois Curfman McInnes    Output Parameter:
5978965ea79SLois Curfman McInnes .  newmat - the matrix
5988965ea79SLois Curfman McInnes 
5998965ea79SLois Curfman McInnes    Notes:
600*39ddd567SLois Curfman McInnes    The dense format is fully compatible with standard Fortran 77
601*39ddd567SLois Curfman McInnes    storage by columns.
6028965ea79SLois Curfman McInnes 
6038965ea79SLois Curfman McInnes    The user MUST specify either the local or global matrix dimensions
6048965ea79SLois Curfman McInnes    (possibly both).
6058965ea79SLois Curfman McInnes 
606*39ddd567SLois Curfman McInnes .keywords: matrix, dense, parallel
6078965ea79SLois Curfman McInnes 
608*39ddd567SLois Curfman McInnes .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
6098965ea79SLois Curfman McInnes @*/
610*39ddd567SLois Curfman McInnes int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,Mat *newmat)
6118965ea79SLois Curfman McInnes {
6128965ea79SLois Curfman McInnes   Mat          mat;
613*39ddd567SLois Curfman McInnes   Mat_MPIDense *a;
614*39ddd567SLois Curfman McInnes   int          ierr, i;
6158965ea79SLois Curfman McInnes 
6168965ea79SLois Curfman McInnes   *newmat         = 0;
617*39ddd567SLois Curfman McInnes   PETSCHEADERCREATE(mat,_Mat,MAT_COOKIE,MATMPIDENSE,comm);
6188965ea79SLois Curfman McInnes   PLogObjectCreate(mat);
619*39ddd567SLois Curfman McInnes   mat->data       = (void *) (a = PETSCNEW(Mat_MPIDense)); CHKPTRQ(a);
6208965ea79SLois Curfman McInnes   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
621*39ddd567SLois Curfman McInnes   mat->destroy    = MatDestroy_MPIDense;
622*39ddd567SLois Curfman McInnes   mat->view       = MatView_MPIDense;
6238965ea79SLois Curfman McInnes   mat->factor     = 0;
6248965ea79SLois Curfman McInnes 
6258965ea79SLois Curfman McInnes   a->insertmode = NOT_SET_VALUES;
6268965ea79SLois Curfman McInnes   MPI_Comm_rank(comm,&a->rank);
6278965ea79SLois Curfman McInnes   MPI_Comm_size(comm,&a->size);
6288965ea79SLois Curfman McInnes 
629*39ddd567SLois Curfman McInnes   if (M == PETSC_DECIDE) MPI_Allreduce(&m,&M,1,MPI_INT,MPI_SUM,comm);
6308965ea79SLois Curfman McInnes   if (m == PETSC_DECIDE) {m = M/a->size + ((M % a->size) > a->rank);}
631*39ddd567SLois Curfman McInnes 
632*39ddd567SLois Curfman McInnes   /* each row stores all columns */
633*39ddd567SLois Curfman McInnes   if (N == PETSC_DECIDE) N = n;
634*39ddd567SLois Curfman McInnes   if (n == PETSC_DECIDE) n = N;
635*39ddd567SLois Curfman McInnes   if (n != N) SETERRQ(1,"MatCreateMPIDense:For now, only n=N is supported");
6368965ea79SLois Curfman McInnes   a->N = N;
6378965ea79SLois Curfman McInnes   a->M = M;
638*39ddd567SLois Curfman McInnes   a->m = m;
639*39ddd567SLois Curfman McInnes   a->n = n;
6408965ea79SLois Curfman McInnes 
6418965ea79SLois Curfman McInnes   /* build local table of row and column ownerships */
642*39ddd567SLois Curfman McInnes   a->rowners = (int *) PETSCMALLOC((a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
643*39ddd567SLois Curfman McInnes   PLogObjectMemory(mat,(a->size+2)*sizeof(int)+sizeof(struct _Mat)+
644*39ddd567SLois Curfman McInnes                        sizeof(Mat_MPIDense));
6458965ea79SLois Curfman McInnes   MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);
6468965ea79SLois Curfman McInnes   a->rowners[0] = 0;
6478965ea79SLois Curfman McInnes   for ( i=2; i<=a->size; i++ ) {
6488965ea79SLois Curfman McInnes     a->rowners[i] += a->rowners[i-1];
6498965ea79SLois Curfman McInnes   }
6508965ea79SLois Curfman McInnes   a->rstart = a->rowners[a->rank];
6518965ea79SLois Curfman McInnes   a->rend   = a->rowners[a->rank+1];
6528965ea79SLois Curfman McInnes 
653*39ddd567SLois Curfman McInnes   ierr = MatCreateSeqDense(MPI_COMM_SELF,m,N,&a->A); CHKERRQ(ierr);
6548965ea79SLois Curfman McInnes   PLogObjectParent(mat,a->A);
6558965ea79SLois Curfman McInnes 
6568965ea79SLois Curfman McInnes   /* build cache for off array entries formed */
6578965ea79SLois Curfman McInnes   ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr);
6588965ea79SLois Curfman McInnes 
6598965ea79SLois Curfman McInnes   /* stuff used for matrix vector multiply */
6608965ea79SLois Curfman McInnes   a->lvec      = 0;
6618965ea79SLois Curfman McInnes   a->Mvctx     = 0;
6628965ea79SLois Curfman McInnes   a->assembled = 0;
6638965ea79SLois Curfman McInnes 
6648965ea79SLois Curfman McInnes   *newmat = mat;
6658965ea79SLois Curfman McInnes   return 0;
6668965ea79SLois Curfman McInnes }
6678965ea79SLois Curfman McInnes 
668*39ddd567SLois Curfman McInnes static int MatCopyPrivate_MPIDense(Mat matin,Mat *newmat)
6698965ea79SLois Curfman McInnes {
6708965ea79SLois Curfman McInnes   Mat        mat;
671*39ddd567SLois Curfman McInnes   Mat_MPIDense *a,*oldmat = (Mat_MPIDense *) matin->data;
672*39ddd567SLois Curfman McInnes   int        ierr;
6738965ea79SLois Curfman McInnes 
674*39ddd567SLois Curfman McInnes   if (!oldmat->assembled) SETERRQ(1,"MatCopyPrivate_MPIDense:Must assemble matrix");
6758965ea79SLois Curfman McInnes   *newmat       = 0;
676*39ddd567SLois Curfman McInnes   PETSCHEADERCREATE(mat,_Mat,MAT_COOKIE,MATMPIDENSE,matin->comm);
6778965ea79SLois Curfman McInnes   PLogObjectCreate(mat);
678*39ddd567SLois Curfman McInnes   mat->data     = (void *) (a = PETSCNEW(Mat_MPIDense)); CHKPTRQ(a);
6798965ea79SLois Curfman McInnes   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
680*39ddd567SLois Curfman McInnes   mat->destroy  = MatDestroy_MPIDense;
681*39ddd567SLois Curfman McInnes   mat->view     = MatView_MPIDense;
6828965ea79SLois Curfman McInnes   mat->factor   = matin->factor;
6838965ea79SLois Curfman McInnes 
6848965ea79SLois Curfman McInnes   a->m          = oldmat->m;
6858965ea79SLois Curfman McInnes   a->n          = oldmat->n;
6868965ea79SLois Curfman McInnes   a->M          = oldmat->M;
6878965ea79SLois Curfman McInnes   a->N          = oldmat->N;
6888965ea79SLois Curfman McInnes 
6898965ea79SLois Curfman McInnes   a->assembled  = 1;
6908965ea79SLois Curfman McInnes   a->rstart     = oldmat->rstart;
6918965ea79SLois Curfman McInnes   a->rend       = oldmat->rend;
6928965ea79SLois Curfman McInnes   a->size       = oldmat->size;
6938965ea79SLois Curfman McInnes   a->rank       = oldmat->rank;
6948965ea79SLois Curfman McInnes   a->insertmode = NOT_SET_VALUES;
6958965ea79SLois Curfman McInnes 
6968965ea79SLois Curfman McInnes   a->rowners = (int *) PETSCMALLOC((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners);
697*39ddd567SLois Curfman McInnes   PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIDense));
6988965ea79SLois Curfman McInnes   PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));
6998965ea79SLois Curfman McInnes   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
7008965ea79SLois Curfman McInnes 
7018965ea79SLois Curfman McInnes   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
7028965ea79SLois Curfman McInnes   PLogObjectParent(mat,a->lvec);
7038965ea79SLois Curfman McInnes   ierr =  VecScatterCtxCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
7048965ea79SLois Curfman McInnes   PLogObjectParent(mat,a->Mvctx);
7058965ea79SLois Curfman McInnes   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
7068965ea79SLois Curfman McInnes   PLogObjectParent(mat,a->A);
7078965ea79SLois Curfman McInnes   *newmat = mat;
7088965ea79SLois Curfman McInnes   return 0;
7098965ea79SLois Curfman McInnes }
7108965ea79SLois Curfman McInnes 
7118965ea79SLois Curfman McInnes #include "sysio.h"
7128965ea79SLois Curfman McInnes 
713*39ddd567SLois Curfman McInnes int MatLoad_MPIDense(Viewer bview,MatType type,Mat *newmat)
7148965ea79SLois Curfman McInnes {
7158965ea79SLois Curfman McInnes   Mat          A;
7168965ea79SLois Curfman McInnes   int          i, nz, ierr, j,rstart, rend, fd;
7178965ea79SLois Curfman McInnes   Scalar       *vals,*svals;
7188965ea79SLois Curfman McInnes   PetscObject  vobj = (PetscObject) bview;
7198965ea79SLois Curfman McInnes   MPI_Comm     comm = vobj->comm;
7208965ea79SLois Curfman McInnes   MPI_Status   status;
7218965ea79SLois Curfman McInnes   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
7228965ea79SLois Curfman McInnes   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
7238965ea79SLois Curfman McInnes   int          tag = ((PetscObject)bview)->tag;
7248965ea79SLois Curfman McInnes 
7258965ea79SLois Curfman McInnes   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
7268965ea79SLois Curfman McInnes   if (!rank) {
7278965ea79SLois Curfman McInnes     ierr = ViewerFileGetDescriptor_Private(bview,&fd); CHKERRQ(ierr);
7288965ea79SLois Curfman McInnes     ierr = SYRead(fd,(char *)header,4,SYINT); CHKERRQ(ierr);
729*39ddd567SLois Curfman McInnes     if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIDenseorMPIRow:not matrix object");
7308965ea79SLois Curfman McInnes   }
7318965ea79SLois Curfman McInnes 
7328965ea79SLois Curfman McInnes   MPI_Bcast(header+1,3,MPI_INT,0,comm);
7338965ea79SLois Curfman McInnes   M = header[1]; N = header[2];
7348965ea79SLois Curfman McInnes   /* determine ownership of all rows */
7358965ea79SLois Curfman McInnes   m = M/size + ((M % size) > rank);
7368965ea79SLois Curfman McInnes   rowners = (int *) PETSCMALLOC((size+2)*sizeof(int)); CHKPTRQ(rowners);
7378965ea79SLois Curfman McInnes   MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
7388965ea79SLois Curfman McInnes   rowners[0] = 0;
7398965ea79SLois Curfman McInnes   for ( i=2; i<=size; i++ ) {
7408965ea79SLois Curfman McInnes     rowners[i] += rowners[i-1];
7418965ea79SLois Curfman McInnes   }
7428965ea79SLois Curfman McInnes   rstart = rowners[rank];
7438965ea79SLois Curfman McInnes   rend   = rowners[rank+1];
7448965ea79SLois Curfman McInnes 
7458965ea79SLois Curfman McInnes   /* distribute row lengths to all processors */
7468965ea79SLois Curfman McInnes   ourlens = (int*) PETSCMALLOC( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
7478965ea79SLois Curfman McInnes   offlens = ourlens + (rend-rstart);
7488965ea79SLois Curfman McInnes   if (!rank) {
7498965ea79SLois Curfman McInnes     rowlengths = (int*) PETSCMALLOC( M*sizeof(int) ); CHKPTRQ(rowlengths);
7508965ea79SLois Curfman McInnes     ierr = SYRead(fd,rowlengths,M,SYINT); CHKERRQ(ierr);
7518965ea79SLois Curfman McInnes     sndcounts = (int*) PETSCMALLOC( size*sizeof(int) ); CHKPTRQ(sndcounts);
7528965ea79SLois Curfman McInnes     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
7538965ea79SLois Curfman McInnes     MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
7548965ea79SLois Curfman McInnes     PETSCFREE(sndcounts);
7558965ea79SLois Curfman McInnes   }
7568965ea79SLois Curfman McInnes   else {
7578965ea79SLois Curfman McInnes     MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
7588965ea79SLois Curfman McInnes   }
7598965ea79SLois Curfman McInnes 
7608965ea79SLois Curfman McInnes   if (!rank) {
7618965ea79SLois Curfman McInnes     /* calculate the number of nonzeros on each processor */
7628965ea79SLois Curfman McInnes     procsnz = (int*) PETSCMALLOC( size*sizeof(int) ); CHKPTRQ(procsnz);
7638965ea79SLois Curfman McInnes     PetscZero(procsnz,size*sizeof(int));
7648965ea79SLois Curfman McInnes     for ( i=0; i<size; i++ ) {
7658965ea79SLois Curfman McInnes       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
7668965ea79SLois Curfman McInnes         procsnz[i] += rowlengths[j];
7678965ea79SLois Curfman McInnes       }
7688965ea79SLois Curfman McInnes     }
7698965ea79SLois Curfman McInnes     PETSCFREE(rowlengths);
7708965ea79SLois Curfman McInnes 
7718965ea79SLois Curfman McInnes     /* determine max buffer needed and allocate it */
7728965ea79SLois Curfman McInnes     maxnz = 0;
7738965ea79SLois Curfman McInnes     for ( i=0; i<size; i++ ) {
7748965ea79SLois Curfman McInnes       maxnz = PETSCMAX(maxnz,procsnz[i]);
7758965ea79SLois Curfman McInnes     }
7768965ea79SLois Curfman McInnes     cols = (int *) PETSCMALLOC( maxnz*sizeof(int) ); CHKPTRQ(cols);
7778965ea79SLois Curfman McInnes 
7788965ea79SLois Curfman McInnes     /* read in my part of the matrix column indices  */
7798965ea79SLois Curfman McInnes     nz = procsnz[0];
7808965ea79SLois Curfman McInnes     mycols = (int *) PETSCMALLOC( nz*sizeof(int) ); CHKPTRQ(mycols);
7818965ea79SLois Curfman McInnes     ierr = SYRead(fd,mycols,nz,SYINT); CHKERRQ(ierr);
7828965ea79SLois Curfman McInnes 
7838965ea79SLois Curfman McInnes     /* read in every one elses and ship off */
7848965ea79SLois Curfman McInnes     for ( i=1; i<size; i++ ) {
7858965ea79SLois Curfman McInnes       nz = procsnz[i];
7868965ea79SLois Curfman McInnes       ierr = SYRead(fd,cols,nz,SYINT); CHKERRQ(ierr);
7878965ea79SLois Curfman McInnes       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
7888965ea79SLois Curfman McInnes     }
7898965ea79SLois Curfman McInnes     PETSCFREE(cols);
7908965ea79SLois Curfman McInnes   }
7918965ea79SLois Curfman McInnes   else {
7928965ea79SLois Curfman McInnes     /* determine buffer space needed for message */
7938965ea79SLois Curfman McInnes     nz = 0;
7948965ea79SLois Curfman McInnes     for ( i=0; i<m; i++ ) {
7958965ea79SLois Curfman McInnes       nz += ourlens[i];
7968965ea79SLois Curfman McInnes     }
7978965ea79SLois Curfman McInnes     mycols = (int*) PETSCMALLOC( nz*sizeof(int) ); CHKPTRQ(mycols);
7988965ea79SLois Curfman McInnes 
7998965ea79SLois Curfman McInnes     /* receive message of column indices*/
8008965ea79SLois Curfman McInnes     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
8018965ea79SLois Curfman McInnes     MPI_Get_count(&status,MPI_INT,&maxnz);
802*39ddd567SLois Curfman McInnes     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIDenseorMPIRow:something is wrong with file");
8038965ea79SLois Curfman McInnes   }
8048965ea79SLois Curfman McInnes 
8058965ea79SLois Curfman McInnes   /* loop over local rows, determining number of off diagonal entries */
8068965ea79SLois Curfman McInnes   PetscZero(offlens,m*sizeof(int));
8078965ea79SLois Curfman McInnes   jj = 0;
8088965ea79SLois Curfman McInnes   for ( i=0; i<m; i++ ) {
8098965ea79SLois Curfman McInnes     for ( j=0; j<ourlens[i]; j++ ) {
8108965ea79SLois Curfman McInnes       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
8118965ea79SLois Curfman McInnes       jj++;
8128965ea79SLois Curfman McInnes     }
8138965ea79SLois Curfman McInnes   }
8148965ea79SLois Curfman McInnes 
8158965ea79SLois Curfman McInnes   /* create our matrix */
8168965ea79SLois Curfman McInnes   for ( i=0; i<m; i++ ) {
8178965ea79SLois Curfman McInnes     ourlens[i] -= offlens[i];
8188965ea79SLois Curfman McInnes   }
819*39ddd567SLois Curfman McInnes   if (type == MATMPIDENSE) {
820*39ddd567SLois Curfman McInnes     ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,newmat);CHKERRQ(ierr);
8218965ea79SLois Curfman McInnes   }
8228965ea79SLois Curfman McInnes   A = *newmat;
8238965ea79SLois Curfman McInnes   MatSetOption(A,COLUMNS_SORTED);
8248965ea79SLois Curfman McInnes   for ( i=0; i<m; i++ ) {
8258965ea79SLois Curfman McInnes     ourlens[i] += offlens[i];
8268965ea79SLois Curfman McInnes   }
8278965ea79SLois Curfman McInnes 
8288965ea79SLois Curfman McInnes   if (!rank) {
8298965ea79SLois Curfman McInnes     vals = (Scalar *) PETSCMALLOC( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);
8308965ea79SLois Curfman McInnes 
8318965ea79SLois Curfman McInnes     /* read in my part of the matrix numerical values  */
8328965ea79SLois Curfman McInnes     nz = procsnz[0];
8338965ea79SLois Curfman McInnes     ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr);
8348965ea79SLois Curfman McInnes 
8358965ea79SLois Curfman McInnes     /* insert into matrix */
8368965ea79SLois Curfman McInnes     jj      = rstart;
8378965ea79SLois Curfman McInnes     smycols = mycols;
8388965ea79SLois Curfman McInnes     svals   = vals;
8398965ea79SLois Curfman McInnes     for ( i=0; i<m; i++ ) {
8408965ea79SLois Curfman McInnes       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
8418965ea79SLois Curfman McInnes       smycols += ourlens[i];
8428965ea79SLois Curfman McInnes       svals   += ourlens[i];
8438965ea79SLois Curfman McInnes       jj++;
8448965ea79SLois Curfman McInnes     }
8458965ea79SLois Curfman McInnes 
8468965ea79SLois Curfman McInnes     /* read in other processors and ship out */
8478965ea79SLois Curfman McInnes     for ( i=1; i<size; i++ ) {
8488965ea79SLois Curfman McInnes       nz = procsnz[i];
8498965ea79SLois Curfman McInnes       ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr);
8508965ea79SLois Curfman McInnes       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
8518965ea79SLois Curfman McInnes     }
8528965ea79SLois Curfman McInnes     PETSCFREE(procsnz);
8538965ea79SLois Curfman McInnes   }
8548965ea79SLois Curfman McInnes   else {
8558965ea79SLois Curfman McInnes     /* receive numeric values */
8568965ea79SLois Curfman McInnes     vals = (Scalar*) PETSCMALLOC( nz*sizeof(Scalar) ); CHKPTRQ(vals);
8578965ea79SLois Curfman McInnes 
8588965ea79SLois Curfman McInnes     /* receive message of values*/
8598965ea79SLois Curfman McInnes     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
8608965ea79SLois Curfman McInnes     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
861*39ddd567SLois Curfman McInnes     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIDenseorMPIRow:something is wrong with file");
8628965ea79SLois Curfman McInnes 
8638965ea79SLois Curfman McInnes     /* insert into matrix */
8648965ea79SLois Curfman McInnes     jj      = rstart;
8658965ea79SLois Curfman McInnes     smycols = mycols;
8668965ea79SLois Curfman McInnes     svals   = vals;
8678965ea79SLois Curfman McInnes     for ( i=0; i<m; i++ ) {
8688965ea79SLois Curfman McInnes       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
8698965ea79SLois Curfman McInnes       smycols += ourlens[i];
8708965ea79SLois Curfman McInnes       svals   += ourlens[i];
8718965ea79SLois Curfman McInnes       jj++;
8728965ea79SLois Curfman McInnes     }
8738965ea79SLois Curfman McInnes   }
8748965ea79SLois Curfman McInnes   PETSCFREE(ourlens); PETSCFREE(vals); PETSCFREE(mycols); PETSCFREE(rowners);
8758965ea79SLois Curfman McInnes 
8768965ea79SLois Curfman McInnes   ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
8778965ea79SLois Curfman McInnes   ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
8788965ea79SLois Curfman McInnes   return 0;
8798965ea79SLois Curfman McInnes }
880