xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 18f449ed41991115d803c2e2c8f42c76f04cb975)
18965ea79SLois Curfman McInnes #ifndef lint
2*18f449edSLois Curfman McInnes static char vcid[] = "$Id: mpidense.c,v 1.11 1995/11/09 22:28:45 bsmith Exp curfman $";
38965ea79SLois Curfman McInnes #endif
48965ea79SLois Curfman McInnes 
5ed3cc1f0SBarry Smith /*
6ed3cc1f0SBarry Smith    Basic functions for basic parallel dense matrices.
7ed3cc1f0SBarry Smith */
8ed3cc1f0SBarry Smith 
98965ea79SLois Curfman McInnes #include "mpidense.h"
108965ea79SLois Curfman McInnes #include "vec/vecimpl.h"
118965ea79SLois Curfman McInnes #include "inline/spops.h"
128965ea79SLois Curfman McInnes 
1339ddd567SLois Curfman McInnes static int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,
148965ea79SLois Curfman McInnes                                int *idxn,Scalar *v,InsertMode addv)
158965ea79SLois Curfman McInnes {
1639ddd567SLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
1739ddd567SLois Curfman McInnes   int          ierr, i, j, rstart = mdn->rstart, rend = mdn->rend, row;
188965ea79SLois Curfman McInnes 
1939ddd567SLois Curfman McInnes   if (mdn->insertmode != NOT_SET_VALUES && mdn->insertmode != addv) {
2039ddd567SLois Curfman McInnes     SETERRQ(1,"MatSetValues_MPIDense:Cannot mix inserts and adds");
218965ea79SLois Curfman McInnes   }
2239ddd567SLois Curfman McInnes   mdn->insertmode = addv;
238965ea79SLois Curfman McInnes   for ( i=0; i<m; i++ ) {
2439ddd567SLois Curfman McInnes     if (idxm[i] < 0) SETERRQ(1,"MatSetValues_MPIDense:Negative row");
2539ddd567SLois Curfman McInnes     if (idxm[i] >= mdn->M) SETERRQ(1,"MatSetValues_MPIDense:Row too large");
268965ea79SLois Curfman McInnes     if (idxm[i] >= rstart && idxm[i] < rend) {
278965ea79SLois Curfman McInnes       row = idxm[i] - rstart;
288965ea79SLois Curfman McInnes       for ( j=0; j<n; j++ ) {
2939ddd567SLois Curfman McInnes         if (idxn[j] < 0) SETERRQ(1,"MatSetValues_MPIDense:Negative column");
3039ddd567SLois Curfman McInnes         if (idxn[j] >= mdn->N)
3139ddd567SLois Curfman McInnes           SETERRQ(1,"MatSetValues_MPIDense:Column too large");
3239ddd567SLois Curfman McInnes         ierr = MatSetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j,addv);
3339ddd567SLois Curfman McInnes         CHKERRQ(ierr);
348965ea79SLois Curfman McInnes       }
358965ea79SLois Curfman McInnes     }
368965ea79SLois Curfman McInnes     else {
3739ddd567SLois Curfman McInnes       ierr = StashValues_Private(&mdn->stash,idxm[i],n,idxn,v+i*n,addv);
3839ddd567SLois Curfman McInnes       CHKERRQ(ierr);
398965ea79SLois Curfman McInnes     }
408965ea79SLois Curfman McInnes   }
418965ea79SLois Curfman McInnes   return 0;
428965ea79SLois Curfman McInnes }
438965ea79SLois Curfman McInnes 
4439ddd567SLois Curfman McInnes static int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
458965ea79SLois Curfman McInnes {
4639ddd567SLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
478965ea79SLois Curfman McInnes   MPI_Comm     comm = mat->comm;
4839ddd567SLois Curfman McInnes   int          size = mdn->size, *owners = mdn->rowners, rank = mdn->rank;
498965ea79SLois Curfman McInnes   int          *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
5039ddd567SLois Curfman McInnes   int          tag = mat->tag, *owner,*starts,count,ierr;
518965ea79SLois Curfman McInnes   InsertMode   addv;
5239ddd567SLois Curfman McInnes   MPI_Request  *send_waits,*recv_waits;
538965ea79SLois Curfman McInnes   Scalar       *rvalues,*svalues;
548965ea79SLois Curfman McInnes 
558965ea79SLois Curfman McInnes   /* make sure all processors are either in INSERTMODE or ADDMODE */
5639ddd567SLois Curfman McInnes   MPI_Allreduce((void *) &mdn->insertmode,(void *) &addv,1,MPI_INT,
5739ddd567SLois Curfman McInnes                 MPI_BOR,comm);
5839ddd567SLois Curfman McInnes   if (addv == (ADD_VALUES|INSERT_VALUES)) { SETERRQ(1,
5939ddd567SLois Curfman McInnes     "MatAssemblyBegin_MPIDense:Cannot mix adds/inserts on different procs");
608965ea79SLois Curfman McInnes     }
6139ddd567SLois Curfman McInnes   mdn->insertmode = addv; /* in case this processor had no cache */
628965ea79SLois Curfman McInnes 
638965ea79SLois Curfman McInnes   /*  first count number of contributors to each processor */
640452661fSBarry Smith   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
65cddf8d76SBarry Smith   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
660452661fSBarry Smith   owner = (int *) PetscMalloc( (mdn->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
6739ddd567SLois Curfman McInnes   for ( i=0; i<mdn->stash.n; i++ ) {
6839ddd567SLois Curfman McInnes     idx = mdn->stash.idx[i];
698965ea79SLois Curfman McInnes     for ( j=0; j<size; j++ ) {
708965ea79SLois Curfman McInnes       if (idx >= owners[j] && idx < owners[j+1]) {
718965ea79SLois Curfman McInnes         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
728965ea79SLois Curfman McInnes       }
738965ea79SLois Curfman McInnes     }
748965ea79SLois Curfman McInnes   }
758965ea79SLois Curfman McInnes   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
768965ea79SLois Curfman McInnes 
778965ea79SLois Curfman McInnes   /* inform other processors of number of messages and max length*/
780452661fSBarry Smith   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
7939ddd567SLois Curfman McInnes   MPI_Allreduce((void *) procs,(void *) work,size,MPI_INT,MPI_SUM,comm);
808965ea79SLois Curfman McInnes   nreceives = work[rank];
8139ddd567SLois Curfman McInnes   MPI_Allreduce((void *) nprocs,(void *) work,size,MPI_INT,MPI_MAX,comm);
828965ea79SLois Curfman McInnes   nmax = work[rank];
830452661fSBarry Smith   PetscFree(work);
848965ea79SLois Curfman McInnes 
858965ea79SLois Curfman McInnes   /* post receives:
868965ea79SLois Curfman McInnes        1) each message will consist of ordered pairs
878965ea79SLois Curfman McInnes      (global index,value) we store the global index as a double
888965ea79SLois Curfman McInnes      to simplify the message passing.
898965ea79SLois Curfman McInnes        2) since we don't know how long each individual message is we
908965ea79SLois Curfman McInnes      allocate the largest needed buffer for each receive. Potentially
918965ea79SLois Curfman McInnes      this is a lot of wasted space.
928965ea79SLois Curfman McInnes 
938965ea79SLois Curfman McInnes        This could be done better.
948965ea79SLois Curfman McInnes   */
950452661fSBarry Smith   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
968965ea79SLois Curfman McInnes   CHKPTRQ(rvalues);
970452661fSBarry Smith   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
988965ea79SLois Curfman McInnes   CHKPTRQ(recv_waits);
998965ea79SLois Curfman McInnes   for ( i=0; i<nreceives; i++ ) {
10039ddd567SLois Curfman McInnes     MPI_Irecv((void *)(rvalues+3*nmax*i),3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
1018965ea79SLois Curfman McInnes               comm,recv_waits+i);
1028965ea79SLois Curfman McInnes   }
1038965ea79SLois Curfman McInnes 
1048965ea79SLois Curfman McInnes   /* do sends:
1058965ea79SLois Curfman McInnes       1) starts[i] gives the starting index in svalues for stuff going to
1068965ea79SLois Curfman McInnes          the ith processor
1078965ea79SLois Curfman McInnes   */
1080452661fSBarry Smith   svalues = (Scalar *) PetscMalloc( 3*(mdn->stash.n+1)*sizeof(Scalar) );
10939ddd567SLois Curfman McInnes   CHKPTRQ(svalues);
1100452661fSBarry Smith   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
1118965ea79SLois Curfman McInnes   CHKPTRQ(send_waits);
1120452661fSBarry Smith   starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
1138965ea79SLois Curfman McInnes   starts[0] = 0;
1148965ea79SLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
11539ddd567SLois Curfman McInnes   for ( i=0; i<mdn->stash.n; i++ ) {
11639ddd567SLois Curfman McInnes     svalues[3*starts[owner[i]]]       = (Scalar)  mdn->stash.idx[i];
11739ddd567SLois Curfman McInnes     svalues[3*starts[owner[i]]+1]     = (Scalar)  mdn->stash.idy[i];
11839ddd567SLois Curfman McInnes     svalues[3*(starts[owner[i]]++)+2] =  mdn->stash.array[i];
1198965ea79SLois Curfman McInnes   }
1200452661fSBarry Smith   PetscFree(owner);
1218965ea79SLois Curfman McInnes   starts[0] = 0;
1228965ea79SLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
1238965ea79SLois Curfman McInnes   count = 0;
1248965ea79SLois Curfman McInnes   for ( i=0; i<size; i++ ) {
1258965ea79SLois Curfman McInnes     if (procs[i]) {
12639ddd567SLois Curfman McInnes       MPI_Isend((void*)(svalues+3*starts[i]),3*nprocs[i],MPIU_SCALAR,i,tag,
1278965ea79SLois Curfman McInnes                 comm,send_waits+count++);
1288965ea79SLois Curfman McInnes     }
1298965ea79SLois Curfman McInnes   }
1300452661fSBarry Smith   PetscFree(starts); PetscFree(nprocs);
1318965ea79SLois Curfman McInnes 
1328965ea79SLois Curfman McInnes   /* Free cache space */
13339ddd567SLois Curfman McInnes   ierr = StashDestroy_Private(&mdn->stash); CHKERRQ(ierr);
1348965ea79SLois Curfman McInnes 
13539ddd567SLois Curfman McInnes   mdn->svalues    = svalues;    mdn->rvalues = rvalues;
13639ddd567SLois Curfman McInnes   mdn->nsends     = nsends;     mdn->nrecvs = nreceives;
13739ddd567SLois Curfman McInnes   mdn->send_waits = send_waits; mdn->recv_waits = recv_waits;
13839ddd567SLois Curfman McInnes   mdn->rmax       = nmax;
1398965ea79SLois Curfman McInnes 
1408965ea79SLois Curfman McInnes   return 0;
1418965ea79SLois Curfman McInnes }
14239ddd567SLois Curfman McInnes extern int MatSetUpMultiply_MPIDense(Mat);
1438965ea79SLois Curfman McInnes 
14439ddd567SLois Curfman McInnes static int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
1458965ea79SLois Curfman McInnes {
14639ddd567SLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
1478965ea79SLois Curfman McInnes   MPI_Status   *send_status,recv_status;
14839ddd567SLois Curfman McInnes   int          imdex, nrecvs=mdn->nrecvs, count=nrecvs, i, n, ierr, row, col;
1498965ea79SLois Curfman McInnes   Scalar       *values,val;
15039ddd567SLois Curfman McInnes   InsertMode   addv = mdn->insertmode;
1518965ea79SLois Curfman McInnes 
1528965ea79SLois Curfman McInnes   /*  wait on receives */
1538965ea79SLois Curfman McInnes   while (count) {
15439ddd567SLois Curfman McInnes     MPI_Waitany(nrecvs,mdn->recv_waits,&imdex,&recv_status);
1558965ea79SLois Curfman McInnes     /* unpack receives into our local space */
15639ddd567SLois Curfman McInnes     values = mdn->rvalues + 3*imdex*mdn->rmax;
1578965ea79SLois Curfman McInnes     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
1588965ea79SLois Curfman McInnes     n = n/3;
1598965ea79SLois Curfman McInnes     for ( i=0; i<n; i++ ) {
16039ddd567SLois Curfman McInnes       row = (int) PETSCREAL(values[3*i]) - mdn->rstart;
1618965ea79SLois Curfman McInnes       col = (int) PETSCREAL(values[3*i+1]);
1628965ea79SLois Curfman McInnes       val = values[3*i+2];
16339ddd567SLois Curfman McInnes       if (col >= 0 && col < mdn->N) {
16439ddd567SLois Curfman McInnes         MatSetValues(mdn->A,1,&row,1,&col,&val,addv);
1658965ea79SLois Curfman McInnes       }
16639ddd567SLois Curfman McInnes       else {SETERRQ(1,"MatAssemblyEnd_MPIDense:Invalid column");}
1678965ea79SLois Curfman McInnes     }
1688965ea79SLois Curfman McInnes     count--;
1698965ea79SLois Curfman McInnes   }
1700452661fSBarry Smith   PetscFree(mdn->recv_waits); PetscFree(mdn->rvalues);
1718965ea79SLois Curfman McInnes 
1728965ea79SLois Curfman McInnes   /* wait on sends */
17339ddd567SLois Curfman McInnes   if (mdn->nsends) {
1740452661fSBarry Smith     send_status = (MPI_Status *) PetscMalloc( mdn->nsends*sizeof(MPI_Status) );
1758965ea79SLois Curfman McInnes     CHKPTRQ(send_status);
17639ddd567SLois Curfman McInnes     MPI_Waitall(mdn->nsends,mdn->send_waits,send_status);
1770452661fSBarry Smith     PetscFree(send_status);
1788965ea79SLois Curfman McInnes   }
1790452661fSBarry Smith   PetscFree(mdn->send_waits); PetscFree(mdn->svalues);
1808965ea79SLois Curfman McInnes 
18139ddd567SLois Curfman McInnes   mdn->insertmode = NOT_SET_VALUES;
18239ddd567SLois Curfman McInnes   ierr = MatAssemblyBegin(mdn->A,mode); CHKERRQ(ierr);
18339ddd567SLois Curfman McInnes   ierr = MatAssemblyEnd(mdn->A,mode); CHKERRQ(ierr);
1848965ea79SLois Curfman McInnes 
18539ddd567SLois Curfman McInnes   if (!mdn->assembled && mode == FINAL_ASSEMBLY) {
18639ddd567SLois Curfman McInnes     ierr = MatSetUpMultiply_MPIDense(mat); CHKERRQ(ierr);
1878965ea79SLois Curfman McInnes   }
18839ddd567SLois Curfman McInnes   mdn->assembled = 1;
1898965ea79SLois Curfman McInnes   return 0;
1908965ea79SLois Curfman McInnes }
1918965ea79SLois Curfman McInnes 
19239ddd567SLois Curfman McInnes static int MatZeroEntries_MPIDense(Mat A)
1938965ea79SLois Curfman McInnes {
19439ddd567SLois Curfman McInnes   Mat_MPIDense *l = (Mat_MPIDense *) A->data;
19539ddd567SLois Curfman McInnes   return MatZeroEntries(l->A);
1968965ea79SLois Curfman McInnes }
1978965ea79SLois Curfman McInnes 
1988965ea79SLois Curfman McInnes /* the code does not do the diagonal entries correctly unless the
1998965ea79SLois Curfman McInnes    matrix is square and the column and row owerships are identical.
2008965ea79SLois Curfman McInnes    This is a BUG. The only way to fix it seems to be to access
2013501a2bdSLois Curfman McInnes    mdn->A and mdn->B directly and not through the MatZeroRows()
2028965ea79SLois Curfman McInnes    routine.
2038965ea79SLois Curfman McInnes */
20439ddd567SLois Curfman McInnes static int MatZeroRows_MPIDense(Mat A,IS is,Scalar *diag)
2058965ea79SLois Curfman McInnes {
20639ddd567SLois Curfman McInnes   Mat_MPIDense   *l = (Mat_MPIDense *) A->data;
2078965ea79SLois Curfman McInnes   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
2088965ea79SLois Curfman McInnes   int            *procs,*nprocs,j,found,idx,nsends,*work;
2098965ea79SLois Curfman McInnes   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
2108965ea79SLois Curfman McInnes   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
2118965ea79SLois Curfman McInnes   int            *lens,imdex,*lrows,*values;
2128965ea79SLois Curfman McInnes   MPI_Comm       comm = A->comm;
2138965ea79SLois Curfman McInnes   MPI_Request    *send_waits,*recv_waits;
2148965ea79SLois Curfman McInnes   MPI_Status     recv_status,*send_status;
2158965ea79SLois Curfman McInnes   IS             istmp;
2168965ea79SLois Curfman McInnes 
21739ddd567SLois Curfman McInnes   if (!l->assembled) SETERRQ(1,"MatZeroRows_MPIDense:Must assemble matrix");
2188965ea79SLois Curfman McInnes   ierr = ISGetLocalSize(is,&N); CHKERRQ(ierr);
2198965ea79SLois Curfman McInnes   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
2208965ea79SLois Curfman McInnes 
2218965ea79SLois Curfman McInnes   /*  first count number of contributors to each processor */
2220452661fSBarry Smith   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
223cddf8d76SBarry Smith   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
2240452661fSBarry Smith   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
2258965ea79SLois Curfman McInnes   for ( i=0; i<N; i++ ) {
2268965ea79SLois Curfman McInnes     idx = rows[i];
2278965ea79SLois Curfman McInnes     found = 0;
2288965ea79SLois Curfman McInnes     for ( j=0; j<size; j++ ) {
2298965ea79SLois Curfman McInnes       if (idx >= owners[j] && idx < owners[j+1]) {
2308965ea79SLois Curfman McInnes         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
2318965ea79SLois Curfman McInnes       }
2328965ea79SLois Curfman McInnes     }
23339ddd567SLois Curfman McInnes     if (!found) SETERRQ(1,"MatZeroRows_MPIDense:Index out of range");
2348965ea79SLois Curfman McInnes   }
2358965ea79SLois Curfman McInnes   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
2368965ea79SLois Curfman McInnes 
2378965ea79SLois Curfman McInnes   /* inform other processors of number of messages and max length*/
2380452661fSBarry Smith   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
2398965ea79SLois Curfman McInnes   MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
2408965ea79SLois Curfman McInnes   nrecvs = work[rank];
2418965ea79SLois Curfman McInnes   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
2428965ea79SLois Curfman McInnes   nmax = work[rank];
2430452661fSBarry Smith   PetscFree(work);
2448965ea79SLois Curfman McInnes 
2458965ea79SLois Curfman McInnes   /* post receives:   */
2460452661fSBarry Smith   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
2478965ea79SLois Curfman McInnes   CHKPTRQ(rvalues);
2480452661fSBarry Smith   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
2498965ea79SLois Curfman McInnes   CHKPTRQ(recv_waits);
2508965ea79SLois Curfman McInnes   for ( i=0; i<nrecvs; i++ ) {
2518965ea79SLois Curfman McInnes     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
2528965ea79SLois Curfman McInnes   }
2538965ea79SLois Curfman McInnes 
2548965ea79SLois Curfman McInnes   /* do sends:
2558965ea79SLois Curfman McInnes       1) starts[i] gives the starting index in svalues for stuff going to
2568965ea79SLois Curfman McInnes          the ith processor
2578965ea79SLois Curfman McInnes   */
2580452661fSBarry Smith   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
2590452661fSBarry Smith   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
2608965ea79SLois Curfman McInnes   CHKPTRQ(send_waits);
2610452661fSBarry Smith   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
2628965ea79SLois Curfman McInnes   starts[0] = 0;
2638965ea79SLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
2648965ea79SLois Curfman McInnes   for ( i=0; i<N; i++ ) {
2658965ea79SLois Curfman McInnes     svalues[starts[owner[i]]++] = rows[i];
2668965ea79SLois Curfman McInnes   }
2678965ea79SLois Curfman McInnes   ISRestoreIndices(is,&rows);
2688965ea79SLois Curfman McInnes 
2698965ea79SLois Curfman McInnes   starts[0] = 0;
2708965ea79SLois Curfman McInnes   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
2718965ea79SLois Curfman McInnes   count = 0;
2728965ea79SLois Curfman McInnes   for ( i=0; i<size; i++ ) {
2738965ea79SLois Curfman McInnes     if (procs[i]) {
2748965ea79SLois Curfman McInnes       MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
2758965ea79SLois Curfman McInnes     }
2768965ea79SLois Curfman McInnes   }
2770452661fSBarry Smith   PetscFree(starts);
2788965ea79SLois Curfman McInnes 
2798965ea79SLois Curfman McInnes   base = owners[rank];
2808965ea79SLois Curfman McInnes 
2818965ea79SLois Curfman McInnes   /*  wait on receives */
2820452661fSBarry Smith   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
2838965ea79SLois Curfman McInnes   source = lens + nrecvs;
2848965ea79SLois Curfman McInnes   count  = nrecvs; slen = 0;
2858965ea79SLois Curfman McInnes   while (count) {
2868965ea79SLois Curfman McInnes     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
2878965ea79SLois Curfman McInnes     /* unpack receives into our local space */
2888965ea79SLois Curfman McInnes     MPI_Get_count(&recv_status,MPI_INT,&n);
2898965ea79SLois Curfman McInnes     source[imdex]  = recv_status.MPI_SOURCE;
2908965ea79SLois Curfman McInnes     lens[imdex]  = n;
2918965ea79SLois Curfman McInnes     slen += n;
2928965ea79SLois Curfman McInnes     count--;
2938965ea79SLois Curfman McInnes   }
2940452661fSBarry Smith   PetscFree(recv_waits);
2958965ea79SLois Curfman McInnes 
2968965ea79SLois Curfman McInnes   /* move the data into the send scatter */
2970452661fSBarry Smith   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
2988965ea79SLois Curfman McInnes   count = 0;
2998965ea79SLois Curfman McInnes   for ( i=0; i<nrecvs; i++ ) {
3008965ea79SLois Curfman McInnes     values = rvalues + i*nmax;
3018965ea79SLois Curfman McInnes     for ( j=0; j<lens[i]; j++ ) {
3028965ea79SLois Curfman McInnes       lrows[count++] = values[j] - base;
3038965ea79SLois Curfman McInnes     }
3048965ea79SLois Curfman McInnes   }
3050452661fSBarry Smith   PetscFree(rvalues); PetscFree(lens);
3060452661fSBarry Smith   PetscFree(owner); PetscFree(nprocs);
3078965ea79SLois Curfman McInnes 
3088965ea79SLois Curfman McInnes   /* actually zap the local rows */
3098965ea79SLois Curfman McInnes   ierr = ISCreateSeq(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
3108965ea79SLois Curfman McInnes   PLogObjectParent(A,istmp);
3110452661fSBarry Smith   PetscFree(lrows);
3128965ea79SLois Curfman McInnes   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
3138965ea79SLois Curfman McInnes   ierr = ISDestroy(istmp); CHKERRQ(ierr);
3148965ea79SLois Curfman McInnes 
3158965ea79SLois Curfman McInnes   /* wait on sends */
3168965ea79SLois Curfman McInnes   if (nsends) {
3170452661fSBarry Smith     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
3188965ea79SLois Curfman McInnes     CHKPTRQ(send_status);
3198965ea79SLois Curfman McInnes     MPI_Waitall(nsends,send_waits,send_status);
3200452661fSBarry Smith     PetscFree(send_status);
3218965ea79SLois Curfman McInnes   }
3220452661fSBarry Smith   PetscFree(send_waits); PetscFree(svalues);
3238965ea79SLois Curfman McInnes 
3248965ea79SLois Curfman McInnes   return 0;
3258965ea79SLois Curfman McInnes }
3268965ea79SLois Curfman McInnes 
32739ddd567SLois Curfman McInnes static int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
3288965ea79SLois Curfman McInnes {
32939ddd567SLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
3308965ea79SLois Curfman McInnes   int          ierr;
33139ddd567SLois Curfman McInnes   if (!mdn->assembled)
33239ddd567SLois Curfman McInnes     SETERRQ(1,"MatMult_MPIDense:Must assemble matrix first");
33339ddd567SLois Curfman McInnes   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx);
33439ddd567SLois Curfman McInnes   CHKERRQ(ierr);
33539ddd567SLois Curfman McInnes   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx);
33639ddd567SLois Curfman McInnes   CHKERRQ(ierr);
33739ddd567SLois Curfman McInnes   ierr = MatMult(mdn->A,mdn->lvec,yy); CHKERRQ(ierr);
3388965ea79SLois Curfman McInnes   return 0;
3398965ea79SLois Curfman McInnes }
3408965ea79SLois Curfman McInnes 
34139ddd567SLois Curfman McInnes static int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
3428965ea79SLois Curfman McInnes {
34339ddd567SLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
3448965ea79SLois Curfman McInnes   int          ierr;
34539ddd567SLois Curfman McInnes   if (!mdn->assembled)
34639ddd567SLois Curfman McInnes     SETERRQ(1,"MatMultAdd_MPIDense:Must assemble matrix first");
3473501a2bdSLois Curfman McInnes   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx);
34839ddd567SLois Curfman McInnes   CHKERRQ(ierr);
3493501a2bdSLois Curfman McInnes   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx);
35039ddd567SLois Curfman McInnes   CHKERRQ(ierr);
35139ddd567SLois Curfman McInnes   ierr = MatMultAdd(mdn->A,mdn->lvec,yy,zz); CHKERRQ(ierr);
3528965ea79SLois Curfman McInnes   return 0;
3538965ea79SLois Curfman McInnes }
3548965ea79SLois Curfman McInnes 
355096963f5SLois Curfman McInnes static int MatMultTrans_MPIDense(Mat A,Vec xx,Vec yy)
356096963f5SLois Curfman McInnes {
357096963f5SLois Curfman McInnes   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
358096963f5SLois Curfman McInnes   int          ierr;
3593501a2bdSLois Curfman McInnes   Scalar       zero = 0.0;
360096963f5SLois Curfman McInnes 
361096963f5SLois Curfman McInnes   if (!a->assembled) SETERRQ(1,"MatMulTrans_MPIDense:must assemble matrix");
3623501a2bdSLois Curfman McInnes   ierr = VecSet(&zero,yy); CHKERRQ(ierr);
363096963f5SLois Curfman McInnes   ierr = MatMultTrans(a->A,xx,a->lvec); CHKERRQ(ierr);
364096963f5SLois Curfman McInnes   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,
365096963f5SLois Curfman McInnes          (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
366096963f5SLois Curfman McInnes   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,
367096963f5SLois Curfman McInnes          (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
368096963f5SLois Curfman McInnes   return 0;
369096963f5SLois Curfman McInnes }
370096963f5SLois Curfman McInnes 
371096963f5SLois Curfman McInnes static int MatMultTransAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
372096963f5SLois Curfman McInnes {
373096963f5SLois Curfman McInnes   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
374096963f5SLois Curfman McInnes   int          ierr;
375096963f5SLois Curfman McInnes 
376096963f5SLois Curfman McInnes   if (!a->assembled) SETERRQ(1,"MatMulTransAdd_MPIDense:must assemble matrix");
3773501a2bdSLois Curfman McInnes   ierr = VecCopy(yy,zz); CHKERRQ(ierr);
3783501a2bdSLois Curfman McInnes   ierr = MatMultTrans(a->A,xx,a->lvec); CHKERRQ(ierr);
379096963f5SLois Curfman McInnes   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,
380096963f5SLois Curfman McInnes          (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
381096963f5SLois Curfman McInnes   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,
382096963f5SLois Curfman McInnes          (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
383096963f5SLois Curfman McInnes   return 0;
384096963f5SLois Curfman McInnes }
385096963f5SLois Curfman McInnes 
38639ddd567SLois Curfman McInnes static int MatGetDiagonal_MPIDense(Mat A,Vec v)
3878965ea79SLois Curfman McInnes {
38839ddd567SLois Curfman McInnes   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
389096963f5SLois Curfman McInnes   Mat_SeqDense *aloc = (Mat_SeqDense *) a->A->data;
390096963f5SLois Curfman McInnes   int          ierr, i, n, m = a->m, radd;
391096963f5SLois Curfman McInnes   Scalar       *x;
392ed3cc1f0SBarry Smith 
39339ddd567SLois Curfman McInnes   if (!a->assembled) SETERRQ(1,"MatGetDiag_MPIDense:must assemble matrix");
394096963f5SLois Curfman McInnes   ierr = VecGetArray(v,&x); CHKERRQ(ierr);
395096963f5SLois Curfman McInnes   ierr = VecGetSize(v,&n); CHKERRQ(ierr);
396096963f5SLois Curfman McInnes   if (n != a->M) SETERRQ(1,"MatGetDiagonal_SeqDense:Nonconforming mat and vec");
397096963f5SLois Curfman McInnes   radd = a->rstart*m*m;
398096963f5SLois Curfman McInnes   for ( i=0; i<m; i++ ) {
399096963f5SLois Curfman McInnes     x[i] = aloc->v[radd + i*m + i];
400096963f5SLois Curfman McInnes   }
401096963f5SLois Curfman McInnes   return 0;
4028965ea79SLois Curfman McInnes }
4038965ea79SLois Curfman McInnes 
40439ddd567SLois Curfman McInnes static int MatDestroy_MPIDense(PetscObject obj)
4058965ea79SLois Curfman McInnes {
4068965ea79SLois Curfman McInnes   Mat          mat = (Mat) obj;
4073501a2bdSLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
4088965ea79SLois Curfman McInnes   int          ierr;
409ed3cc1f0SBarry Smith 
4108965ea79SLois Curfman McInnes #if defined(PETSC_LOG)
4113501a2bdSLois Curfman McInnes   PLogObjectState(obj,"Rows=%d, Cols=%d",mdn->M,mdn->N);
4128965ea79SLois Curfman McInnes #endif
4130452661fSBarry Smith   PetscFree(mdn->rowners);
4143501a2bdSLois Curfman McInnes   ierr = MatDestroy(mdn->A); CHKERRQ(ierr);
4153501a2bdSLois Curfman McInnes   if (mdn->lvec)   VecDestroy(mdn->lvec);
4163501a2bdSLois Curfman McInnes   if (mdn->Mvctx)  VecScatterDestroy(mdn->Mvctx);
4170452661fSBarry Smith   PetscFree(mdn);
4188965ea79SLois Curfman McInnes   PLogObjectDestroy(mat);
4190452661fSBarry Smith   PetscHeaderDestroy(mat);
4208965ea79SLois Curfman McInnes   return 0;
4218965ea79SLois Curfman McInnes }
42239ddd567SLois Curfman McInnes 
4238965ea79SLois Curfman McInnes #include "pinclude/pviewer.h"
4248965ea79SLois Curfman McInnes 
42539ddd567SLois Curfman McInnes static int MatView_MPIDense_Binary(Mat mat,Viewer viewer)
4268965ea79SLois Curfman McInnes {
42739ddd567SLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
4288965ea79SLois Curfman McInnes   int          ierr;
42939ddd567SLois Curfman McInnes   if (mdn->size == 1) {
43039ddd567SLois Curfman McInnes     ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
4318965ea79SLois Curfman McInnes   }
43239ddd567SLois Curfman McInnes   else SETERRQ(1,"MatView_MPIDense_Binary:Only uniprocessor output supported");
4338965ea79SLois Curfman McInnes   return 0;
4348965ea79SLois Curfman McInnes }
4358965ea79SLois Curfman McInnes 
43639ddd567SLois Curfman McInnes static int MatView_MPIDense_ASCII(Mat mat,Viewer viewer)
4378965ea79SLois Curfman McInnes {
43839ddd567SLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
43939ddd567SLois Curfman McInnes   int          ierr, format;
4408965ea79SLois Curfman McInnes   PetscObject  vobj = (PetscObject) viewer;
4418965ea79SLois Curfman McInnes   FILE         *fd;
4428965ea79SLois Curfman McInnes 
4433501a2bdSLois Curfman McInnes   ierr = ViewerFileGetPointer_Private(viewer,&fd); CHKERRQ(ierr);
4448965ea79SLois Curfman McInnes   if (vobj->type == ASCII_FILE_VIEWER || vobj->type == ASCII_FILES_VIEWER) {
4458965ea79SLois Curfman McInnes     ierr = ViewerFileGetFormat_Private(viewer,&format);
4463501a2bdSLois Curfman McInnes     if (format == FILE_FORMAT_INFO_DETAILED) {
447096963f5SLois Curfman McInnes       int nz, nzalloc, mem, rank;
448096963f5SLois Curfman McInnes       MPI_Comm_rank(mat->comm,&rank);
449096963f5SLois Curfman McInnes       ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem);
450096963f5SLois Curfman McInnes       MPIU_Seq_begin(mat->comm,1);
4513501a2bdSLois Curfman McInnes         fprintf(fd,"  [%d] local rows %d nz %d nz alloced %d mem %d \n",
452096963f5SLois Curfman McInnes             rank,mdn->m,nz,nzalloc,mem);
453096963f5SLois Curfman McInnes       fflush(fd);
454096963f5SLois Curfman McInnes       MPIU_Seq_end(mat->comm,1);
4553501a2bdSLois Curfman McInnes       ierr = VecScatterView(mdn->Mvctx,viewer); CHKERRQ(ierr);
4563501a2bdSLois Curfman McInnes       return 0;
4573501a2bdSLois Curfman McInnes     }
4583501a2bdSLois Curfman McInnes     else if (format == FILE_FORMAT_INFO) {
459096963f5SLois Curfman McInnes       return 0;
4608965ea79SLois Curfman McInnes     }
4618965ea79SLois Curfman McInnes   }
4628965ea79SLois Curfman McInnes   if (vobj->type == ASCII_FILE_VIEWER) {
4638965ea79SLois Curfman McInnes     MPIU_Seq_begin(mat->comm,1);
46439ddd567SLois Curfman McInnes     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d\n",
46539ddd567SLois Curfman McInnes              mdn->rank,mdn->m,mdn->rstart,mdn->rend,mdn->n);
46639ddd567SLois Curfman McInnes     ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
4678965ea79SLois Curfman McInnes     fflush(fd);
4688965ea79SLois Curfman McInnes     MPIU_Seq_end(mat->comm,1);
4698965ea79SLois Curfman McInnes   }
4708965ea79SLois Curfman McInnes   else {
47139ddd567SLois Curfman McInnes     int size = mdn->size, rank = mdn->rank;
4728965ea79SLois Curfman McInnes     if (size == 1) {
47339ddd567SLois Curfman McInnes       ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
4748965ea79SLois Curfman McInnes     }
4758965ea79SLois Curfman McInnes     else {
4768965ea79SLois Curfman McInnes       /* assemble the entire matrix onto first processor. */
4778965ea79SLois Curfman McInnes       Mat          A;
47839ddd567SLois Curfman McInnes       int          M = mdn->M, N = mdn->N,m,row,i, nz, *cols;
47939ddd567SLois Curfman McInnes       Scalar       *vals;
48039ddd567SLois Curfman McInnes       Mat_SeqDense *Amdn = (Mat_SeqDense*) mdn->A->data;
4818965ea79SLois Curfman McInnes 
4828965ea79SLois Curfman McInnes       if (!rank) {
483*18f449edSLois Curfman McInnes         ierr = MatCreateMPIDense(mat->comm,M,M,N,N,0,&A); CHKERRQ(ierr);
4848965ea79SLois Curfman McInnes       }
4858965ea79SLois Curfman McInnes       else {
486*18f449edSLois Curfman McInnes         ierr = MatCreateMPIDense(mat->comm,0,M,N,N,0,&A); CHKERRQ(ierr);
4878965ea79SLois Curfman McInnes       }
4888965ea79SLois Curfman McInnes       PLogObjectParent(mat,A);
4898965ea79SLois Curfman McInnes 
49039ddd567SLois Curfman McInnes       /* Copy the matrix ... This isn't the most efficient means,
49139ddd567SLois Curfman McInnes          but it's quick for now */
49239ddd567SLois Curfman McInnes       row = mdn->rstart; m = Amdn->m;
4938965ea79SLois Curfman McInnes       for ( i=0; i<m; i++ ) {
49439ddd567SLois Curfman McInnes         ierr = MatGetRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
49539ddd567SLois Curfman McInnes         ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES); CHKERRQ(ierr);
49639ddd567SLois Curfman McInnes         ierr = MatRestoreRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
49739ddd567SLois Curfman McInnes         row++;
4988965ea79SLois Curfman McInnes       }
4998965ea79SLois Curfman McInnes 
5008965ea79SLois Curfman McInnes       ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
5018965ea79SLois Curfman McInnes       ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
5028965ea79SLois Curfman McInnes       if (!rank) {
50339ddd567SLois Curfman McInnes         ierr = MatView(((Mat_MPIDense*)(A->data))->A,viewer); CHKERRQ(ierr);
5048965ea79SLois Curfman McInnes       }
5058965ea79SLois Curfman McInnes       ierr = MatDestroy(A); CHKERRQ(ierr);
5068965ea79SLois Curfman McInnes     }
5078965ea79SLois Curfman McInnes   }
5088965ea79SLois Curfman McInnes   return 0;
5098965ea79SLois Curfman McInnes }
5108965ea79SLois Curfman McInnes 
51139ddd567SLois Curfman McInnes static int MatView_MPIDense(PetscObject obj,Viewer viewer)
5128965ea79SLois Curfman McInnes {
5138965ea79SLois Curfman McInnes   Mat          mat = (Mat) obj;
51439ddd567SLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
5158965ea79SLois Curfman McInnes   PetscObject  vobj = (PetscObject) viewer;
51639ddd567SLois Curfman McInnes   int          ierr;
5178965ea79SLois Curfman McInnes 
51839ddd567SLois Curfman McInnes   if (!mdn->assembled) SETERRQ(1,"MatView_MPIDense:must assemble matrix");
5198965ea79SLois Curfman McInnes   if (!viewer) {
5208965ea79SLois Curfman McInnes     viewer = STDOUT_VIEWER_SELF; vobj = (PetscObject) viewer;
5218965ea79SLois Curfman McInnes   }
52239ddd567SLois Curfman McInnes   if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILE_VIEWER) {
52339ddd567SLois Curfman McInnes     ierr = MatView_MPIDense_ASCII(mat,viewer); CHKERRQ(ierr);
5248965ea79SLois Curfman McInnes   }
5258965ea79SLois Curfman McInnes   else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILES_VIEWER) {
52639ddd567SLois Curfman McInnes     ierr = MatView_MPIDense_ASCII(mat,viewer); CHKERRQ(ierr);
5278965ea79SLois Curfman McInnes   }
5288965ea79SLois Curfman McInnes   else if (vobj->type == BINARY_FILE_VIEWER) {
52939ddd567SLois Curfman McInnes     return MatView_MPIDense_Binary(mat,viewer);
5308965ea79SLois Curfman McInnes   }
5318965ea79SLois Curfman McInnes   return 0;
5328965ea79SLois Curfman McInnes }
5338965ea79SLois Curfman McInnes 
5343501a2bdSLois Curfman McInnes static int MatGetInfo_MPIDense(Mat A,MatInfoType flag,int *nz,
5358965ea79SLois Curfman McInnes                              int *nzalloc,int *mem)
5368965ea79SLois Curfman McInnes {
5373501a2bdSLois Curfman McInnes   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
5383501a2bdSLois Curfman McInnes   Mat          mdn = mat->A;
53939ddd567SLois Curfman McInnes   int          ierr, isend[3], irecv[3];
5408965ea79SLois Curfman McInnes 
5413501a2bdSLois Curfman McInnes   ierr = MatGetInfo(mdn,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr);
5428965ea79SLois Curfman McInnes   if (flag == MAT_LOCAL) {
5438965ea79SLois Curfman McInnes     *nz = isend[0]; *nzalloc = isend[1]; *mem = isend[2];
5448965ea79SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_MAX) {
5453501a2bdSLois Curfman McInnes     MPI_Allreduce(isend,irecv,3,MPI_INT,MPI_MAX,A->comm);
5468965ea79SLois Curfman McInnes     *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
5478965ea79SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_SUM) {
5483501a2bdSLois Curfman McInnes     MPI_Allreduce(isend,irecv,3,MPI_INT,MPI_SUM,A->comm);
5498965ea79SLois Curfman McInnes     *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
5508965ea79SLois Curfman McInnes   }
5518965ea79SLois Curfman McInnes   return 0;
5528965ea79SLois Curfman McInnes }
5538965ea79SLois Curfman McInnes 
55439ddd567SLois Curfman McInnes static int MatSetOption_MPIDense(Mat A,MatOption op)
5558965ea79SLois Curfman McInnes {
55639ddd567SLois Curfman McInnes   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
5578965ea79SLois Curfman McInnes 
5588965ea79SLois Curfman McInnes   if (op == NO_NEW_NONZERO_LOCATIONS ||
5598965ea79SLois Curfman McInnes       op == YES_NEW_NONZERO_LOCATIONS ||
5608965ea79SLois Curfman McInnes       op == COLUMNS_SORTED ||
5618965ea79SLois Curfman McInnes       op == ROW_ORIENTED) {
5628965ea79SLois Curfman McInnes         MatSetOption(a->A,op);
5638965ea79SLois Curfman McInnes   }
5648965ea79SLois Curfman McInnes   else if (op == ROWS_SORTED ||
5658965ea79SLois Curfman McInnes            op == SYMMETRIC_MATRIX ||
5668965ea79SLois Curfman McInnes            op == STRUCTURALLY_SYMMETRIC_MATRIX ||
5678965ea79SLois Curfman McInnes            op == YES_NEW_DIAGONALS)
56839ddd567SLois Curfman McInnes     PLogInfo((PetscObject)A,"Info:MatSetOption_MPIDense:Option ignored\n");
5698965ea79SLois Curfman McInnes   else if (op == COLUMN_ORIENTED)
57039ddd567SLois Curfman McInnes     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIDense:COLUMN_ORIENTED");}
5718965ea79SLois Curfman McInnes   else if (op == NO_NEW_DIAGONALS)
57239ddd567SLois Curfman McInnes     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIDense:NO_NEW_DIAGONALS");}
5738965ea79SLois Curfman McInnes   else
57439ddd567SLois Curfman McInnes     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIDense:unknown option");}
5758965ea79SLois Curfman McInnes   return 0;
5768965ea79SLois Curfman McInnes }
5778965ea79SLois Curfman McInnes 
5783501a2bdSLois Curfman McInnes static int MatGetSize_MPIDense(Mat A,int *m,int *n)
5798965ea79SLois Curfman McInnes {
5803501a2bdSLois Curfman McInnes   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
5818965ea79SLois Curfman McInnes   *m = mat->M; *n = mat->N;
5828965ea79SLois Curfman McInnes   return 0;
5838965ea79SLois Curfman McInnes }
5848965ea79SLois Curfman McInnes 
5853501a2bdSLois Curfman McInnes static int MatGetLocalSize_MPIDense(Mat A,int *m,int *n)
5868965ea79SLois Curfman McInnes {
5873501a2bdSLois Curfman McInnes   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
5888965ea79SLois Curfman McInnes   *m = mat->m; *n = mat->N;
5898965ea79SLois Curfman McInnes   return 0;
5908965ea79SLois Curfman McInnes }
5918965ea79SLois Curfman McInnes 
5923501a2bdSLois Curfman McInnes static int MatGetOwnershipRange_MPIDense(Mat A,int *m,int *n)
5938965ea79SLois Curfman McInnes {
5943501a2bdSLois Curfman McInnes   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
5958965ea79SLois Curfman McInnes   *m = mat->rstart; *n = mat->rend;
5968965ea79SLois Curfman McInnes   return 0;
5978965ea79SLois Curfman McInnes }
5988965ea79SLois Curfman McInnes 
5993501a2bdSLois Curfman McInnes static int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,Scalar **v)
6008965ea79SLois Curfman McInnes {
6013501a2bdSLois Curfman McInnes   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
60239ddd567SLois Curfman McInnes   int          lrow, rstart = mat->rstart, rend = mat->rend;
6038965ea79SLois Curfman McInnes 
60439ddd567SLois Curfman McInnes   if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIDense:only local rows")
6058965ea79SLois Curfman McInnes   lrow = row - rstart;
60639ddd567SLois Curfman McInnes   return MatGetRow(mat->A,lrow,nz,idx,v);
6078965ea79SLois Curfman McInnes }
6088965ea79SLois Curfman McInnes 
60939ddd567SLois Curfman McInnes static int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,Scalar **v)
6108965ea79SLois Curfman McInnes {
6110452661fSBarry Smith   if (idx) PetscFree(*idx);
6120452661fSBarry Smith   if (v) PetscFree(*v);
6138965ea79SLois Curfman McInnes   return 0;
6148965ea79SLois Curfman McInnes }
6158965ea79SLois Curfman McInnes 
616cddf8d76SBarry Smith static int MatNorm_MPIDense(Mat A,NormType type,double *norm)
617096963f5SLois Curfman McInnes {
6183501a2bdSLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *) A->data;
6193501a2bdSLois Curfman McInnes   Mat_SeqDense *mat = (Mat_SeqDense*) mdn->A->data;
6203501a2bdSLois Curfman McInnes   int          ierr, i, j;
6213501a2bdSLois Curfman McInnes   double       sum = 0.0;
6223501a2bdSLois Curfman McInnes   Scalar       *v = mat->v;
6233501a2bdSLois Curfman McInnes 
6243501a2bdSLois Curfman McInnes   if (!mdn->assembled) SETERRQ(1,"MatNorm_MPIDense:Must assemble matrix");
6253501a2bdSLois Curfman McInnes   if (mdn->size == 1) {
6263501a2bdSLois Curfman McInnes     ierr =  MatNorm(mdn->A,type,norm); CHKERRQ(ierr);
6273501a2bdSLois Curfman McInnes   } else {
6283501a2bdSLois Curfman McInnes     if (type == NORM_FROBENIUS) {
6293501a2bdSLois Curfman McInnes       for (i=0; i<mat->n*mat->m; i++ ) {
6303501a2bdSLois Curfman McInnes #if defined(PETSC_COMPLEX)
6313501a2bdSLois Curfman McInnes         sum += real(conj(*v)*(*v)); v++;
6323501a2bdSLois Curfman McInnes #else
6333501a2bdSLois Curfman McInnes         sum += (*v)*(*v); v++;
6343501a2bdSLois Curfman McInnes #endif
6353501a2bdSLois Curfman McInnes       }
6363501a2bdSLois Curfman McInnes       MPI_Allreduce((void*)&sum,(void*)norm,1,MPI_DOUBLE,MPI_SUM,A->comm);
6373501a2bdSLois Curfman McInnes       *norm = sqrt(*norm);
6383501a2bdSLois Curfman McInnes       PLogFlops(2*mat->n*mat->m);
6393501a2bdSLois Curfman McInnes     }
6403501a2bdSLois Curfman McInnes     else if (type == NORM_1) {
6413501a2bdSLois Curfman McInnes       double *tmp, *tmp2;
6420452661fSBarry Smith       tmp  = (double *) PetscMalloc( 2*mdn->N*sizeof(double) ); CHKPTRQ(tmp);
6433501a2bdSLois Curfman McInnes       tmp2 = tmp + mdn->N;
644cddf8d76SBarry Smith       PetscMemzero(tmp,2*mdn->N*sizeof(double));
645096963f5SLois Curfman McInnes       *norm = 0.0;
6463501a2bdSLois Curfman McInnes       v = mat->v;
6473501a2bdSLois Curfman McInnes       for ( j=0; j<mat->n; j++ ) {
6483501a2bdSLois Curfman McInnes         for ( i=0; i<mat->m; i++ ) {
64967e560aaSBarry Smith           tmp[j] += PetscAbsScalar(*v);  v++;
6503501a2bdSLois Curfman McInnes         }
6513501a2bdSLois Curfman McInnes       }
6523501a2bdSLois Curfman McInnes       MPI_Allreduce((void*)tmp,(void*)tmp2,mdn->N,MPI_DOUBLE,MPI_SUM,A->comm);
6533501a2bdSLois Curfman McInnes       for ( j=0; j<mdn->N; j++ ) {
6543501a2bdSLois Curfman McInnes         if (tmp2[j] > *norm) *norm = tmp2[j];
6553501a2bdSLois Curfman McInnes       }
6560452661fSBarry Smith       PetscFree(tmp);
6573501a2bdSLois Curfman McInnes       PLogFlops(mat->n*mat->m);
6583501a2bdSLois Curfman McInnes     }
6593501a2bdSLois Curfman McInnes     else if (type == NORM_INFINITY) { /* max row norm */
6603501a2bdSLois Curfman McInnes       double ntemp;
6613501a2bdSLois Curfman McInnes       ierr = MatNorm(mdn->A,type,&ntemp); CHKERRQ(ierr);
6623501a2bdSLois Curfman McInnes       MPI_Allreduce((void*)&ntemp,(void*)norm,1,MPI_DOUBLE,MPI_MAX,A->comm);
6633501a2bdSLois Curfman McInnes     }
6643501a2bdSLois Curfman McInnes     else {
6653501a2bdSLois Curfman McInnes       SETERRQ(1,"MatNorm_MPIDense:No support for two norm");
6663501a2bdSLois Curfman McInnes     }
6673501a2bdSLois Curfman McInnes   }
6683501a2bdSLois Curfman McInnes   return 0;
6693501a2bdSLois Curfman McInnes }
6703501a2bdSLois Curfman McInnes 
6713501a2bdSLois Curfman McInnes static int MatTranspose_MPIDense(Mat A,Mat *matout)
6723501a2bdSLois Curfman McInnes {
6733501a2bdSLois Curfman McInnes   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
6743501a2bdSLois Curfman McInnes   Mat_SeqDense *Aloc = (Mat_SeqDense *) a->A->data;
6753501a2bdSLois Curfman McInnes   Mat          B;
6763501a2bdSLois Curfman McInnes   int          M = a->M, N = a->N, m, n, *rwork, rstart = a->rstart;
6773501a2bdSLois Curfman McInnes   int          j, i, ierr;
6783501a2bdSLois Curfman McInnes   Scalar       *v;
6793501a2bdSLois Curfman McInnes 
6803501a2bdSLois Curfman McInnes   if (!matout && M != N)
6813501a2bdSLois Curfman McInnes     SETERRQ(1,"MatTranspose_MPIDense:Supports square matrix only in-place");
682*18f449edSLois Curfman McInnes   ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,&B); CHKERRQ(ierr);
6833501a2bdSLois Curfman McInnes 
6843501a2bdSLois Curfman McInnes   m = Aloc->m; n = Aloc->n; v = Aloc->v;
6850452661fSBarry Smith   rwork = (int *) PetscMalloc(n*sizeof(int)); CHKPTRQ(rwork);
6863501a2bdSLois Curfman McInnes   for ( j=0; j<n; j++ ) {
6873501a2bdSLois Curfman McInnes     for (i=0; i<m; i++) rwork[i] = rstart + i;
6883501a2bdSLois Curfman McInnes     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES); CHKERRQ(ierr);
6893501a2bdSLois Curfman McInnes     v += m;
6903501a2bdSLois Curfman McInnes   }
6910452661fSBarry Smith   PetscFree(rwork);
6923501a2bdSLois Curfman McInnes   ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
6933501a2bdSLois Curfman McInnes   ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
6943501a2bdSLois Curfman McInnes   if (matout) {
6953501a2bdSLois Curfman McInnes     *matout = B;
6963501a2bdSLois Curfman McInnes   } else {
6973501a2bdSLois Curfman McInnes     /* This isn't really an in-place transpose, but free data struct from a */
6980452661fSBarry Smith     PetscFree(a->rowners);
6993501a2bdSLois Curfman McInnes     ierr = MatDestroy(a->A); CHKERRQ(ierr);
7003501a2bdSLois Curfman McInnes     if (a->lvec) VecDestroy(a->lvec);
7013501a2bdSLois Curfman McInnes     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
7020452661fSBarry Smith     PetscFree(a);
7033501a2bdSLois Curfman McInnes     PetscMemcpy(A,B,sizeof(struct _Mat));
7040452661fSBarry Smith     PetscHeaderDestroy(B);
7053501a2bdSLois Curfman McInnes   }
706096963f5SLois Curfman McInnes   return 0;
707096963f5SLois Curfman McInnes }
708096963f5SLois Curfman McInnes 
70955659b69SBarry Smith static int MatCopyPrivate_MPIDense(Mat,Mat *,int);
7108965ea79SLois Curfman McInnes 
7118965ea79SLois Curfman McInnes /* -------------------------------------------------------------------*/
71239ddd567SLois Curfman McInnes static struct _MatOps MatOps = {MatSetValues_MPIDense,
71339ddd567SLois Curfman McInnes        MatGetRow_MPIDense,MatRestoreRow_MPIDense,
71439ddd567SLois Curfman McInnes        MatMult_MPIDense,MatMultAdd_MPIDense,
715096963f5SLois Curfman McInnes        MatMultTrans_MPIDense,MatMultTransAdd_MPIDense,
71639ddd567SLois Curfman McInnes        0,0,
71739ddd567SLois Curfman McInnes        0,0,0,
7183501a2bdSLois Curfman McInnes        0,0,MatTranspose_MPIDense,
71939ddd567SLois Curfman McInnes        MatGetInfo_MPIDense,0,
720096963f5SLois Curfman McInnes        MatGetDiagonal_MPIDense,0,MatNorm_MPIDense,
72139ddd567SLois Curfman McInnes        MatAssemblyBegin_MPIDense,MatAssemblyEnd_MPIDense,
7228965ea79SLois Curfman McInnes        0,
72339ddd567SLois Curfman McInnes        MatSetOption_MPIDense,MatZeroEntries_MPIDense,MatZeroRows_MPIDense,
72439ddd567SLois Curfman McInnes        0,
72539ddd567SLois Curfman McInnes        0,0,0,0,
72639ddd567SLois Curfman McInnes        MatGetSize_MPIDense,MatGetLocalSize_MPIDense,
72739ddd567SLois Curfman McInnes        MatGetOwnershipRange_MPIDense,
72839ddd567SLois Curfman McInnes        0,0,
72939ddd567SLois Curfman McInnes        0,0,0,0,0,MatCopyPrivate_MPIDense};
7308965ea79SLois Curfman McInnes 
7318965ea79SLois Curfman McInnes /*@C
73239ddd567SLois Curfman McInnes    MatCreateMPIDense - Creates a sparse parallel matrix in dense format.
7338965ea79SLois Curfman McInnes 
7348965ea79SLois Curfman McInnes    Input Parameters:
7358965ea79SLois Curfman McInnes .  comm - MPI communicator
7368965ea79SLois Curfman McInnes .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
7378965ea79SLois Curfman McInnes .  n - number of local columns (or PETSC_DECIDE to have calculated
7388965ea79SLois Curfman McInnes            if N is given)
7398965ea79SLois Curfman McInnes .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
7408965ea79SLois Curfman McInnes .  N - number of global columns (or PETSC_DECIDE to have calculated
7418965ea79SLois Curfman McInnes            if n is given)
742*18f449edSLois Curfman McInnes .  data - optional location of matrix data.  Set data=0 for PETSc to
743*18f449edSLois Curfman McInnes    control all matrix memory allocation.
7448965ea79SLois Curfman McInnes 
7458965ea79SLois Curfman McInnes    Output Parameter:
7468965ea79SLois Curfman McInnes .  newmat - the matrix
7478965ea79SLois Curfman McInnes 
7488965ea79SLois Curfman McInnes    Notes:
74939ddd567SLois Curfman McInnes    The dense format is fully compatible with standard Fortran 77
75039ddd567SLois Curfman McInnes    storage by columns.
7518965ea79SLois Curfman McInnes 
752*18f449edSLois Curfman McInnes    The data input variable is intended primarily for Fortran programmers
753*18f449edSLois Curfman McInnes    who wish to allocate their own matrix memory space.  Most users should
754*18f449edSLois Curfman McInnes    set data=0.
755*18f449edSLois Curfman McInnes 
7568965ea79SLois Curfman McInnes    The user MUST specify either the local or global matrix dimensions
7578965ea79SLois Curfman McInnes    (possibly both).
7588965ea79SLois Curfman McInnes 
7593501a2bdSLois Curfman McInnes    Currently, the only parallel dense matrix decomposition is by rows,
7603501a2bdSLois Curfman McInnes    so that n=N and each submatrix owns all of the global columns.
7613501a2bdSLois Curfman McInnes 
76239ddd567SLois Curfman McInnes .keywords: matrix, dense, parallel
7638965ea79SLois Curfman McInnes 
76439ddd567SLois Curfman McInnes .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
7658965ea79SLois Curfman McInnes @*/
766*18f449edSLois Curfman McInnes int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,Scalar *data,Mat *newmat)
7678965ea79SLois Curfman McInnes {
7688965ea79SLois Curfman McInnes   Mat          mat;
76939ddd567SLois Curfman McInnes   Mat_MPIDense *a;
77039ddd567SLois Curfman McInnes   int          ierr, i;
7718965ea79SLois Curfman McInnes 
772*18f449edSLois Curfman McInnes /* Note:  for now, this assumes that the user knows what he's doing if
773*18f449edSLois Curfman McInnes    data is specified above.  */
774*18f449edSLois Curfman McInnes 
7758965ea79SLois Curfman McInnes   *newmat         = 0;
7760452661fSBarry Smith   PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIDENSE,comm);
7778965ea79SLois Curfman McInnes   PLogObjectCreate(mat);
7780452661fSBarry Smith   mat->data       = (void *) (a = PetscNew(Mat_MPIDense)); CHKPTRQ(a);
7798965ea79SLois Curfman McInnes   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
78039ddd567SLois Curfman McInnes   mat->destroy    = MatDestroy_MPIDense;
78139ddd567SLois Curfman McInnes   mat->view       = MatView_MPIDense;
7828965ea79SLois Curfman McInnes   mat->factor     = 0;
7838965ea79SLois Curfman McInnes 
7848965ea79SLois Curfman McInnes   a->insertmode = NOT_SET_VALUES;
7858965ea79SLois Curfman McInnes   MPI_Comm_rank(comm,&a->rank);
7868965ea79SLois Curfman McInnes   MPI_Comm_size(comm,&a->size);
7878965ea79SLois Curfman McInnes 
78839ddd567SLois Curfman McInnes   if (M == PETSC_DECIDE) MPI_Allreduce(&m,&M,1,MPI_INT,MPI_SUM,comm);
7898965ea79SLois Curfman McInnes   if (m == PETSC_DECIDE) {m = M/a->size + ((M % a->size) > a->rank);}
79039ddd567SLois Curfman McInnes 
79139ddd567SLois Curfman McInnes   /* each row stores all columns */
79239ddd567SLois Curfman McInnes   if (N == PETSC_DECIDE) N = n;
793d7e8b826SBarry Smith   if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);}
794d7e8b826SBarry Smith   /*  if (n != N) SETERRQ(1,"MatCreateMPIDense:For now, only n=N is supported"); */
7958965ea79SLois Curfman McInnes   a->N = N;
7968965ea79SLois Curfman McInnes   a->M = M;
79739ddd567SLois Curfman McInnes   a->m = m;
79839ddd567SLois Curfman McInnes   a->n = n;
7998965ea79SLois Curfman McInnes 
8008965ea79SLois Curfman McInnes   /* build local table of row and column ownerships */
801d7e8b826SBarry Smith   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
802d7e8b826SBarry Smith   a->cowners = a->rowners + a->size + 1;
803d7e8b826SBarry Smith   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+
80439ddd567SLois Curfman McInnes                        sizeof(Mat_MPIDense));
8058965ea79SLois Curfman McInnes   MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);
8068965ea79SLois Curfman McInnes   a->rowners[0] = 0;
8078965ea79SLois Curfman McInnes   for ( i=2; i<=a->size; i++ ) {
8088965ea79SLois Curfman McInnes     a->rowners[i] += a->rowners[i-1];
8098965ea79SLois Curfman McInnes   }
8108965ea79SLois Curfman McInnes   a->rstart = a->rowners[a->rank];
8118965ea79SLois Curfman McInnes   a->rend   = a->rowners[a->rank+1];
812d7e8b826SBarry Smith   MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);
813d7e8b826SBarry Smith   a->cowners[0] = 0;
814d7e8b826SBarry Smith   for ( i=2; i<=a->size; i++ ) {
815d7e8b826SBarry Smith     a->cowners[i] += a->cowners[i-1];
816d7e8b826SBarry Smith   }
8178965ea79SLois Curfman McInnes 
818*18f449edSLois Curfman McInnes   ierr = MatCreateSeqDense(MPI_COMM_SELF,m,N,data,&a->A); CHKERRQ(ierr);
8198965ea79SLois Curfman McInnes   PLogObjectParent(mat,a->A);
8208965ea79SLois Curfman McInnes 
8218965ea79SLois Curfman McInnes   /* build cache for off array entries formed */
8228965ea79SLois Curfman McInnes   ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr);
8238965ea79SLois Curfman McInnes 
8248965ea79SLois Curfman McInnes   /* stuff used for matrix vector multiply */
8258965ea79SLois Curfman McInnes   a->lvec      = 0;
8268965ea79SLois Curfman McInnes   a->Mvctx     = 0;
8278965ea79SLois Curfman McInnes   a->assembled = 0;
8288965ea79SLois Curfman McInnes 
8298965ea79SLois Curfman McInnes   *newmat = mat;
8308965ea79SLois Curfman McInnes   return 0;
8318965ea79SLois Curfman McInnes }
8328965ea79SLois Curfman McInnes 
8333501a2bdSLois Curfman McInnes static int MatCopyPrivate_MPIDense(Mat A,Mat *newmat,int cpvalues)
8348965ea79SLois Curfman McInnes {
8358965ea79SLois Curfman McInnes   Mat          mat;
8363501a2bdSLois Curfman McInnes   Mat_MPIDense *a,*oldmat = (Mat_MPIDense *) A->data;
83739ddd567SLois Curfman McInnes   int          ierr;
8388965ea79SLois Curfman McInnes 
83939ddd567SLois Curfman McInnes   if (!oldmat->assembled) SETERRQ(1,"MatCopyPrivate_MPIDense:Must assemble matrix");
8408965ea79SLois Curfman McInnes   *newmat       = 0;
8410452661fSBarry Smith   PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIDENSE,A->comm);
8428965ea79SLois Curfman McInnes   PLogObjectCreate(mat);
8430452661fSBarry Smith   mat->data     = (void *) (a = PetscNew(Mat_MPIDense)); CHKPTRQ(a);
8448965ea79SLois Curfman McInnes   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
84539ddd567SLois Curfman McInnes   mat->destroy  = MatDestroy_MPIDense;
84639ddd567SLois Curfman McInnes   mat->view     = MatView_MPIDense;
8473501a2bdSLois Curfman McInnes   mat->factor   = A->factor;
8488965ea79SLois Curfman McInnes 
8498965ea79SLois Curfman McInnes   a->m          = oldmat->m;
8508965ea79SLois Curfman McInnes   a->n          = oldmat->n;
8518965ea79SLois Curfman McInnes   a->M          = oldmat->M;
8528965ea79SLois Curfman McInnes   a->N          = oldmat->N;
8538965ea79SLois Curfman McInnes 
8548965ea79SLois Curfman McInnes   a->assembled  = 1;
8558965ea79SLois Curfman McInnes   a->rstart     = oldmat->rstart;
8568965ea79SLois Curfman McInnes   a->rend       = oldmat->rend;
8578965ea79SLois Curfman McInnes   a->size       = oldmat->size;
8588965ea79SLois Curfman McInnes   a->rank       = oldmat->rank;
8598965ea79SLois Curfman McInnes   a->insertmode = NOT_SET_VALUES;
8608965ea79SLois Curfman McInnes 
8610452661fSBarry Smith   a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners);
86239ddd567SLois Curfman McInnes   PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIDense));
8638965ea79SLois Curfman McInnes   PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));
8648965ea79SLois Curfman McInnes   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
8658965ea79SLois Curfman McInnes 
8668965ea79SLois Curfman McInnes   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
8678965ea79SLois Curfman McInnes   PLogObjectParent(mat,a->lvec);
86855659b69SBarry Smith   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
8698965ea79SLois Curfman McInnes   PLogObjectParent(mat,a->Mvctx);
8708965ea79SLois Curfman McInnes   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
8718965ea79SLois Curfman McInnes   PLogObjectParent(mat,a->A);
8728965ea79SLois Curfman McInnes   *newmat = mat;
8738965ea79SLois Curfman McInnes   return 0;
8748965ea79SLois Curfman McInnes }
8758965ea79SLois Curfman McInnes 
8768965ea79SLois Curfman McInnes #include "sysio.h"
8778965ea79SLois Curfman McInnes 
87839ddd567SLois Curfman McInnes int MatLoad_MPIDense(Viewer bview,MatType type,Mat *newmat)
8798965ea79SLois Curfman McInnes {
8808965ea79SLois Curfman McInnes   Mat          A;
8818965ea79SLois Curfman McInnes   int          i, nz, ierr, j,rstart, rend, fd;
8828965ea79SLois Curfman McInnes   Scalar       *vals,*svals;
8838965ea79SLois Curfman McInnes   PetscObject  vobj = (PetscObject) bview;
8848965ea79SLois Curfman McInnes   MPI_Comm     comm = vobj->comm;
8858965ea79SLois Curfman McInnes   MPI_Status   status;
8868965ea79SLois Curfman McInnes   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
8878965ea79SLois Curfman McInnes   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
8888965ea79SLois Curfman McInnes   int          tag = ((PetscObject)bview)->tag;
8898965ea79SLois Curfman McInnes 
8908965ea79SLois Curfman McInnes   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
8918965ea79SLois Curfman McInnes   if (!rank) {
8928965ea79SLois Curfman McInnes     ierr = ViewerFileGetDescriptor_Private(bview,&fd); CHKERRQ(ierr);
8938965ea79SLois Curfman McInnes     ierr = SYRead(fd,(char *)header,4,SYINT); CHKERRQ(ierr);
89439ddd567SLois Curfman McInnes     if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIDenseorMPIRow:not matrix object");
8958965ea79SLois Curfman McInnes   }
8968965ea79SLois Curfman McInnes 
8978965ea79SLois Curfman McInnes   MPI_Bcast(header+1,3,MPI_INT,0,comm);
8988965ea79SLois Curfman McInnes   M = header[1]; N = header[2];
8998965ea79SLois Curfman McInnes   /* determine ownership of all rows */
9008965ea79SLois Curfman McInnes   m = M/size + ((M % size) > rank);
9010452661fSBarry Smith   rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
9028965ea79SLois Curfman McInnes   MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
9038965ea79SLois Curfman McInnes   rowners[0] = 0;
9048965ea79SLois Curfman McInnes   for ( i=2; i<=size; i++ ) {
9058965ea79SLois Curfman McInnes     rowners[i] += rowners[i-1];
9068965ea79SLois Curfman McInnes   }
9078965ea79SLois Curfman McInnes   rstart = rowners[rank];
9088965ea79SLois Curfman McInnes   rend   = rowners[rank+1];
9098965ea79SLois Curfman McInnes 
9108965ea79SLois Curfman McInnes   /* distribute row lengths to all processors */
9110452661fSBarry Smith   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
9128965ea79SLois Curfman McInnes   offlens = ourlens + (rend-rstart);
9138965ea79SLois Curfman McInnes   if (!rank) {
9140452661fSBarry Smith     rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
9158965ea79SLois Curfman McInnes     ierr = SYRead(fd,rowlengths,M,SYINT); CHKERRQ(ierr);
9160452661fSBarry Smith     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
9178965ea79SLois Curfman McInnes     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
9188965ea79SLois Curfman McInnes     MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
9190452661fSBarry Smith     PetscFree(sndcounts);
9208965ea79SLois Curfman McInnes   }
9218965ea79SLois Curfman McInnes   else {
9228965ea79SLois Curfman McInnes     MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
9238965ea79SLois Curfman McInnes   }
9248965ea79SLois Curfman McInnes 
9258965ea79SLois Curfman McInnes   if (!rank) {
9268965ea79SLois Curfman McInnes     /* calculate the number of nonzeros on each processor */
9270452661fSBarry Smith     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
928cddf8d76SBarry Smith     PetscMemzero(procsnz,size*sizeof(int));
9298965ea79SLois Curfman McInnes     for ( i=0; i<size; i++ ) {
9308965ea79SLois Curfman McInnes       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
9318965ea79SLois Curfman McInnes         procsnz[i] += rowlengths[j];
9328965ea79SLois Curfman McInnes       }
9338965ea79SLois Curfman McInnes     }
9340452661fSBarry Smith     PetscFree(rowlengths);
9358965ea79SLois Curfman McInnes 
9368965ea79SLois Curfman McInnes     /* determine max buffer needed and allocate it */
9378965ea79SLois Curfman McInnes     maxnz = 0;
9388965ea79SLois Curfman McInnes     for ( i=0; i<size; i++ ) {
9390452661fSBarry Smith       maxnz = PetscMax(maxnz,procsnz[i]);
9408965ea79SLois Curfman McInnes     }
9410452661fSBarry Smith     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
9428965ea79SLois Curfman McInnes 
9438965ea79SLois Curfman McInnes     /* read in my part of the matrix column indices  */
9448965ea79SLois Curfman McInnes     nz = procsnz[0];
9450452661fSBarry Smith     mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
9468965ea79SLois Curfman McInnes     ierr = SYRead(fd,mycols,nz,SYINT); CHKERRQ(ierr);
9478965ea79SLois Curfman McInnes 
9488965ea79SLois Curfman McInnes     /* read in every one elses and ship off */
9498965ea79SLois Curfman McInnes     for ( i=1; i<size; i++ ) {
9508965ea79SLois Curfman McInnes       nz = procsnz[i];
9518965ea79SLois Curfman McInnes       ierr = SYRead(fd,cols,nz,SYINT); CHKERRQ(ierr);
9528965ea79SLois Curfman McInnes       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
9538965ea79SLois Curfman McInnes     }
9540452661fSBarry Smith     PetscFree(cols);
9558965ea79SLois Curfman McInnes   }
9568965ea79SLois Curfman McInnes   else {
9578965ea79SLois Curfman McInnes     /* determine buffer space needed for message */
9588965ea79SLois Curfman McInnes     nz = 0;
9598965ea79SLois Curfman McInnes     for ( i=0; i<m; i++ ) {
9608965ea79SLois Curfman McInnes       nz += ourlens[i];
9618965ea79SLois Curfman McInnes     }
9620452661fSBarry Smith     mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
9638965ea79SLois Curfman McInnes 
9648965ea79SLois Curfman McInnes     /* receive message of column indices*/
9658965ea79SLois Curfman McInnes     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
9668965ea79SLois Curfman McInnes     MPI_Get_count(&status,MPI_INT,&maxnz);
96739ddd567SLois Curfman McInnes     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIDenseorMPIRow:something is wrong with file");
9688965ea79SLois Curfman McInnes   }
9698965ea79SLois Curfman McInnes 
9708965ea79SLois Curfman McInnes   /* loop over local rows, determining number of off diagonal entries */
971cddf8d76SBarry Smith   PetscMemzero(offlens,m*sizeof(int));
9728965ea79SLois Curfman McInnes   jj = 0;
9738965ea79SLois Curfman McInnes   for ( i=0; i<m; i++ ) {
9748965ea79SLois Curfman McInnes     for ( j=0; j<ourlens[i]; j++ ) {
9758965ea79SLois Curfman McInnes       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
9768965ea79SLois Curfman McInnes       jj++;
9778965ea79SLois Curfman McInnes     }
9788965ea79SLois Curfman McInnes   }
9798965ea79SLois Curfman McInnes 
9808965ea79SLois Curfman McInnes   /* create our matrix */
9818965ea79SLois Curfman McInnes   for ( i=0; i<m; i++ ) {
9828965ea79SLois Curfman McInnes     ourlens[i] -= offlens[i];
9838965ea79SLois Curfman McInnes   }
98439ddd567SLois Curfman McInnes   if (type == MATMPIDENSE) {
985*18f449edSLois Curfman McInnes     ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,0,newmat); CHKERRQ(ierr);
9868965ea79SLois Curfman McInnes   }
9878965ea79SLois Curfman McInnes   A = *newmat;
9888965ea79SLois Curfman McInnes   for ( i=0; i<m; i++ ) {
9898965ea79SLois Curfman McInnes     ourlens[i] += offlens[i];
9908965ea79SLois Curfman McInnes   }
9918965ea79SLois Curfman McInnes 
9928965ea79SLois Curfman McInnes   if (!rank) {
9930452661fSBarry Smith     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);
9948965ea79SLois Curfman McInnes 
9958965ea79SLois Curfman McInnes     /* read in my part of the matrix numerical values  */
9968965ea79SLois Curfman McInnes     nz = procsnz[0];
9978965ea79SLois Curfman McInnes     ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr);
9988965ea79SLois Curfman McInnes 
9998965ea79SLois Curfman McInnes     /* insert into matrix */
10008965ea79SLois Curfman McInnes     jj      = rstart;
10018965ea79SLois Curfman McInnes     smycols = mycols;
10028965ea79SLois Curfman McInnes     svals   = vals;
10038965ea79SLois Curfman McInnes     for ( i=0; i<m; i++ ) {
10048965ea79SLois Curfman McInnes       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
10058965ea79SLois Curfman McInnes       smycols += ourlens[i];
10068965ea79SLois Curfman McInnes       svals   += ourlens[i];
10078965ea79SLois Curfman McInnes       jj++;
10088965ea79SLois Curfman McInnes     }
10098965ea79SLois Curfman McInnes 
10108965ea79SLois Curfman McInnes     /* read in other processors and ship out */
10118965ea79SLois Curfman McInnes     for ( i=1; i<size; i++ ) {
10128965ea79SLois Curfman McInnes       nz = procsnz[i];
10138965ea79SLois Curfman McInnes       ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr);
10148965ea79SLois Curfman McInnes       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
10158965ea79SLois Curfman McInnes     }
10160452661fSBarry Smith     PetscFree(procsnz);
10178965ea79SLois Curfman McInnes   }
10188965ea79SLois Curfman McInnes   else {
10198965ea79SLois Curfman McInnes     /* receive numeric values */
10200452661fSBarry Smith     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);
10218965ea79SLois Curfman McInnes 
10228965ea79SLois Curfman McInnes     /* receive message of values*/
10238965ea79SLois Curfman McInnes     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
10248965ea79SLois Curfman McInnes     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
102539ddd567SLois Curfman McInnes     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIDenseorMPIRow:something is wrong with file");
10268965ea79SLois Curfman McInnes 
10278965ea79SLois Curfman McInnes     /* insert into matrix */
10288965ea79SLois Curfman McInnes     jj      = rstart;
10298965ea79SLois Curfman McInnes     smycols = mycols;
10308965ea79SLois Curfman McInnes     svals   = vals;
10318965ea79SLois Curfman McInnes     for ( i=0; i<m; i++ ) {
10328965ea79SLois Curfman McInnes       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
10338965ea79SLois Curfman McInnes       smycols += ourlens[i];
10348965ea79SLois Curfman McInnes       svals   += ourlens[i];
10358965ea79SLois Curfman McInnes       jj++;
10368965ea79SLois Curfman McInnes     }
10378965ea79SLois Curfman McInnes   }
10380452661fSBarry Smith   PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);
10398965ea79SLois Curfman McInnes 
10408965ea79SLois Curfman McInnes   ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
10418965ea79SLois Curfman McInnes   ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
10428965ea79SLois Curfman McInnes   return 0;
10438965ea79SLois Curfman McInnes }
1044