xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 0452661f6076db5def9dff5bdaf23ff50d8e3a7f)
18965ea79SLois Curfman McInnes #ifndef lint
2*0452661fSBarry Smith static char vcid[] = "$Id: mpidense.c,v 1.7 1995/11/01 19:10:02 bsmith Exp bsmith $";
38965ea79SLois Curfman McInnes #endif
48965ea79SLois Curfman McInnes 
58965ea79SLois Curfman McInnes #include "mpidense.h"
68965ea79SLois Curfman McInnes #include "vec/vecimpl.h"
78965ea79SLois Curfman McInnes #include "inline/spops.h"
88965ea79SLois Curfman McInnes 
939ddd567SLois Curfman McInnes static int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,
108965ea79SLois Curfman McInnes                                int *idxn,Scalar *v,InsertMode addv)
118965ea79SLois Curfman McInnes {
1239ddd567SLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
1339ddd567SLois Curfman McInnes   int          ierr, i, j, rstart = mdn->rstart, rend = mdn->rend, row;
148965ea79SLois Curfman McInnes 
1539ddd567SLois Curfman McInnes   if (mdn->insertmode != NOT_SET_VALUES && mdn->insertmode != addv) {
1639ddd567SLois Curfman McInnes     SETERRQ(1,"MatSetValues_MPIDense:Cannot mix inserts and adds");
178965ea79SLois Curfman McInnes   }
1839ddd567SLois Curfman McInnes   mdn->insertmode = addv;
198965ea79SLois Curfman McInnes   for ( i=0; i<m; i++ ) {
2039ddd567SLois Curfman McInnes     if (idxm[i] < 0) SETERRQ(1,"MatSetValues_MPIDense:Negative row");
2139ddd567SLois Curfman McInnes     if (idxm[i] >= mdn->M) SETERRQ(1,"MatSetValues_MPIDense:Row too large");
228965ea79SLois Curfman McInnes     if (idxm[i] >= rstart && idxm[i] < rend) {
238965ea79SLois Curfman McInnes       row = idxm[i] - rstart;
248965ea79SLois Curfman McInnes       for ( j=0; j<n; j++ ) {
2539ddd567SLois Curfman McInnes         if (idxn[j] < 0) SETERRQ(1,"MatSetValues_MPIDense:Negative column");
2639ddd567SLois Curfman McInnes         if (idxn[j] >= mdn->N)
2739ddd567SLois Curfman McInnes           SETERRQ(1,"MatSetValues_MPIDense:Column too large");
2839ddd567SLois Curfman McInnes         ierr = MatSetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j,addv);
2939ddd567SLois Curfman McInnes         CHKERRQ(ierr);
308965ea79SLois Curfman McInnes       }
318965ea79SLois Curfman McInnes     }
328965ea79SLois Curfman McInnes     else {
3339ddd567SLois Curfman McInnes       ierr = StashValues_Private(&mdn->stash,idxm[i],n,idxn,v+i*n,addv);
3439ddd567SLois Curfman McInnes       CHKERRQ(ierr);
358965ea79SLois Curfman McInnes     }
368965ea79SLois Curfman McInnes   }
378965ea79SLois Curfman McInnes   return 0;
388965ea79SLois Curfman McInnes }
398965ea79SLois Curfman McInnes 
4039ddd567SLois Curfman McInnes static int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
418965ea79SLois Curfman McInnes {
4239ddd567SLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
438965ea79SLois Curfman McInnes   MPI_Comm     comm = mat->comm;
4439ddd567SLois Curfman McInnes   int          size = mdn->size, *owners = mdn->rowners, rank = mdn->rank;
458965ea79SLois Curfman McInnes   int          *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
4639ddd567SLois Curfman McInnes   int          tag = mat->tag, *owner,*starts,count,ierr;
478965ea79SLois Curfman McInnes   InsertMode   addv;
4839ddd567SLois Curfman McInnes   MPI_Request  *send_waits,*recv_waits;
498965ea79SLois Curfman McInnes   Scalar       *rvalues,*svalues;
508965ea79SLois Curfman McInnes 
518965ea79SLois Curfman McInnes   /* make sure all processors are either in INSERTMODE or ADDMODE */
5239ddd567SLois Curfman McInnes   MPI_Allreduce((void *) &mdn->insertmode,(void *) &addv,1,MPI_INT,
5339ddd567SLois Curfman McInnes                 MPI_BOR,comm);
5439ddd567SLois Curfman McInnes   if (addv == (ADD_VALUES|INSERT_VALUES)) { SETERRQ(1,
5539ddd567SLois Curfman McInnes     "MatAssemblyBegin_MPIDense:Cannot mix adds/inserts on different procs");
568965ea79SLois Curfman McInnes     }
5739ddd567SLois Curfman McInnes   mdn->insertmode = addv; /* in case this processor had no cache */
588965ea79SLois Curfman McInnes 
598965ea79SLois Curfman McInnes   /*  first count number of contributors to each processor */
60*0452661fSBarry Smith   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
61cddf8d76SBarry Smith   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
62*0452661fSBarry Smith   owner = (int *) PetscMalloc( (mdn->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
6339ddd567SLois Curfman McInnes   for ( i=0; i<mdn->stash.n; i++ ) {
6439ddd567SLois Curfman McInnes     idx = mdn->stash.idx[i];
658965ea79SLois Curfman McInnes     for ( j=0; j<size; j++ ) {
668965ea79SLois Curfman McInnes       if (idx >= owners[j] && idx < owners[j+1]) {
678965ea79SLois Curfman McInnes         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
688965ea79SLois Curfman McInnes       }
698965ea79SLois Curfman McInnes     }
708965ea79SLois Curfman McInnes   }
718965ea79SLois Curfman McInnes   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
728965ea79SLois Curfman McInnes 
738965ea79SLois Curfman McInnes   /* inform other processors of number of messages and max length*/
74*0452661fSBarry Smith   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
7539ddd567SLois Curfman McInnes   MPI_Allreduce((void *) procs,(void *) work,size,MPI_INT,MPI_SUM,comm);
768965ea79SLois Curfman McInnes   nreceives = work[rank];
7739ddd567SLois Curfman McInnes   MPI_Allreduce((void *) nprocs,(void *) work,size,MPI_INT,MPI_MAX,comm);
788965ea79SLois Curfman McInnes   nmax = work[rank];
79*0452661fSBarry Smith   PetscFree(work);
808965ea79SLois Curfman McInnes 
818965ea79SLois Curfman McInnes   /* post receives:
828965ea79SLois Curfman McInnes        1) each message will consist of ordered pairs
838965ea79SLois Curfman McInnes      (global index,value) we store the global index as a double
848965ea79SLois Curfman McInnes      to simplify the message passing.
858965ea79SLois Curfman McInnes        2) since we don't know how long each individual message is we
868965ea79SLois Curfman McInnes      allocate the largest needed buffer for each receive. Potentially
878965ea79SLois Curfman McInnes      this is a lot of wasted space.
888965ea79SLois Curfman McInnes 
898965ea79SLois Curfman McInnes        This could be done better.
908965ea79SLois Curfman McInnes   */
91*0452661fSBarry Smith   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
928965ea79SLois Curfman McInnes   CHKPTRQ(rvalues);
93*0452661fSBarry Smith   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
948965ea79SLois Curfman McInnes   CHKPTRQ(recv_waits);
958965ea79SLois Curfman McInnes   for ( i=0; i<nreceives; i++ ) {
9639ddd567SLois Curfman McInnes     MPI_Irecv((void *)(rvalues+3*nmax*i),3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
978965ea79SLois Curfman McInnes               comm,recv_waits+i);
988965ea79SLois Curfman McInnes   }
998965ea79SLois Curfman McInnes 
1008965ea79SLois Curfman McInnes   /* do sends:
1018965ea79SLois Curfman McInnes       1) starts[i] gives the starting index in svalues for stuff going to
1028965ea79SLois Curfman McInnes          the ith processor
1038965ea79SLois Curfman McInnes   */
104*0452661fSBarry Smith   svalues = (Scalar *) PetscMalloc( 3*(mdn->stash.n+1)*sizeof(Scalar) );
10539ddd567SLois Curfman McInnes   CHKPTRQ(svalues);
106*0452661fSBarry Smith   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
1078965ea79SLois Curfman McInnes   CHKPTRQ(send_waits);
108*0452661fSBarry Smith   starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
1098965ea79SLois Curfman McInnes   starts[0] = 0;
1108965ea79SLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
11139ddd567SLois Curfman McInnes   for ( i=0; i<mdn->stash.n; i++ ) {
11239ddd567SLois Curfman McInnes     svalues[3*starts[owner[i]]]       = (Scalar)  mdn->stash.idx[i];
11339ddd567SLois Curfman McInnes     svalues[3*starts[owner[i]]+1]     = (Scalar)  mdn->stash.idy[i];
11439ddd567SLois Curfman McInnes     svalues[3*(starts[owner[i]]++)+2] =  mdn->stash.array[i];
1158965ea79SLois Curfman McInnes   }
116*0452661fSBarry Smith   PetscFree(owner);
1178965ea79SLois Curfman McInnes   starts[0] = 0;
1188965ea79SLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
1198965ea79SLois Curfman McInnes   count = 0;
1208965ea79SLois Curfman McInnes   for ( i=0; i<size; i++ ) {
1218965ea79SLois Curfman McInnes     if (procs[i]) {
12239ddd567SLois Curfman McInnes       MPI_Isend((void*)(svalues+3*starts[i]),3*nprocs[i],MPIU_SCALAR,i,tag,
1238965ea79SLois Curfman McInnes                 comm,send_waits+count++);
1248965ea79SLois Curfman McInnes     }
1258965ea79SLois Curfman McInnes   }
126*0452661fSBarry Smith   PetscFree(starts); PetscFree(nprocs);
1278965ea79SLois Curfman McInnes 
1288965ea79SLois Curfman McInnes   /* Free cache space */
12939ddd567SLois Curfman McInnes   ierr = StashDestroy_Private(&mdn->stash); CHKERRQ(ierr);
1308965ea79SLois Curfman McInnes 
13139ddd567SLois Curfman McInnes   mdn->svalues    = svalues;    mdn->rvalues = rvalues;
13239ddd567SLois Curfman McInnes   mdn->nsends     = nsends;     mdn->nrecvs = nreceives;
13339ddd567SLois Curfman McInnes   mdn->send_waits = send_waits; mdn->recv_waits = recv_waits;
13439ddd567SLois Curfman McInnes   mdn->rmax       = nmax;
1358965ea79SLois Curfman McInnes 
1368965ea79SLois Curfman McInnes   return 0;
1378965ea79SLois Curfman McInnes }
13839ddd567SLois Curfman McInnes extern int MatSetUpMultiply_MPIDense(Mat);
1398965ea79SLois Curfman McInnes 
14039ddd567SLois Curfman McInnes static int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
1418965ea79SLois Curfman McInnes {
14239ddd567SLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
1438965ea79SLois Curfman McInnes   MPI_Status   *send_status,recv_status;
14439ddd567SLois Curfman McInnes   int          imdex, nrecvs=mdn->nrecvs, count=nrecvs, i, n, ierr, row, col;
1458965ea79SLois Curfman McInnes   Scalar       *values,val;
14639ddd567SLois Curfman McInnes   InsertMode   addv = mdn->insertmode;
1478965ea79SLois Curfman McInnes 
1488965ea79SLois Curfman McInnes   /*  wait on receives */
1498965ea79SLois Curfman McInnes   while (count) {
15039ddd567SLois Curfman McInnes     MPI_Waitany(nrecvs,mdn->recv_waits,&imdex,&recv_status);
1518965ea79SLois Curfman McInnes     /* unpack receives into our local space */
15239ddd567SLois Curfman McInnes     values = mdn->rvalues + 3*imdex*mdn->rmax;
1538965ea79SLois Curfman McInnes     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
1548965ea79SLois Curfman McInnes     n = n/3;
1558965ea79SLois Curfman McInnes     for ( i=0; i<n; i++ ) {
15639ddd567SLois Curfman McInnes       row = (int) PETSCREAL(values[3*i]) - mdn->rstart;
1578965ea79SLois Curfman McInnes       col = (int) PETSCREAL(values[3*i+1]);
1588965ea79SLois Curfman McInnes       val = values[3*i+2];
15939ddd567SLois Curfman McInnes       if (col >= 0 && col < mdn->N) {
16039ddd567SLois Curfman McInnes         MatSetValues(mdn->A,1,&row,1,&col,&val,addv);
1618965ea79SLois Curfman McInnes       }
16239ddd567SLois Curfman McInnes       else {SETERRQ(1,"MatAssemblyEnd_MPIDense:Invalid column");}
1638965ea79SLois Curfman McInnes     }
1648965ea79SLois Curfman McInnes     count--;
1658965ea79SLois Curfman McInnes   }
166*0452661fSBarry Smith   PetscFree(mdn->recv_waits); PetscFree(mdn->rvalues);
1678965ea79SLois Curfman McInnes 
1688965ea79SLois Curfman McInnes   /* wait on sends */
16939ddd567SLois Curfman McInnes   if (mdn->nsends) {
170*0452661fSBarry Smith     send_status = (MPI_Status *) PetscMalloc( mdn->nsends*sizeof(MPI_Status) );
1718965ea79SLois Curfman McInnes     CHKPTRQ(send_status);
17239ddd567SLois Curfman McInnes     MPI_Waitall(mdn->nsends,mdn->send_waits,send_status);
173*0452661fSBarry Smith     PetscFree(send_status);
1748965ea79SLois Curfman McInnes   }
175*0452661fSBarry Smith   PetscFree(mdn->send_waits); PetscFree(mdn->svalues);
1768965ea79SLois Curfman McInnes 
17739ddd567SLois Curfman McInnes   mdn->insertmode = NOT_SET_VALUES;
17839ddd567SLois Curfman McInnes   ierr = MatAssemblyBegin(mdn->A,mode); CHKERRQ(ierr);
17939ddd567SLois Curfman McInnes   ierr = MatAssemblyEnd(mdn->A,mode); CHKERRQ(ierr);
1808965ea79SLois Curfman McInnes 
18139ddd567SLois Curfman McInnes   if (!mdn->assembled && mode == FINAL_ASSEMBLY) {
18239ddd567SLois Curfman McInnes     ierr = MatSetUpMultiply_MPIDense(mat); CHKERRQ(ierr);
1838965ea79SLois Curfman McInnes   }
18439ddd567SLois Curfman McInnes   mdn->assembled = 1;
1858965ea79SLois Curfman McInnes   return 0;
1868965ea79SLois Curfman McInnes }
1878965ea79SLois Curfman McInnes 
18839ddd567SLois Curfman McInnes static int MatZeroEntries_MPIDense(Mat A)
1898965ea79SLois Curfman McInnes {
19039ddd567SLois Curfman McInnes   Mat_MPIDense *l = (Mat_MPIDense *) A->data;
19139ddd567SLois Curfman McInnes   return MatZeroEntries(l->A);
1928965ea79SLois Curfman McInnes }
1938965ea79SLois Curfman McInnes 
1948965ea79SLois Curfman McInnes /* the code does not do the diagonal entries correctly unless the
1958965ea79SLois Curfman McInnes    matrix is square and the column and row owerships are identical.
1968965ea79SLois Curfman McInnes    This is a BUG. The only way to fix it seems to be to access
1973501a2bdSLois Curfman McInnes    mdn->A and mdn->B directly and not through the MatZeroRows()
1988965ea79SLois Curfman McInnes    routine.
1998965ea79SLois Curfman McInnes */
20039ddd567SLois Curfman McInnes static int MatZeroRows_MPIDense(Mat A,IS is,Scalar *diag)
2018965ea79SLois Curfman McInnes {
20239ddd567SLois Curfman McInnes   Mat_MPIDense   *l = (Mat_MPIDense *) A->data;
2038965ea79SLois Curfman McInnes   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
2048965ea79SLois Curfman McInnes   int            *procs,*nprocs,j,found,idx,nsends,*work;
2058965ea79SLois Curfman McInnes   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
2068965ea79SLois Curfman McInnes   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
2078965ea79SLois Curfman McInnes   int            *lens,imdex,*lrows,*values;
2088965ea79SLois Curfman McInnes   MPI_Comm       comm = A->comm;
2098965ea79SLois Curfman McInnes   MPI_Request    *send_waits,*recv_waits;
2108965ea79SLois Curfman McInnes   MPI_Status     recv_status,*send_status;
2118965ea79SLois Curfman McInnes   IS             istmp;
2128965ea79SLois Curfman McInnes 
21339ddd567SLois Curfman McInnes   if (!l->assembled) SETERRQ(1,"MatZeroRows_MPIDense:Must assemble matrix");
2148965ea79SLois Curfman McInnes   ierr = ISGetLocalSize(is,&N); CHKERRQ(ierr);
2158965ea79SLois Curfman McInnes   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
2168965ea79SLois Curfman McInnes 
2178965ea79SLois Curfman McInnes   /*  first count number of contributors to each processor */
218*0452661fSBarry Smith   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
219cddf8d76SBarry Smith   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
220*0452661fSBarry Smith   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
2218965ea79SLois Curfman McInnes   for ( i=0; i<N; i++ ) {
2228965ea79SLois Curfman McInnes     idx = rows[i];
2238965ea79SLois Curfman McInnes     found = 0;
2248965ea79SLois Curfman McInnes     for ( j=0; j<size; j++ ) {
2258965ea79SLois Curfman McInnes       if (idx >= owners[j] && idx < owners[j+1]) {
2268965ea79SLois Curfman McInnes         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
2278965ea79SLois Curfman McInnes       }
2288965ea79SLois Curfman McInnes     }
22939ddd567SLois Curfman McInnes     if (!found) SETERRQ(1,"MatZeroRows_MPIDense:Index out of range");
2308965ea79SLois Curfman McInnes   }
2318965ea79SLois Curfman McInnes   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
2328965ea79SLois Curfman McInnes 
2338965ea79SLois Curfman McInnes   /* inform other processors of number of messages and max length*/
234*0452661fSBarry Smith   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
2358965ea79SLois Curfman McInnes   MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
2368965ea79SLois Curfman McInnes   nrecvs = work[rank];
2378965ea79SLois Curfman McInnes   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
2388965ea79SLois Curfman McInnes   nmax = work[rank];
239*0452661fSBarry Smith   PetscFree(work);
2408965ea79SLois Curfman McInnes 
2418965ea79SLois Curfman McInnes   /* post receives:   */
242*0452661fSBarry Smith   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
2438965ea79SLois Curfman McInnes   CHKPTRQ(rvalues);
244*0452661fSBarry Smith   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
2458965ea79SLois Curfman McInnes   CHKPTRQ(recv_waits);
2468965ea79SLois Curfman McInnes   for ( i=0; i<nrecvs; i++ ) {
2478965ea79SLois Curfman McInnes     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
2488965ea79SLois Curfman McInnes   }
2498965ea79SLois Curfman McInnes 
2508965ea79SLois Curfman McInnes   /* do sends:
2518965ea79SLois Curfman McInnes       1) starts[i] gives the starting index in svalues for stuff going to
2528965ea79SLois Curfman McInnes          the ith processor
2538965ea79SLois Curfman McInnes   */
254*0452661fSBarry Smith   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
255*0452661fSBarry Smith   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
2568965ea79SLois Curfman McInnes   CHKPTRQ(send_waits);
257*0452661fSBarry Smith   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
2588965ea79SLois Curfman McInnes   starts[0] = 0;
2598965ea79SLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
2608965ea79SLois Curfman McInnes   for ( i=0; i<N; i++ ) {
2618965ea79SLois Curfman McInnes     svalues[starts[owner[i]]++] = rows[i];
2628965ea79SLois Curfman McInnes   }
2638965ea79SLois Curfman McInnes   ISRestoreIndices(is,&rows);
2648965ea79SLois Curfman McInnes 
2658965ea79SLois Curfman McInnes   starts[0] = 0;
2668965ea79SLois Curfman McInnes   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
2678965ea79SLois Curfman McInnes   count = 0;
2688965ea79SLois Curfman McInnes   for ( i=0; i<size; i++ ) {
2698965ea79SLois Curfman McInnes     if (procs[i]) {
2708965ea79SLois Curfman McInnes       MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
2718965ea79SLois Curfman McInnes     }
2728965ea79SLois Curfman McInnes   }
273*0452661fSBarry Smith   PetscFree(starts);
2748965ea79SLois Curfman McInnes 
2758965ea79SLois Curfman McInnes   base = owners[rank];
2768965ea79SLois Curfman McInnes 
2778965ea79SLois Curfman McInnes   /*  wait on receives */
278*0452661fSBarry Smith   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
2798965ea79SLois Curfman McInnes   source = lens + nrecvs;
2808965ea79SLois Curfman McInnes   count  = nrecvs; slen = 0;
2818965ea79SLois Curfman McInnes   while (count) {
2828965ea79SLois Curfman McInnes     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
2838965ea79SLois Curfman McInnes     /* unpack receives into our local space */
2848965ea79SLois Curfman McInnes     MPI_Get_count(&recv_status,MPI_INT,&n);
2858965ea79SLois Curfman McInnes     source[imdex]  = recv_status.MPI_SOURCE;
2868965ea79SLois Curfman McInnes     lens[imdex]  = n;
2878965ea79SLois Curfman McInnes     slen += n;
2888965ea79SLois Curfman McInnes     count--;
2898965ea79SLois Curfman McInnes   }
290*0452661fSBarry Smith   PetscFree(recv_waits);
2918965ea79SLois Curfman McInnes 
2928965ea79SLois Curfman McInnes   /* move the data into the send scatter */
293*0452661fSBarry Smith   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
2948965ea79SLois Curfman McInnes   count = 0;
2958965ea79SLois Curfman McInnes   for ( i=0; i<nrecvs; i++ ) {
2968965ea79SLois Curfman McInnes     values = rvalues + i*nmax;
2978965ea79SLois Curfman McInnes     for ( j=0; j<lens[i]; j++ ) {
2988965ea79SLois Curfman McInnes       lrows[count++] = values[j] - base;
2998965ea79SLois Curfman McInnes     }
3008965ea79SLois Curfman McInnes   }
301*0452661fSBarry Smith   PetscFree(rvalues); PetscFree(lens);
302*0452661fSBarry Smith   PetscFree(owner); PetscFree(nprocs);
3038965ea79SLois Curfman McInnes 
3048965ea79SLois Curfman McInnes   /* actually zap the local rows */
3058965ea79SLois Curfman McInnes   ierr = ISCreateSeq(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
3068965ea79SLois Curfman McInnes   PLogObjectParent(A,istmp);
307*0452661fSBarry Smith   PetscFree(lrows);
3088965ea79SLois Curfman McInnes   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
3098965ea79SLois Curfman McInnes   ierr = ISDestroy(istmp); CHKERRQ(ierr);
3108965ea79SLois Curfman McInnes 
3118965ea79SLois Curfman McInnes   /* wait on sends */
3128965ea79SLois Curfman McInnes   if (nsends) {
313*0452661fSBarry Smith     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
3148965ea79SLois Curfman McInnes     CHKPTRQ(send_status);
3158965ea79SLois Curfman McInnes     MPI_Waitall(nsends,send_waits,send_status);
316*0452661fSBarry Smith     PetscFree(send_status);
3178965ea79SLois Curfman McInnes   }
318*0452661fSBarry Smith   PetscFree(send_waits); PetscFree(svalues);
3198965ea79SLois Curfman McInnes 
3208965ea79SLois Curfman McInnes   return 0;
3218965ea79SLois Curfman McInnes }
3228965ea79SLois Curfman McInnes 
32339ddd567SLois Curfman McInnes static int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
3248965ea79SLois Curfman McInnes {
32539ddd567SLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
3268965ea79SLois Curfman McInnes   int          ierr;
32739ddd567SLois Curfman McInnes   if (!mdn->assembled)
32839ddd567SLois Curfman McInnes     SETERRQ(1,"MatMult_MPIDense:Must assemble matrix first");
32939ddd567SLois Curfman McInnes   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx);
33039ddd567SLois Curfman McInnes   CHKERRQ(ierr);
33139ddd567SLois Curfman McInnes   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx);
33239ddd567SLois Curfman McInnes   CHKERRQ(ierr);
33339ddd567SLois Curfman McInnes   ierr = MatMult(mdn->A,mdn->lvec,yy); CHKERRQ(ierr);
3348965ea79SLois Curfman McInnes   return 0;
3358965ea79SLois Curfman McInnes }
3368965ea79SLois Curfman McInnes 
33739ddd567SLois Curfman McInnes static int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
3388965ea79SLois Curfman McInnes {
33939ddd567SLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
3408965ea79SLois Curfman McInnes   int          ierr;
34139ddd567SLois Curfman McInnes   if (!mdn->assembled)
34239ddd567SLois Curfman McInnes     SETERRQ(1,"MatMultAdd_MPIDense:Must assemble matrix first");
3433501a2bdSLois Curfman McInnes   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx);
34439ddd567SLois Curfman McInnes   CHKERRQ(ierr);
3453501a2bdSLois Curfman McInnes   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx);
34639ddd567SLois Curfman McInnes   CHKERRQ(ierr);
34739ddd567SLois Curfman McInnes   ierr = MatMultAdd(mdn->A,mdn->lvec,yy,zz); CHKERRQ(ierr);
3488965ea79SLois Curfman McInnes   return 0;
3498965ea79SLois Curfman McInnes }
3508965ea79SLois Curfman McInnes 
351096963f5SLois Curfman McInnes static int MatMultTrans_MPIDense(Mat A,Vec xx,Vec yy)
352096963f5SLois Curfman McInnes {
353096963f5SLois Curfman McInnes   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
354096963f5SLois Curfman McInnes   int          ierr;
3553501a2bdSLois Curfman McInnes   Scalar       zero = 0.0;
356096963f5SLois Curfman McInnes 
357096963f5SLois Curfman McInnes   if (!a->assembled) SETERRQ(1,"MatMulTrans_MPIDense:must assemble matrix");
3583501a2bdSLois Curfman McInnes   ierr = VecSet(&zero,yy); CHKERRQ(ierr);
359096963f5SLois Curfman McInnes   ierr = MatMultTrans(a->A,xx,a->lvec); CHKERRQ(ierr);
360096963f5SLois Curfman McInnes   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,
361096963f5SLois Curfman McInnes          (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
362096963f5SLois Curfman McInnes   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,
363096963f5SLois Curfman McInnes          (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
364096963f5SLois Curfman McInnes   return 0;
365096963f5SLois Curfman McInnes }
366096963f5SLois Curfman McInnes 
367096963f5SLois Curfman McInnes static int MatMultTransAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
368096963f5SLois Curfman McInnes {
369096963f5SLois Curfman McInnes   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
370096963f5SLois Curfman McInnes   int          ierr;
371096963f5SLois Curfman McInnes 
372096963f5SLois Curfman McInnes   if (!a->assembled) SETERRQ(1,"MatMulTransAdd_MPIDense:must assemble matrix");
3733501a2bdSLois Curfman McInnes   ierr = VecCopy(yy,zz); CHKERRQ(ierr);
3743501a2bdSLois Curfman McInnes   ierr = MatMultTrans(a->A,xx,a->lvec); CHKERRQ(ierr);
375096963f5SLois Curfman McInnes   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,
376096963f5SLois Curfman McInnes          (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
377096963f5SLois Curfman McInnes   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,
378096963f5SLois Curfman McInnes          (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
379096963f5SLois Curfman McInnes   return 0;
380096963f5SLois Curfman McInnes }
381096963f5SLois Curfman McInnes 
38239ddd567SLois Curfman McInnes static int MatGetDiagonal_MPIDense(Mat A,Vec v)
3838965ea79SLois Curfman McInnes {
38439ddd567SLois Curfman McInnes   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
385096963f5SLois Curfman McInnes   Mat_SeqDense *aloc = (Mat_SeqDense *) a->A->data;
386096963f5SLois Curfman McInnes   int          ierr, i, n, m = a->m, radd;
387096963f5SLois Curfman McInnes   Scalar       *x;
38839ddd567SLois Curfman McInnes   if (!a->assembled) SETERRQ(1,"MatGetDiag_MPIDense:must assemble matrix");
389096963f5SLois Curfman McInnes   ierr = VecGetArray(v,&x); CHKERRQ(ierr);
390096963f5SLois Curfman McInnes   ierr = VecGetSize(v,&n); CHKERRQ(ierr);
391096963f5SLois Curfman McInnes   if (n != a->M) SETERRQ(1,"MatGetDiagonal_SeqDense:Nonconforming mat and vec");
392096963f5SLois Curfman McInnes   radd = a->rstart*m*m;
393096963f5SLois Curfman McInnes   for ( i=0; i<m; i++ ) {
394096963f5SLois Curfman McInnes     x[i] = aloc->v[radd + i*m + i];
395096963f5SLois Curfman McInnes   }
396096963f5SLois Curfman McInnes   return 0;
3978965ea79SLois Curfman McInnes }
3988965ea79SLois Curfman McInnes 
39939ddd567SLois Curfman McInnes static int MatDestroy_MPIDense(PetscObject obj)
4008965ea79SLois Curfman McInnes {
4018965ea79SLois Curfman McInnes   Mat          mat = (Mat) obj;
4023501a2bdSLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
4038965ea79SLois Curfman McInnes   int          ierr;
4048965ea79SLois Curfman McInnes #if defined(PETSC_LOG)
4053501a2bdSLois Curfman McInnes   PLogObjectState(obj,"Rows=%d, Cols=%d",mdn->M,mdn->N);
4068965ea79SLois Curfman McInnes #endif
407*0452661fSBarry Smith   PetscFree(mdn->rowners);
4083501a2bdSLois Curfman McInnes   ierr = MatDestroy(mdn->A); CHKERRQ(ierr);
4093501a2bdSLois Curfman McInnes   if (mdn->lvec)   VecDestroy(mdn->lvec);
4103501a2bdSLois Curfman McInnes   if (mdn->Mvctx)  VecScatterDestroy(mdn->Mvctx);
411*0452661fSBarry Smith   PetscFree(mdn);
4128965ea79SLois Curfman McInnes   PLogObjectDestroy(mat);
413*0452661fSBarry Smith   PetscHeaderDestroy(mat);
4148965ea79SLois Curfman McInnes   return 0;
4158965ea79SLois Curfman McInnes }
41639ddd567SLois Curfman McInnes 
4178965ea79SLois Curfman McInnes #include "pinclude/pviewer.h"
4188965ea79SLois Curfman McInnes 
41939ddd567SLois Curfman McInnes static int MatView_MPIDense_Binary(Mat mat,Viewer viewer)
4208965ea79SLois Curfman McInnes {
42139ddd567SLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
4228965ea79SLois Curfman McInnes   int          ierr;
42339ddd567SLois Curfman McInnes   if (mdn->size == 1) {
42439ddd567SLois Curfman McInnes     ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
4258965ea79SLois Curfman McInnes   }
42639ddd567SLois Curfman McInnes   else SETERRQ(1,"MatView_MPIDense_Binary:Only uniprocessor output supported");
4278965ea79SLois Curfman McInnes   return 0;
4288965ea79SLois Curfman McInnes }
4298965ea79SLois Curfman McInnes 
43039ddd567SLois Curfman McInnes static int MatView_MPIDense_ASCII(Mat mat,Viewer viewer)
4318965ea79SLois Curfman McInnes {
43239ddd567SLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
43339ddd567SLois Curfman McInnes   int          ierr, format;
4348965ea79SLois Curfman McInnes   PetscObject  vobj = (PetscObject) viewer;
4358965ea79SLois Curfman McInnes   FILE         *fd;
4368965ea79SLois Curfman McInnes 
4373501a2bdSLois Curfman McInnes   ierr = ViewerFileGetPointer_Private(viewer,&fd); CHKERRQ(ierr);
4388965ea79SLois Curfman McInnes   if (vobj->type == ASCII_FILE_VIEWER || vobj->type == ASCII_FILES_VIEWER) {
4398965ea79SLois Curfman McInnes     ierr = ViewerFileGetFormat_Private(viewer,&format);
4403501a2bdSLois Curfman McInnes     if (format == FILE_FORMAT_INFO_DETAILED) {
441096963f5SLois Curfman McInnes       int nz, nzalloc, mem, rank;
442096963f5SLois Curfman McInnes       MPI_Comm_rank(mat->comm,&rank);
443096963f5SLois Curfman McInnes       ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem);
444096963f5SLois Curfman McInnes       MPIU_Seq_begin(mat->comm,1);
4453501a2bdSLois Curfman McInnes         fprintf(fd,"  [%d] local rows %d nz %d nz alloced %d mem %d \n",
446096963f5SLois Curfman McInnes             rank,mdn->m,nz,nzalloc,mem);
447096963f5SLois Curfman McInnes       fflush(fd);
448096963f5SLois Curfman McInnes       MPIU_Seq_end(mat->comm,1);
4493501a2bdSLois Curfman McInnes       ierr = VecScatterView(mdn->Mvctx,viewer); CHKERRQ(ierr);
4503501a2bdSLois Curfman McInnes       return 0;
4513501a2bdSLois Curfman McInnes     }
4523501a2bdSLois Curfman McInnes     else if (format == FILE_FORMAT_INFO) {
453096963f5SLois Curfman McInnes       return 0;
4548965ea79SLois Curfman McInnes     }
4558965ea79SLois Curfman McInnes   }
4568965ea79SLois Curfman McInnes   if (vobj->type == ASCII_FILE_VIEWER) {
4578965ea79SLois Curfman McInnes     MPIU_Seq_begin(mat->comm,1);
45839ddd567SLois Curfman McInnes     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d\n",
45939ddd567SLois Curfman McInnes              mdn->rank,mdn->m,mdn->rstart,mdn->rend,mdn->n);
46039ddd567SLois Curfman McInnes     ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
4618965ea79SLois Curfman McInnes     fflush(fd);
4628965ea79SLois Curfman McInnes     MPIU_Seq_end(mat->comm,1);
4638965ea79SLois Curfman McInnes   }
4648965ea79SLois Curfman McInnes   else {
46539ddd567SLois Curfman McInnes     int size = mdn->size, rank = mdn->rank;
4668965ea79SLois Curfman McInnes     if (size == 1) {
46739ddd567SLois Curfman McInnes       ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
4688965ea79SLois Curfman McInnes     }
4698965ea79SLois Curfman McInnes     else {
4708965ea79SLois Curfman McInnes       /* assemble the entire matrix onto first processor. */
4718965ea79SLois Curfman McInnes       Mat          A;
47239ddd567SLois Curfman McInnes       int          M = mdn->M, N = mdn->N,m,row,i, nz, *cols;
47339ddd567SLois Curfman McInnes       Scalar       *vals;
47439ddd567SLois Curfman McInnes       Mat_SeqDense *Amdn = (Mat_SeqDense*) mdn->A->data;
4758965ea79SLois Curfman McInnes 
4768965ea79SLois Curfman McInnes       if (!rank) {
47739ddd567SLois Curfman McInnes         ierr = MatCreateMPIDense(mat->comm,M,M,N,N,&A); CHKERRQ(ierr);
4788965ea79SLois Curfman McInnes       }
4798965ea79SLois Curfman McInnes       else {
48039ddd567SLois Curfman McInnes         ierr = MatCreateMPIDense(mat->comm,0,M,N,N,&A); CHKERRQ(ierr);
4818965ea79SLois Curfman McInnes       }
4828965ea79SLois Curfman McInnes       PLogObjectParent(mat,A);
4838965ea79SLois Curfman McInnes 
48439ddd567SLois Curfman McInnes       /* Copy the matrix ... This isn't the most efficient means,
48539ddd567SLois Curfman McInnes          but it's quick for now */
48639ddd567SLois Curfman McInnes       row = mdn->rstart; m = Amdn->m;
4878965ea79SLois Curfman McInnes       for ( i=0; i<m; i++ ) {
48839ddd567SLois Curfman McInnes         ierr = MatGetRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
48939ddd567SLois Curfman McInnes         ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES); CHKERRQ(ierr);
49039ddd567SLois Curfman McInnes         ierr = MatRestoreRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
49139ddd567SLois Curfman McInnes         row++;
4928965ea79SLois Curfman McInnes       }
4938965ea79SLois Curfman McInnes 
4948965ea79SLois Curfman McInnes       ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
4958965ea79SLois Curfman McInnes       ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
4968965ea79SLois Curfman McInnes       if (!rank) {
49739ddd567SLois Curfman McInnes         ierr = MatView(((Mat_MPIDense*)(A->data))->A,viewer); CHKERRQ(ierr);
4988965ea79SLois Curfman McInnes       }
4998965ea79SLois Curfman McInnes       ierr = MatDestroy(A); CHKERRQ(ierr);
5008965ea79SLois Curfman McInnes     }
5018965ea79SLois Curfman McInnes   }
5028965ea79SLois Curfman McInnes   return 0;
5038965ea79SLois Curfman McInnes }
5048965ea79SLois Curfman McInnes 
50539ddd567SLois Curfman McInnes static int MatView_MPIDense(PetscObject obj,Viewer viewer)
5068965ea79SLois Curfman McInnes {
5078965ea79SLois Curfman McInnes   Mat          mat = (Mat) obj;
50839ddd567SLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
5098965ea79SLois Curfman McInnes   PetscObject  vobj = (PetscObject) viewer;
51039ddd567SLois Curfman McInnes   int          ierr;
5118965ea79SLois Curfman McInnes 
51239ddd567SLois Curfman McInnes   if (!mdn->assembled) SETERRQ(1,"MatView_MPIDense:must assemble matrix");
5138965ea79SLois Curfman McInnes   if (!viewer) {
5148965ea79SLois Curfman McInnes     viewer = STDOUT_VIEWER_SELF; vobj = (PetscObject) viewer;
5158965ea79SLois Curfman McInnes   }
51639ddd567SLois Curfman McInnes   if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILE_VIEWER) {
51739ddd567SLois Curfman McInnes     ierr = MatView_MPIDense_ASCII(mat,viewer); CHKERRQ(ierr);
5188965ea79SLois Curfman McInnes   }
5198965ea79SLois Curfman McInnes   else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILES_VIEWER) {
52039ddd567SLois Curfman McInnes     ierr = MatView_MPIDense_ASCII(mat,viewer); CHKERRQ(ierr);
5218965ea79SLois Curfman McInnes   }
5228965ea79SLois Curfman McInnes   else if (vobj->type == BINARY_FILE_VIEWER) {
52339ddd567SLois Curfman McInnes     return MatView_MPIDense_Binary(mat,viewer);
5248965ea79SLois Curfman McInnes   }
5258965ea79SLois Curfman McInnes   return 0;
5268965ea79SLois Curfman McInnes }
5278965ea79SLois Curfman McInnes 
5283501a2bdSLois Curfman McInnes static int MatGetInfo_MPIDense(Mat A,MatInfoType flag,int *nz,
5298965ea79SLois Curfman McInnes                              int *nzalloc,int *mem)
5308965ea79SLois Curfman McInnes {
5313501a2bdSLois Curfman McInnes   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
5323501a2bdSLois Curfman McInnes   Mat          mdn = mat->A;
53339ddd567SLois Curfman McInnes   int          ierr, isend[3], irecv[3];
5348965ea79SLois Curfman McInnes 
5353501a2bdSLois Curfman McInnes   ierr = MatGetInfo(mdn,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr);
5368965ea79SLois Curfman McInnes   if (flag == MAT_LOCAL) {
5378965ea79SLois Curfman McInnes     *nz = isend[0]; *nzalloc = isend[1]; *mem = isend[2];
5388965ea79SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_MAX) {
5393501a2bdSLois Curfman McInnes     MPI_Allreduce(isend,irecv,3,MPI_INT,MPI_MAX,A->comm);
5408965ea79SLois Curfman McInnes     *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
5418965ea79SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_SUM) {
5423501a2bdSLois Curfman McInnes     MPI_Allreduce(isend,irecv,3,MPI_INT,MPI_SUM,A->comm);
5438965ea79SLois Curfman McInnes     *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
5448965ea79SLois Curfman McInnes   }
5458965ea79SLois Curfman McInnes   return 0;
5468965ea79SLois Curfman McInnes }
5478965ea79SLois Curfman McInnes 
54839ddd567SLois Curfman McInnes static int MatSetOption_MPIDense(Mat A,MatOption op)
5498965ea79SLois Curfman McInnes {
55039ddd567SLois Curfman McInnes   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
5518965ea79SLois Curfman McInnes 
5528965ea79SLois Curfman McInnes   if (op == NO_NEW_NONZERO_LOCATIONS ||
5538965ea79SLois Curfman McInnes       op == YES_NEW_NONZERO_LOCATIONS ||
5548965ea79SLois Curfman McInnes       op == COLUMNS_SORTED ||
5558965ea79SLois Curfman McInnes       op == ROW_ORIENTED) {
5568965ea79SLois Curfman McInnes         MatSetOption(a->A,op);
5578965ea79SLois Curfman McInnes   }
5588965ea79SLois Curfman McInnes   else if (op == ROWS_SORTED ||
5598965ea79SLois Curfman McInnes            op == SYMMETRIC_MATRIX ||
5608965ea79SLois Curfman McInnes            op == STRUCTURALLY_SYMMETRIC_MATRIX ||
5618965ea79SLois Curfman McInnes            op == YES_NEW_DIAGONALS)
56239ddd567SLois Curfman McInnes     PLogInfo((PetscObject)A,"Info:MatSetOption_MPIDense:Option ignored\n");
5638965ea79SLois Curfman McInnes   else if (op == COLUMN_ORIENTED)
56439ddd567SLois Curfman McInnes     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIDense:COLUMN_ORIENTED");}
5658965ea79SLois Curfman McInnes   else if (op == NO_NEW_DIAGONALS)
56639ddd567SLois Curfman McInnes     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIDense:NO_NEW_DIAGONALS");}
5678965ea79SLois Curfman McInnes   else
56839ddd567SLois Curfman McInnes     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIDense:unknown option");}
5698965ea79SLois Curfman McInnes   return 0;
5708965ea79SLois Curfman McInnes }
5718965ea79SLois Curfman McInnes 
5723501a2bdSLois Curfman McInnes static int MatGetSize_MPIDense(Mat A,int *m,int *n)
5738965ea79SLois Curfman McInnes {
5743501a2bdSLois Curfman McInnes   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
5758965ea79SLois Curfman McInnes   *m = mat->M; *n = mat->N;
5768965ea79SLois Curfman McInnes   return 0;
5778965ea79SLois Curfman McInnes }
5788965ea79SLois Curfman McInnes 
5793501a2bdSLois Curfman McInnes static int MatGetLocalSize_MPIDense(Mat A,int *m,int *n)
5808965ea79SLois Curfman McInnes {
5813501a2bdSLois Curfman McInnes   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
5828965ea79SLois Curfman McInnes   *m = mat->m; *n = mat->N;
5838965ea79SLois Curfman McInnes   return 0;
5848965ea79SLois Curfman McInnes }
5858965ea79SLois Curfman McInnes 
5863501a2bdSLois Curfman McInnes static int MatGetOwnershipRange_MPIDense(Mat A,int *m,int *n)
5878965ea79SLois Curfman McInnes {
5883501a2bdSLois Curfman McInnes   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
5898965ea79SLois Curfman McInnes   *m = mat->rstart; *n = mat->rend;
5908965ea79SLois Curfman McInnes   return 0;
5918965ea79SLois Curfman McInnes }
5928965ea79SLois Curfman McInnes 
5933501a2bdSLois Curfman McInnes static int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,Scalar **v)
5948965ea79SLois Curfman McInnes {
5953501a2bdSLois Curfman McInnes   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
59639ddd567SLois Curfman McInnes   int          lrow, rstart = mat->rstart, rend = mat->rend;
5978965ea79SLois Curfman McInnes 
59839ddd567SLois Curfman McInnes   if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIDense:only local rows")
5998965ea79SLois Curfman McInnes   lrow = row - rstart;
60039ddd567SLois Curfman McInnes   return MatGetRow(mat->A,lrow,nz,idx,v);
6018965ea79SLois Curfman McInnes }
6028965ea79SLois Curfman McInnes 
60339ddd567SLois Curfman McInnes static int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,Scalar **v)
6048965ea79SLois Curfman McInnes {
605*0452661fSBarry Smith   if (idx) PetscFree(*idx);
606*0452661fSBarry Smith   if (v) PetscFree(*v);
6078965ea79SLois Curfman McInnes   return 0;
6088965ea79SLois Curfman McInnes }
6098965ea79SLois Curfman McInnes 
610cddf8d76SBarry Smith static int MatNorm_MPIDense(Mat A,NormType type,double *norm)
611096963f5SLois Curfman McInnes {
6123501a2bdSLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *) A->data;
6133501a2bdSLois Curfman McInnes   Mat_SeqDense *mat = (Mat_SeqDense*) mdn->A->data;
6143501a2bdSLois Curfman McInnes   int          ierr, i, j;
6153501a2bdSLois Curfman McInnes   double       sum = 0.0;
6163501a2bdSLois Curfman McInnes   Scalar       *v = mat->v;
6173501a2bdSLois Curfman McInnes 
6183501a2bdSLois Curfman McInnes   if (!mdn->assembled) SETERRQ(1,"MatNorm_MPIDense:Must assemble matrix");
6193501a2bdSLois Curfman McInnes   if (mdn->size == 1) {
6203501a2bdSLois Curfman McInnes     ierr =  MatNorm(mdn->A,type,norm); CHKERRQ(ierr);
6213501a2bdSLois Curfman McInnes   } else {
6223501a2bdSLois Curfman McInnes     if (type == NORM_FROBENIUS) {
6233501a2bdSLois Curfman McInnes       for (i=0; i<mat->n*mat->m; i++ ) {
6243501a2bdSLois Curfman McInnes #if defined(PETSC_COMPLEX)
6253501a2bdSLois Curfman McInnes         sum += real(conj(*v)*(*v)); v++;
6263501a2bdSLois Curfman McInnes #else
6273501a2bdSLois Curfman McInnes         sum += (*v)*(*v); v++;
6283501a2bdSLois Curfman McInnes #endif
6293501a2bdSLois Curfman McInnes       }
6303501a2bdSLois Curfman McInnes       MPI_Allreduce((void*)&sum,(void*)norm,1,MPI_DOUBLE,MPI_SUM,A->comm);
6313501a2bdSLois Curfman McInnes       *norm = sqrt(*norm);
6323501a2bdSLois Curfman McInnes       PLogFlops(2*mat->n*mat->m);
6333501a2bdSLois Curfman McInnes     }
6343501a2bdSLois Curfman McInnes     else if (type == NORM_1) {
6353501a2bdSLois Curfman McInnes       double *tmp, *tmp2;
636*0452661fSBarry Smith       tmp  = (double *) PetscMalloc( 2*mdn->N*sizeof(double) ); CHKPTRQ(tmp);
6373501a2bdSLois Curfman McInnes       tmp2 = tmp + mdn->N;
638cddf8d76SBarry Smith       PetscMemzero(tmp,2*mdn->N*sizeof(double));
639096963f5SLois Curfman McInnes       *norm = 0.0;
6403501a2bdSLois Curfman McInnes       v = mat->v;
6413501a2bdSLois Curfman McInnes       for ( j=0; j<mat->n; j++ ) {
6423501a2bdSLois Curfman McInnes         for ( i=0; i<mat->m; i++ ) {
643cddf8d76SBarry Smith           tmp[j] += PetscAbsScalar(*v++);
6443501a2bdSLois Curfman McInnes         }
6453501a2bdSLois Curfman McInnes       }
6463501a2bdSLois Curfman McInnes       MPI_Allreduce((void*)tmp,(void*)tmp2,mdn->N,MPI_DOUBLE,MPI_SUM,A->comm);
6473501a2bdSLois Curfman McInnes       for ( j=0; j<mdn->N; j++ ) {
6483501a2bdSLois Curfman McInnes         if (tmp2[j] > *norm) *norm = tmp2[j];
6493501a2bdSLois Curfman McInnes       }
650*0452661fSBarry Smith       PetscFree(tmp);
6513501a2bdSLois Curfman McInnes       PLogFlops(mat->n*mat->m);
6523501a2bdSLois Curfman McInnes     }
6533501a2bdSLois Curfman McInnes     else if (type == NORM_INFINITY) { /* max row norm */
6543501a2bdSLois Curfman McInnes       double ntemp;
6553501a2bdSLois Curfman McInnes       ierr = MatNorm(mdn->A,type,&ntemp); CHKERRQ(ierr);
6563501a2bdSLois Curfman McInnes       MPI_Allreduce((void*)&ntemp,(void*)norm,1,MPI_DOUBLE,MPI_MAX,A->comm);
6573501a2bdSLois Curfman McInnes     }
6583501a2bdSLois Curfman McInnes     else {
6593501a2bdSLois Curfman McInnes       SETERRQ(1,"MatNorm_MPIDense:No support for two norm");
6603501a2bdSLois Curfman McInnes     }
6613501a2bdSLois Curfman McInnes   }
6623501a2bdSLois Curfman McInnes   return 0;
6633501a2bdSLois Curfman McInnes }
6643501a2bdSLois Curfman McInnes 
6653501a2bdSLois Curfman McInnes static int MatTranspose_MPIDense(Mat A,Mat *matout)
6663501a2bdSLois Curfman McInnes {
6673501a2bdSLois Curfman McInnes   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
6683501a2bdSLois Curfman McInnes   Mat_SeqDense *Aloc = (Mat_SeqDense *) a->A->data;
6693501a2bdSLois Curfman McInnes   Mat          B;
6703501a2bdSLois Curfman McInnes   int          M = a->M, N = a->N, m, n, *rwork, rstart = a->rstart;
6713501a2bdSLois Curfman McInnes   int          j, i, ierr;
6723501a2bdSLois Curfman McInnes   Scalar       *v;
6733501a2bdSLois Curfman McInnes 
6743501a2bdSLois Curfman McInnes   if (!matout && M != N)
6753501a2bdSLois Curfman McInnes     SETERRQ(1,"MatTranspose_MPIDense:Supports square matrix only in-place");
6763501a2bdSLois Curfman McInnes   ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,&B); CHKERRQ(ierr);
6773501a2bdSLois Curfman McInnes 
6783501a2bdSLois Curfman McInnes   m = Aloc->m; n = Aloc->n; v = Aloc->v;
679*0452661fSBarry Smith   rwork = (int *) PetscMalloc(n*sizeof(int)); CHKPTRQ(rwork);
6803501a2bdSLois Curfman McInnes   for ( j=0; j<n; j++ ) {
6813501a2bdSLois Curfman McInnes     for (i=0; i<m; i++) rwork[i] = rstart + i;
6823501a2bdSLois Curfman McInnes     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES); CHKERRQ(ierr);
6833501a2bdSLois Curfman McInnes     v += m;
6843501a2bdSLois Curfman McInnes   }
685*0452661fSBarry Smith   PetscFree(rwork);
6863501a2bdSLois Curfman McInnes   ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
6873501a2bdSLois Curfman McInnes   ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
6883501a2bdSLois Curfman McInnes   if (matout) {
6893501a2bdSLois Curfman McInnes     *matout = B;
6903501a2bdSLois Curfman McInnes   } else {
6913501a2bdSLois Curfman McInnes     /* This isn't really an in-place transpose, but free data struct from a */
692*0452661fSBarry Smith     PetscFree(a->rowners);
6933501a2bdSLois Curfman McInnes     ierr = MatDestroy(a->A); CHKERRQ(ierr);
6943501a2bdSLois Curfman McInnes     if (a->lvec) VecDestroy(a->lvec);
6953501a2bdSLois Curfman McInnes     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
696*0452661fSBarry Smith     PetscFree(a);
6973501a2bdSLois Curfman McInnes     PetscMemcpy(A,B,sizeof(struct _Mat));
698*0452661fSBarry Smith     PetscHeaderDestroy(B);
6993501a2bdSLois Curfman McInnes   }
700096963f5SLois Curfman McInnes   return 0;
701096963f5SLois Curfman McInnes }
702096963f5SLois Curfman McInnes 
70355659b69SBarry Smith static int MatCopyPrivate_MPIDense(Mat,Mat *,int);
7048965ea79SLois Curfman McInnes 
7058965ea79SLois Curfman McInnes /* -------------------------------------------------------------------*/
70639ddd567SLois Curfman McInnes static struct _MatOps MatOps = {MatSetValues_MPIDense,
70739ddd567SLois Curfman McInnes        MatGetRow_MPIDense,MatRestoreRow_MPIDense,
70839ddd567SLois Curfman McInnes        MatMult_MPIDense,MatMultAdd_MPIDense,
709096963f5SLois Curfman McInnes        MatMultTrans_MPIDense,MatMultTransAdd_MPIDense,
71039ddd567SLois Curfman McInnes        0,0,
71139ddd567SLois Curfman McInnes        0,0,0,
7123501a2bdSLois Curfman McInnes        0,0,MatTranspose_MPIDense,
71339ddd567SLois Curfman McInnes        MatGetInfo_MPIDense,0,
714096963f5SLois Curfman McInnes        MatGetDiagonal_MPIDense,0,MatNorm_MPIDense,
71539ddd567SLois Curfman McInnes        MatAssemblyBegin_MPIDense,MatAssemblyEnd_MPIDense,
7168965ea79SLois Curfman McInnes        0,
71739ddd567SLois Curfman McInnes        MatSetOption_MPIDense,MatZeroEntries_MPIDense,MatZeroRows_MPIDense,
71839ddd567SLois Curfman McInnes        0,
71939ddd567SLois Curfman McInnes        0,0,0,0,
72039ddd567SLois Curfman McInnes        MatGetSize_MPIDense,MatGetLocalSize_MPIDense,
72139ddd567SLois Curfman McInnes        MatGetOwnershipRange_MPIDense,
72239ddd567SLois Curfman McInnes        0,0,
72339ddd567SLois Curfman McInnes        0,0,0,0,0,MatCopyPrivate_MPIDense};
7248965ea79SLois Curfman McInnes 
7258965ea79SLois Curfman McInnes /*@C
72639ddd567SLois Curfman McInnes    MatCreateMPIDense - Creates a sparse parallel matrix in dense format.
7278965ea79SLois Curfman McInnes 
7288965ea79SLois Curfman McInnes    Input Parameters:
7298965ea79SLois Curfman McInnes .  comm - MPI communicator
7308965ea79SLois Curfman McInnes .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
7318965ea79SLois Curfman McInnes .  n - number of local columns (or PETSC_DECIDE to have calculated
7328965ea79SLois Curfman McInnes            if N is given)
7338965ea79SLois Curfman McInnes .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
7348965ea79SLois Curfman McInnes .  N - number of global columns (or PETSC_DECIDE to have calculated
7358965ea79SLois Curfman McInnes            if n is given)
7368965ea79SLois Curfman McInnes 
7378965ea79SLois Curfman McInnes    Output Parameter:
7388965ea79SLois Curfman McInnes .  newmat - the matrix
7398965ea79SLois Curfman McInnes 
7408965ea79SLois Curfman McInnes    Notes:
74139ddd567SLois Curfman McInnes    The dense format is fully compatible with standard Fortran 77
74239ddd567SLois Curfman McInnes    storage by columns.
7438965ea79SLois Curfman McInnes 
7448965ea79SLois Curfman McInnes    The user MUST specify either the local or global matrix dimensions
7458965ea79SLois Curfman McInnes    (possibly both).
7468965ea79SLois Curfman McInnes 
7473501a2bdSLois Curfman McInnes    Currently, the only parallel dense matrix decomposition is by rows,
7483501a2bdSLois Curfman McInnes    so that n=N and each submatrix owns all of the global columns.
7493501a2bdSLois Curfman McInnes 
75039ddd567SLois Curfman McInnes .keywords: matrix, dense, parallel
7518965ea79SLois Curfman McInnes 
75239ddd567SLois Curfman McInnes .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
7538965ea79SLois Curfman McInnes @*/
75439ddd567SLois Curfman McInnes int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,Mat *newmat)
7558965ea79SLois Curfman McInnes {
7568965ea79SLois Curfman McInnes   Mat          mat;
75739ddd567SLois Curfman McInnes   Mat_MPIDense *a;
75839ddd567SLois Curfman McInnes   int          ierr, i;
7598965ea79SLois Curfman McInnes 
7608965ea79SLois Curfman McInnes   *newmat         = 0;
761*0452661fSBarry Smith   PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIDENSE,comm);
7628965ea79SLois Curfman McInnes   PLogObjectCreate(mat);
763*0452661fSBarry Smith   mat->data       = (void *) (a = PetscNew(Mat_MPIDense)); CHKPTRQ(a);
7648965ea79SLois Curfman McInnes   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
76539ddd567SLois Curfman McInnes   mat->destroy    = MatDestroy_MPIDense;
76639ddd567SLois Curfman McInnes   mat->view       = MatView_MPIDense;
7678965ea79SLois Curfman McInnes   mat->factor     = 0;
7688965ea79SLois Curfman McInnes 
7698965ea79SLois Curfman McInnes   a->insertmode = NOT_SET_VALUES;
7708965ea79SLois Curfman McInnes   MPI_Comm_rank(comm,&a->rank);
7718965ea79SLois Curfman McInnes   MPI_Comm_size(comm,&a->size);
7728965ea79SLois Curfman McInnes 
77339ddd567SLois Curfman McInnes   if (M == PETSC_DECIDE) MPI_Allreduce(&m,&M,1,MPI_INT,MPI_SUM,comm);
7748965ea79SLois Curfman McInnes   if (m == PETSC_DECIDE) {m = M/a->size + ((M % a->size) > a->rank);}
77539ddd567SLois Curfman McInnes 
77639ddd567SLois Curfman McInnes   /* each row stores all columns */
77739ddd567SLois Curfman McInnes   if (N == PETSC_DECIDE) N = n;
77839ddd567SLois Curfman McInnes   if (n == PETSC_DECIDE) n = N;
77939ddd567SLois Curfman McInnes   if (n != N) SETERRQ(1,"MatCreateMPIDense:For now, only n=N is supported");
7808965ea79SLois Curfman McInnes   a->N = N;
7818965ea79SLois Curfman McInnes   a->M = M;
78239ddd567SLois Curfman McInnes   a->m = m;
78339ddd567SLois Curfman McInnes   a->n = n;
7848965ea79SLois Curfman McInnes 
7858965ea79SLois Curfman McInnes   /* build local table of row and column ownerships */
786*0452661fSBarry Smith   a->rowners = (int *) PetscMalloc((a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
78739ddd567SLois Curfman McInnes   PLogObjectMemory(mat,(a->size+2)*sizeof(int)+sizeof(struct _Mat)+
78839ddd567SLois Curfman McInnes                        sizeof(Mat_MPIDense));
7898965ea79SLois Curfman McInnes   MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);
7908965ea79SLois Curfman McInnes   a->rowners[0] = 0;
7918965ea79SLois Curfman McInnes   for ( i=2; i<=a->size; i++ ) {
7928965ea79SLois Curfman McInnes     a->rowners[i] += a->rowners[i-1];
7938965ea79SLois Curfman McInnes   }
7948965ea79SLois Curfman McInnes   a->rstart = a->rowners[a->rank];
7958965ea79SLois Curfman McInnes   a->rend   = a->rowners[a->rank+1];
7968965ea79SLois Curfman McInnes 
79739ddd567SLois Curfman McInnes   ierr = MatCreateSeqDense(MPI_COMM_SELF,m,N,&a->A); CHKERRQ(ierr);
7988965ea79SLois Curfman McInnes   PLogObjectParent(mat,a->A);
7998965ea79SLois Curfman McInnes 
8008965ea79SLois Curfman McInnes   /* build cache for off array entries formed */
8018965ea79SLois Curfman McInnes   ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr);
8028965ea79SLois Curfman McInnes 
8038965ea79SLois Curfman McInnes   /* stuff used for matrix vector multiply */
8048965ea79SLois Curfman McInnes   a->lvec      = 0;
8058965ea79SLois Curfman McInnes   a->Mvctx     = 0;
8068965ea79SLois Curfman McInnes   a->assembled = 0;
8078965ea79SLois Curfman McInnes 
8088965ea79SLois Curfman McInnes   *newmat = mat;
8098965ea79SLois Curfman McInnes   return 0;
8108965ea79SLois Curfman McInnes }
8118965ea79SLois Curfman McInnes 
8123501a2bdSLois Curfman McInnes static int MatCopyPrivate_MPIDense(Mat A,Mat *newmat,int cpvalues)
8138965ea79SLois Curfman McInnes {
8148965ea79SLois Curfman McInnes   Mat          mat;
8153501a2bdSLois Curfman McInnes   Mat_MPIDense *a,*oldmat = (Mat_MPIDense *) A->data;
81639ddd567SLois Curfman McInnes   int          ierr;
8178965ea79SLois Curfman McInnes 
81839ddd567SLois Curfman McInnes   if (!oldmat->assembled) SETERRQ(1,"MatCopyPrivate_MPIDense:Must assemble matrix");
8198965ea79SLois Curfman McInnes   *newmat       = 0;
820*0452661fSBarry Smith   PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIDENSE,A->comm);
8218965ea79SLois Curfman McInnes   PLogObjectCreate(mat);
822*0452661fSBarry Smith   mat->data     = (void *) (a = PetscNew(Mat_MPIDense)); CHKPTRQ(a);
8238965ea79SLois Curfman McInnes   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
82439ddd567SLois Curfman McInnes   mat->destroy  = MatDestroy_MPIDense;
82539ddd567SLois Curfman McInnes   mat->view     = MatView_MPIDense;
8263501a2bdSLois Curfman McInnes   mat->factor   = A->factor;
8278965ea79SLois Curfman McInnes 
8288965ea79SLois Curfman McInnes   a->m          = oldmat->m;
8298965ea79SLois Curfman McInnes   a->n          = oldmat->n;
8308965ea79SLois Curfman McInnes   a->M          = oldmat->M;
8318965ea79SLois Curfman McInnes   a->N          = oldmat->N;
8328965ea79SLois Curfman McInnes 
8338965ea79SLois Curfman McInnes   a->assembled  = 1;
8348965ea79SLois Curfman McInnes   a->rstart     = oldmat->rstart;
8358965ea79SLois Curfman McInnes   a->rend       = oldmat->rend;
8368965ea79SLois Curfman McInnes   a->size       = oldmat->size;
8378965ea79SLois Curfman McInnes   a->rank       = oldmat->rank;
8388965ea79SLois Curfman McInnes   a->insertmode = NOT_SET_VALUES;
8398965ea79SLois Curfman McInnes 
840*0452661fSBarry Smith   a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners);
84139ddd567SLois Curfman McInnes   PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIDense));
8428965ea79SLois Curfman McInnes   PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));
8438965ea79SLois Curfman McInnes   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
8448965ea79SLois Curfman McInnes 
8458965ea79SLois Curfman McInnes   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
8468965ea79SLois Curfman McInnes   PLogObjectParent(mat,a->lvec);
84755659b69SBarry Smith   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
8488965ea79SLois Curfman McInnes   PLogObjectParent(mat,a->Mvctx);
8498965ea79SLois Curfman McInnes   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
8508965ea79SLois Curfman McInnes   PLogObjectParent(mat,a->A);
8518965ea79SLois Curfman McInnes   *newmat = mat;
8528965ea79SLois Curfman McInnes   return 0;
8538965ea79SLois Curfman McInnes }
8548965ea79SLois Curfman McInnes 
8558965ea79SLois Curfman McInnes #include "sysio.h"
8568965ea79SLois Curfman McInnes 
85739ddd567SLois Curfman McInnes int MatLoad_MPIDense(Viewer bview,MatType type,Mat *newmat)
8588965ea79SLois Curfman McInnes {
8598965ea79SLois Curfman McInnes   Mat          A;
8608965ea79SLois Curfman McInnes   int          i, nz, ierr, j,rstart, rend, fd;
8618965ea79SLois Curfman McInnes   Scalar       *vals,*svals;
8628965ea79SLois Curfman McInnes   PetscObject  vobj = (PetscObject) bview;
8638965ea79SLois Curfman McInnes   MPI_Comm     comm = vobj->comm;
8648965ea79SLois Curfman McInnes   MPI_Status   status;
8658965ea79SLois Curfman McInnes   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
8668965ea79SLois Curfman McInnes   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
8678965ea79SLois Curfman McInnes   int          tag = ((PetscObject)bview)->tag;
8688965ea79SLois Curfman McInnes 
8698965ea79SLois Curfman McInnes   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
8708965ea79SLois Curfman McInnes   if (!rank) {
8718965ea79SLois Curfman McInnes     ierr = ViewerFileGetDescriptor_Private(bview,&fd); CHKERRQ(ierr);
8728965ea79SLois Curfman McInnes     ierr = SYRead(fd,(char *)header,4,SYINT); CHKERRQ(ierr);
87339ddd567SLois Curfman McInnes     if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIDenseorMPIRow:not matrix object");
8748965ea79SLois Curfman McInnes   }
8758965ea79SLois Curfman McInnes 
8768965ea79SLois Curfman McInnes   MPI_Bcast(header+1,3,MPI_INT,0,comm);
8778965ea79SLois Curfman McInnes   M = header[1]; N = header[2];
8788965ea79SLois Curfman McInnes   /* determine ownership of all rows */
8798965ea79SLois Curfman McInnes   m = M/size + ((M % size) > rank);
880*0452661fSBarry Smith   rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
8818965ea79SLois Curfman McInnes   MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
8828965ea79SLois Curfman McInnes   rowners[0] = 0;
8838965ea79SLois Curfman McInnes   for ( i=2; i<=size; i++ ) {
8848965ea79SLois Curfman McInnes     rowners[i] += rowners[i-1];
8858965ea79SLois Curfman McInnes   }
8868965ea79SLois Curfman McInnes   rstart = rowners[rank];
8878965ea79SLois Curfman McInnes   rend   = rowners[rank+1];
8888965ea79SLois Curfman McInnes 
8898965ea79SLois Curfman McInnes   /* distribute row lengths to all processors */
890*0452661fSBarry Smith   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
8918965ea79SLois Curfman McInnes   offlens = ourlens + (rend-rstart);
8928965ea79SLois Curfman McInnes   if (!rank) {
893*0452661fSBarry Smith     rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
8948965ea79SLois Curfman McInnes     ierr = SYRead(fd,rowlengths,M,SYINT); CHKERRQ(ierr);
895*0452661fSBarry Smith     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
8968965ea79SLois Curfman McInnes     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
8978965ea79SLois Curfman McInnes     MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
898*0452661fSBarry Smith     PetscFree(sndcounts);
8998965ea79SLois Curfman McInnes   }
9008965ea79SLois Curfman McInnes   else {
9018965ea79SLois Curfman McInnes     MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
9028965ea79SLois Curfman McInnes   }
9038965ea79SLois Curfman McInnes 
9048965ea79SLois Curfman McInnes   if (!rank) {
9058965ea79SLois Curfman McInnes     /* calculate the number of nonzeros on each processor */
906*0452661fSBarry Smith     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
907cddf8d76SBarry Smith     PetscMemzero(procsnz,size*sizeof(int));
9088965ea79SLois Curfman McInnes     for ( i=0; i<size; i++ ) {
9098965ea79SLois Curfman McInnes       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
9108965ea79SLois Curfman McInnes         procsnz[i] += rowlengths[j];
9118965ea79SLois Curfman McInnes       }
9128965ea79SLois Curfman McInnes     }
913*0452661fSBarry Smith     PetscFree(rowlengths);
9148965ea79SLois Curfman McInnes 
9158965ea79SLois Curfman McInnes     /* determine max buffer needed and allocate it */
9168965ea79SLois Curfman McInnes     maxnz = 0;
9178965ea79SLois Curfman McInnes     for ( i=0; i<size; i++ ) {
918*0452661fSBarry Smith       maxnz = PetscMax(maxnz,procsnz[i]);
9198965ea79SLois Curfman McInnes     }
920*0452661fSBarry Smith     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
9218965ea79SLois Curfman McInnes 
9228965ea79SLois Curfman McInnes     /* read in my part of the matrix column indices  */
9238965ea79SLois Curfman McInnes     nz = procsnz[0];
924*0452661fSBarry Smith     mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
9258965ea79SLois Curfman McInnes     ierr = SYRead(fd,mycols,nz,SYINT); CHKERRQ(ierr);
9268965ea79SLois Curfman McInnes 
9278965ea79SLois Curfman McInnes     /* read in every one elses and ship off */
9288965ea79SLois Curfman McInnes     for ( i=1; i<size; i++ ) {
9298965ea79SLois Curfman McInnes       nz = procsnz[i];
9308965ea79SLois Curfman McInnes       ierr = SYRead(fd,cols,nz,SYINT); CHKERRQ(ierr);
9318965ea79SLois Curfman McInnes       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
9328965ea79SLois Curfman McInnes     }
933*0452661fSBarry Smith     PetscFree(cols);
9348965ea79SLois Curfman McInnes   }
9358965ea79SLois Curfman McInnes   else {
9368965ea79SLois Curfman McInnes     /* determine buffer space needed for message */
9378965ea79SLois Curfman McInnes     nz = 0;
9388965ea79SLois Curfman McInnes     for ( i=0; i<m; i++ ) {
9398965ea79SLois Curfman McInnes       nz += ourlens[i];
9408965ea79SLois Curfman McInnes     }
941*0452661fSBarry Smith     mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
9428965ea79SLois Curfman McInnes 
9438965ea79SLois Curfman McInnes     /* receive message of column indices*/
9448965ea79SLois Curfman McInnes     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
9458965ea79SLois Curfman McInnes     MPI_Get_count(&status,MPI_INT,&maxnz);
94639ddd567SLois Curfman McInnes     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIDenseorMPIRow:something is wrong with file");
9478965ea79SLois Curfman McInnes   }
9488965ea79SLois Curfman McInnes 
9498965ea79SLois Curfman McInnes   /* loop over local rows, determining number of off diagonal entries */
950cddf8d76SBarry Smith   PetscMemzero(offlens,m*sizeof(int));
9518965ea79SLois Curfman McInnes   jj = 0;
9528965ea79SLois Curfman McInnes   for ( i=0; i<m; i++ ) {
9538965ea79SLois Curfman McInnes     for ( j=0; j<ourlens[i]; j++ ) {
9548965ea79SLois Curfman McInnes       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
9558965ea79SLois Curfman McInnes       jj++;
9568965ea79SLois Curfman McInnes     }
9578965ea79SLois Curfman McInnes   }
9588965ea79SLois Curfman McInnes 
9598965ea79SLois Curfman McInnes   /* create our matrix */
9608965ea79SLois Curfman McInnes   for ( i=0; i<m; i++ ) {
9618965ea79SLois Curfman McInnes     ourlens[i] -= offlens[i];
9628965ea79SLois Curfman McInnes   }
96339ddd567SLois Curfman McInnes   if (type == MATMPIDENSE) {
96439ddd567SLois Curfman McInnes     ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,newmat);CHKERRQ(ierr);
9658965ea79SLois Curfman McInnes   }
9668965ea79SLois Curfman McInnes   A = *newmat;
9678965ea79SLois Curfman McInnes   for ( i=0; i<m; i++ ) {
9688965ea79SLois Curfman McInnes     ourlens[i] += offlens[i];
9698965ea79SLois Curfman McInnes   }
9708965ea79SLois Curfman McInnes 
9718965ea79SLois Curfman McInnes   if (!rank) {
972*0452661fSBarry Smith     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);
9738965ea79SLois Curfman McInnes 
9748965ea79SLois Curfman McInnes     /* read in my part of the matrix numerical values  */
9758965ea79SLois Curfman McInnes     nz = procsnz[0];
9768965ea79SLois Curfman McInnes     ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr);
9778965ea79SLois Curfman McInnes 
9788965ea79SLois Curfman McInnes     /* insert into matrix */
9798965ea79SLois Curfman McInnes     jj      = rstart;
9808965ea79SLois Curfman McInnes     smycols = mycols;
9818965ea79SLois Curfman McInnes     svals   = vals;
9828965ea79SLois Curfman McInnes     for ( i=0; i<m; i++ ) {
9838965ea79SLois Curfman McInnes       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
9848965ea79SLois Curfman McInnes       smycols += ourlens[i];
9858965ea79SLois Curfman McInnes       svals   += ourlens[i];
9868965ea79SLois Curfman McInnes       jj++;
9878965ea79SLois Curfman McInnes     }
9888965ea79SLois Curfman McInnes 
9898965ea79SLois Curfman McInnes     /* read in other processors and ship out */
9908965ea79SLois Curfman McInnes     for ( i=1; i<size; i++ ) {
9918965ea79SLois Curfman McInnes       nz = procsnz[i];
9928965ea79SLois Curfman McInnes       ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr);
9938965ea79SLois Curfman McInnes       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
9948965ea79SLois Curfman McInnes     }
995*0452661fSBarry Smith     PetscFree(procsnz);
9968965ea79SLois Curfman McInnes   }
9978965ea79SLois Curfman McInnes   else {
9988965ea79SLois Curfman McInnes     /* receive numeric values */
999*0452661fSBarry Smith     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);
10008965ea79SLois Curfman McInnes 
10018965ea79SLois Curfman McInnes     /* receive message of values*/
10028965ea79SLois Curfman McInnes     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
10038965ea79SLois Curfman McInnes     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
100439ddd567SLois Curfman McInnes     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIDenseorMPIRow:something is wrong with file");
10058965ea79SLois Curfman McInnes 
10068965ea79SLois Curfman McInnes     /* insert into matrix */
10078965ea79SLois Curfman McInnes     jj      = rstart;
10088965ea79SLois Curfman McInnes     smycols = mycols;
10098965ea79SLois Curfman McInnes     svals   = vals;
10108965ea79SLois Curfman McInnes     for ( i=0; i<m; i++ ) {
10118965ea79SLois Curfman McInnes       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
10128965ea79SLois Curfman McInnes       smycols += ourlens[i];
10138965ea79SLois Curfman McInnes       svals   += ourlens[i];
10148965ea79SLois Curfman McInnes       jj++;
10158965ea79SLois Curfman McInnes     }
10168965ea79SLois Curfman McInnes   }
1017*0452661fSBarry Smith   PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);
10188965ea79SLois Curfman McInnes 
10198965ea79SLois Curfman McInnes   ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
10208965ea79SLois Curfman McInnes   ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
10218965ea79SLois Curfman McInnes   return 0;
10228965ea79SLois Curfman McInnes }
1023