xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision ed2daf618d39a4e4da77b7c71505379737a539fc)
18965ea79SLois Curfman McInnes #ifndef lint
2*ed2daf61SLois Curfman McInnes static char vcid[] = "$Id: mpidense.c,v 1.15 1995/12/10 18:57:58 curfman Exp curfman $";
38965ea79SLois Curfman McInnes #endif
48965ea79SLois Curfman McInnes 
5ed3cc1f0SBarry Smith /*
6ed3cc1f0SBarry Smith    Basic functions for basic parallel dense matrices.
7ed3cc1f0SBarry Smith */
8ed3cc1f0SBarry Smith 
98965ea79SLois Curfman McInnes #include "mpidense.h"
108965ea79SLois Curfman McInnes #include "vec/vecimpl.h"
118965ea79SLois Curfman McInnes #include "inline/spops.h"
128965ea79SLois Curfman McInnes 
1339ddd567SLois Curfman McInnes static int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,
148965ea79SLois Curfman McInnes                                int *idxn,Scalar *v,InsertMode addv)
158965ea79SLois Curfman McInnes {
1639ddd567SLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
1739ddd567SLois Curfman McInnes   int          ierr, i, j, rstart = mdn->rstart, rend = mdn->rend, row;
188965ea79SLois Curfman McInnes 
1939ddd567SLois Curfman McInnes   if (mdn->insertmode != NOT_SET_VALUES && mdn->insertmode != addv) {
2039ddd567SLois Curfman McInnes     SETERRQ(1,"MatSetValues_MPIDense:Cannot mix inserts and adds");
218965ea79SLois Curfman McInnes   }
2239ddd567SLois Curfman McInnes   mdn->insertmode = addv;
238965ea79SLois Curfman McInnes   for ( i=0; i<m; i++ ) {
2439ddd567SLois Curfman McInnes     if (idxm[i] < 0) SETERRQ(1,"MatSetValues_MPIDense:Negative row");
2539ddd567SLois Curfman McInnes     if (idxm[i] >= mdn->M) SETERRQ(1,"MatSetValues_MPIDense:Row too large");
268965ea79SLois Curfman McInnes     if (idxm[i] >= rstart && idxm[i] < rend) {
278965ea79SLois Curfman McInnes       row = idxm[i] - rstart;
288965ea79SLois Curfman McInnes       for ( j=0; j<n; j++ ) {
2939ddd567SLois Curfman McInnes         if (idxn[j] < 0) SETERRQ(1,"MatSetValues_MPIDense:Negative column");
30b49de8d1SLois Curfman McInnes         if (idxn[j] >= mdn->N) SETERRQ(1,"MatSetValues_MPIDense:Column too large");
31b49de8d1SLois Curfman McInnes         ierr = MatSetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j,addv); CHKERRQ(ierr);
328965ea79SLois Curfman McInnes       }
338965ea79SLois Curfman McInnes     }
348965ea79SLois Curfman McInnes     else {
35b49de8d1SLois Curfman McInnes       ierr = StashValues_Private(&mdn->stash,idxm[i],n,idxn,v+i*n,addv); CHKERRQ(ierr);
36b49de8d1SLois Curfman McInnes     }
37b49de8d1SLois Curfman McInnes   }
38b49de8d1SLois Curfman McInnes   return 0;
39b49de8d1SLois Curfman McInnes }
40b49de8d1SLois Curfman McInnes 
41b49de8d1SLois Curfman McInnes static int MatGetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
42b49de8d1SLois Curfman McInnes {
43b49de8d1SLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
44b49de8d1SLois Curfman McInnes   int          ierr, i, j, rstart = mdn->rstart, rend = mdn->rend, row;
45b49de8d1SLois Curfman McInnes 
46b49de8d1SLois Curfman McInnes   if (!mdn->assembled) SETERRQ(1,"MatGetValues_MPIDense:Not for unassembled matrix");
47b49de8d1SLois Curfman McInnes   for ( i=0; i<m; i++ ) {
48b49de8d1SLois Curfman McInnes     if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIDense:Negative row");
49b49de8d1SLois Curfman McInnes     if (idxm[i] >= mdn->M) SETERRQ(1,"MatGetValues_MPIDense:Row too large");
50b49de8d1SLois Curfman McInnes     if (idxm[i] >= rstart && idxm[i] < rend) {
51b49de8d1SLois Curfman McInnes       row = idxm[i] - rstart;
52b49de8d1SLois Curfman McInnes       for ( j=0; j<n; j++ ) {
53b49de8d1SLois Curfman McInnes         if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIDense:Negative column");
54b49de8d1SLois Curfman McInnes         if (idxn[j] >= mdn->N)
55b49de8d1SLois Curfman McInnes           SETERRQ(1,"MatGetValues_MPIDense:Column too large");
56b49de8d1SLois Curfman McInnes         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j); CHKERRQ(ierr);
57b49de8d1SLois Curfman McInnes       }
58b49de8d1SLois Curfman McInnes     }
59b49de8d1SLois Curfman McInnes     else {
60b49de8d1SLois Curfman McInnes       SETERRQ(1,"MatGetValues_MPIDense:Only local values currently supported");
618965ea79SLois Curfman McInnes     }
628965ea79SLois Curfman McInnes   }
638965ea79SLois Curfman McInnes   return 0;
648965ea79SLois Curfman McInnes }
658965ea79SLois Curfman McInnes 
6639ddd567SLois Curfman McInnes static int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
678965ea79SLois Curfman McInnes {
6839ddd567SLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
698965ea79SLois Curfman McInnes   MPI_Comm     comm = mat->comm;
7039ddd567SLois Curfman McInnes   int          size = mdn->size, *owners = mdn->rowners, rank = mdn->rank;
718965ea79SLois Curfman McInnes   int          *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
7239ddd567SLois Curfman McInnes   int          tag = mat->tag, *owner,*starts,count,ierr;
738965ea79SLois Curfman McInnes   InsertMode   addv;
7439ddd567SLois Curfman McInnes   MPI_Request  *send_waits,*recv_waits;
758965ea79SLois Curfman McInnes   Scalar       *rvalues,*svalues;
768965ea79SLois Curfman McInnes 
778965ea79SLois Curfman McInnes   /* make sure all processors are either in INSERTMODE or ADDMODE */
7839ddd567SLois Curfman McInnes   MPI_Allreduce((void *) &mdn->insertmode,(void *) &addv,1,MPI_INT,
7939ddd567SLois Curfman McInnes                 MPI_BOR,comm);
8039ddd567SLois Curfman McInnes   if (addv == (ADD_VALUES|INSERT_VALUES)) { SETERRQ(1,
8139ddd567SLois Curfman McInnes     "MatAssemblyBegin_MPIDense:Cannot mix adds/inserts on different procs");
828965ea79SLois Curfman McInnes     }
8339ddd567SLois Curfman McInnes   mdn->insertmode = addv; /* in case this processor had no cache */
848965ea79SLois Curfman McInnes 
858965ea79SLois Curfman McInnes   /*  first count number of contributors to each processor */
860452661fSBarry Smith   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
87cddf8d76SBarry Smith   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
880452661fSBarry Smith   owner = (int *) PetscMalloc( (mdn->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
8939ddd567SLois Curfman McInnes   for ( i=0; i<mdn->stash.n; i++ ) {
9039ddd567SLois Curfman McInnes     idx = mdn->stash.idx[i];
918965ea79SLois Curfman McInnes     for ( j=0; j<size; j++ ) {
928965ea79SLois Curfman McInnes       if (idx >= owners[j] && idx < owners[j+1]) {
938965ea79SLois Curfman McInnes         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
948965ea79SLois Curfman McInnes       }
958965ea79SLois Curfman McInnes     }
968965ea79SLois Curfman McInnes   }
978965ea79SLois Curfman McInnes   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
988965ea79SLois Curfman McInnes 
998965ea79SLois Curfman McInnes   /* inform other processors of number of messages and max length*/
1000452661fSBarry Smith   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
10139ddd567SLois Curfman McInnes   MPI_Allreduce((void *) procs,(void *) work,size,MPI_INT,MPI_SUM,comm);
1028965ea79SLois Curfman McInnes   nreceives = work[rank];
10339ddd567SLois Curfman McInnes   MPI_Allreduce((void *) nprocs,(void *) work,size,MPI_INT,MPI_MAX,comm);
1048965ea79SLois Curfman McInnes   nmax = work[rank];
1050452661fSBarry Smith   PetscFree(work);
1068965ea79SLois Curfman McInnes 
1078965ea79SLois Curfman McInnes   /* post receives:
1088965ea79SLois Curfman McInnes        1) each message will consist of ordered pairs
1098965ea79SLois Curfman McInnes      (global index,value) we store the global index as a double
1108965ea79SLois Curfman McInnes      to simplify the message passing.
1118965ea79SLois Curfman McInnes        2) since we don't know how long each individual message is we
1128965ea79SLois Curfman McInnes      allocate the largest needed buffer for each receive. Potentially
1138965ea79SLois Curfman McInnes      this is a lot of wasted space.
1148965ea79SLois Curfman McInnes 
1158965ea79SLois Curfman McInnes        This could be done better.
1168965ea79SLois Curfman McInnes   */
1170452661fSBarry Smith   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
1188965ea79SLois Curfman McInnes   CHKPTRQ(rvalues);
1190452661fSBarry Smith   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
1208965ea79SLois Curfman McInnes   CHKPTRQ(recv_waits);
1218965ea79SLois Curfman McInnes   for ( i=0; i<nreceives; i++ ) {
12239ddd567SLois Curfman McInnes     MPI_Irecv((void *)(rvalues+3*nmax*i),3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
1238965ea79SLois Curfman McInnes               comm,recv_waits+i);
1248965ea79SLois Curfman McInnes   }
1258965ea79SLois Curfman McInnes 
1268965ea79SLois Curfman McInnes   /* do sends:
1278965ea79SLois Curfman McInnes       1) starts[i] gives the starting index in svalues for stuff going to
1288965ea79SLois Curfman McInnes          the ith processor
1298965ea79SLois Curfman McInnes   */
1300452661fSBarry Smith   svalues = (Scalar *) PetscMalloc( 3*(mdn->stash.n+1)*sizeof(Scalar) );
13139ddd567SLois Curfman McInnes   CHKPTRQ(svalues);
1320452661fSBarry Smith   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
1338965ea79SLois Curfman McInnes   CHKPTRQ(send_waits);
1340452661fSBarry Smith   starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
1358965ea79SLois Curfman McInnes   starts[0] = 0;
1368965ea79SLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
13739ddd567SLois Curfman McInnes   for ( i=0; i<mdn->stash.n; i++ ) {
13839ddd567SLois Curfman McInnes     svalues[3*starts[owner[i]]]       = (Scalar)  mdn->stash.idx[i];
13939ddd567SLois Curfman McInnes     svalues[3*starts[owner[i]]+1]     = (Scalar)  mdn->stash.idy[i];
14039ddd567SLois Curfman McInnes     svalues[3*(starts[owner[i]]++)+2] =  mdn->stash.array[i];
1418965ea79SLois Curfman McInnes   }
1420452661fSBarry Smith   PetscFree(owner);
1438965ea79SLois Curfman McInnes   starts[0] = 0;
1448965ea79SLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
1458965ea79SLois Curfman McInnes   count = 0;
1468965ea79SLois Curfman McInnes   for ( i=0; i<size; i++ ) {
1478965ea79SLois Curfman McInnes     if (procs[i]) {
14839ddd567SLois Curfman McInnes       MPI_Isend((void*)(svalues+3*starts[i]),3*nprocs[i],MPIU_SCALAR,i,tag,
1498965ea79SLois Curfman McInnes                 comm,send_waits+count++);
1508965ea79SLois Curfman McInnes     }
1518965ea79SLois Curfman McInnes   }
1520452661fSBarry Smith   PetscFree(starts); PetscFree(nprocs);
1538965ea79SLois Curfman McInnes 
1548965ea79SLois Curfman McInnes   /* Free cache space */
15539ddd567SLois Curfman McInnes   ierr = StashDestroy_Private(&mdn->stash); CHKERRQ(ierr);
1568965ea79SLois Curfman McInnes 
15739ddd567SLois Curfman McInnes   mdn->svalues    = svalues;    mdn->rvalues = rvalues;
15839ddd567SLois Curfman McInnes   mdn->nsends     = nsends;     mdn->nrecvs = nreceives;
15939ddd567SLois Curfman McInnes   mdn->send_waits = send_waits; mdn->recv_waits = recv_waits;
16039ddd567SLois Curfman McInnes   mdn->rmax       = nmax;
1618965ea79SLois Curfman McInnes 
1628965ea79SLois Curfman McInnes   return 0;
1638965ea79SLois Curfman McInnes }
16439ddd567SLois Curfman McInnes extern int MatSetUpMultiply_MPIDense(Mat);
1658965ea79SLois Curfman McInnes 
16639ddd567SLois Curfman McInnes static int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
1678965ea79SLois Curfman McInnes {
16839ddd567SLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
1698965ea79SLois Curfman McInnes   MPI_Status   *send_status,recv_status;
17039ddd567SLois Curfman McInnes   int          imdex, nrecvs=mdn->nrecvs, count=nrecvs, i, n, ierr, row, col;
1718965ea79SLois Curfman McInnes   Scalar       *values,val;
17239ddd567SLois Curfman McInnes   InsertMode   addv = mdn->insertmode;
1738965ea79SLois Curfman McInnes 
1748965ea79SLois Curfman McInnes   /*  wait on receives */
1758965ea79SLois Curfman McInnes   while (count) {
17639ddd567SLois Curfman McInnes     MPI_Waitany(nrecvs,mdn->recv_waits,&imdex,&recv_status);
1778965ea79SLois Curfman McInnes     /* unpack receives into our local space */
17839ddd567SLois Curfman McInnes     values = mdn->rvalues + 3*imdex*mdn->rmax;
1798965ea79SLois Curfman McInnes     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
1808965ea79SLois Curfman McInnes     n = n/3;
1818965ea79SLois Curfman McInnes     for ( i=0; i<n; i++ ) {
18239ddd567SLois Curfman McInnes       row = (int) PETSCREAL(values[3*i]) - mdn->rstart;
1838965ea79SLois Curfman McInnes       col = (int) PETSCREAL(values[3*i+1]);
1848965ea79SLois Curfman McInnes       val = values[3*i+2];
18539ddd567SLois Curfman McInnes       if (col >= 0 && col < mdn->N) {
18639ddd567SLois Curfman McInnes         MatSetValues(mdn->A,1,&row,1,&col,&val,addv);
1878965ea79SLois Curfman McInnes       }
18839ddd567SLois Curfman McInnes       else {SETERRQ(1,"MatAssemblyEnd_MPIDense:Invalid column");}
1898965ea79SLois Curfman McInnes     }
1908965ea79SLois Curfman McInnes     count--;
1918965ea79SLois Curfman McInnes   }
1920452661fSBarry Smith   PetscFree(mdn->recv_waits); PetscFree(mdn->rvalues);
1938965ea79SLois Curfman McInnes 
1948965ea79SLois Curfman McInnes   /* wait on sends */
19539ddd567SLois Curfman McInnes   if (mdn->nsends) {
1960452661fSBarry Smith     send_status = (MPI_Status *) PetscMalloc( mdn->nsends*sizeof(MPI_Status) );
1978965ea79SLois Curfman McInnes     CHKPTRQ(send_status);
19839ddd567SLois Curfman McInnes     MPI_Waitall(mdn->nsends,mdn->send_waits,send_status);
1990452661fSBarry Smith     PetscFree(send_status);
2008965ea79SLois Curfman McInnes   }
2010452661fSBarry Smith   PetscFree(mdn->send_waits); PetscFree(mdn->svalues);
2028965ea79SLois Curfman McInnes 
20339ddd567SLois Curfman McInnes   mdn->insertmode = NOT_SET_VALUES;
20439ddd567SLois Curfman McInnes   ierr = MatAssemblyBegin(mdn->A,mode); CHKERRQ(ierr);
20539ddd567SLois Curfman McInnes   ierr = MatAssemblyEnd(mdn->A,mode); CHKERRQ(ierr);
2068965ea79SLois Curfman McInnes 
20739ddd567SLois Curfman McInnes   if (!mdn->assembled && mode == FINAL_ASSEMBLY) {
20839ddd567SLois Curfman McInnes     ierr = MatSetUpMultiply_MPIDense(mat); CHKERRQ(ierr);
2098965ea79SLois Curfman McInnes   }
21039ddd567SLois Curfman McInnes   mdn->assembled = 1;
2118965ea79SLois Curfman McInnes   return 0;
2128965ea79SLois Curfman McInnes }
2138965ea79SLois Curfman McInnes 
21439ddd567SLois Curfman McInnes static int MatZeroEntries_MPIDense(Mat A)
2158965ea79SLois Curfman McInnes {
21639ddd567SLois Curfman McInnes   Mat_MPIDense *l = (Mat_MPIDense *) A->data;
21739ddd567SLois Curfman McInnes   return MatZeroEntries(l->A);
2188965ea79SLois Curfman McInnes }
2198965ea79SLois Curfman McInnes 
2208965ea79SLois Curfman McInnes /* the code does not do the diagonal entries correctly unless the
2218965ea79SLois Curfman McInnes    matrix is square and the column and row owerships are identical.
2228965ea79SLois Curfman McInnes    This is a BUG. The only way to fix it seems to be to access
2233501a2bdSLois Curfman McInnes    mdn->A and mdn->B directly and not through the MatZeroRows()
2248965ea79SLois Curfman McInnes    routine.
2258965ea79SLois Curfman McInnes */
22639ddd567SLois Curfman McInnes static int MatZeroRows_MPIDense(Mat A,IS is,Scalar *diag)
2278965ea79SLois Curfman McInnes {
22839ddd567SLois Curfman McInnes   Mat_MPIDense   *l = (Mat_MPIDense *) A->data;
2298965ea79SLois Curfman McInnes   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
2308965ea79SLois Curfman McInnes   int            *procs,*nprocs,j,found,idx,nsends,*work;
2318965ea79SLois Curfman McInnes   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
2328965ea79SLois Curfman McInnes   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
2338965ea79SLois Curfman McInnes   int            *lens,imdex,*lrows,*values;
2348965ea79SLois Curfman McInnes   MPI_Comm       comm = A->comm;
2358965ea79SLois Curfman McInnes   MPI_Request    *send_waits,*recv_waits;
2368965ea79SLois Curfman McInnes   MPI_Status     recv_status,*send_status;
2378965ea79SLois Curfman McInnes   IS             istmp;
2388965ea79SLois Curfman McInnes 
23939ddd567SLois Curfman McInnes   if (!l->assembled) SETERRQ(1,"MatZeroRows_MPIDense:Must assemble matrix");
2408965ea79SLois Curfman McInnes   ierr = ISGetLocalSize(is,&N); CHKERRQ(ierr);
2418965ea79SLois Curfman McInnes   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
2428965ea79SLois Curfman McInnes 
2438965ea79SLois Curfman McInnes   /*  first count number of contributors to each processor */
2440452661fSBarry Smith   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
245cddf8d76SBarry Smith   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
2460452661fSBarry Smith   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
2478965ea79SLois Curfman McInnes   for ( i=0; i<N; i++ ) {
2488965ea79SLois Curfman McInnes     idx = rows[i];
2498965ea79SLois Curfman McInnes     found = 0;
2508965ea79SLois Curfman McInnes     for ( j=0; j<size; j++ ) {
2518965ea79SLois Curfman McInnes       if (idx >= owners[j] && idx < owners[j+1]) {
2528965ea79SLois Curfman McInnes         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
2538965ea79SLois Curfman McInnes       }
2548965ea79SLois Curfman McInnes     }
25539ddd567SLois Curfman McInnes     if (!found) SETERRQ(1,"MatZeroRows_MPIDense:Index out of range");
2568965ea79SLois Curfman McInnes   }
2578965ea79SLois Curfman McInnes   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
2588965ea79SLois Curfman McInnes 
2598965ea79SLois Curfman McInnes   /* inform other processors of number of messages and max length*/
2600452661fSBarry Smith   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
2618965ea79SLois Curfman McInnes   MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
2628965ea79SLois Curfman McInnes   nrecvs = work[rank];
2638965ea79SLois Curfman McInnes   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
2648965ea79SLois Curfman McInnes   nmax = work[rank];
2650452661fSBarry Smith   PetscFree(work);
2668965ea79SLois Curfman McInnes 
2678965ea79SLois Curfman McInnes   /* post receives:   */
2680452661fSBarry Smith   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
2698965ea79SLois Curfman McInnes   CHKPTRQ(rvalues);
2700452661fSBarry Smith   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
2718965ea79SLois Curfman McInnes   CHKPTRQ(recv_waits);
2728965ea79SLois Curfman McInnes   for ( i=0; i<nrecvs; i++ ) {
2738965ea79SLois Curfman McInnes     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
2748965ea79SLois Curfman McInnes   }
2758965ea79SLois Curfman McInnes 
2768965ea79SLois Curfman McInnes   /* do sends:
2778965ea79SLois Curfman McInnes       1) starts[i] gives the starting index in svalues for stuff going to
2788965ea79SLois Curfman McInnes          the ith processor
2798965ea79SLois Curfman McInnes   */
2800452661fSBarry Smith   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
2810452661fSBarry Smith   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
2828965ea79SLois Curfman McInnes   CHKPTRQ(send_waits);
2830452661fSBarry Smith   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
2848965ea79SLois Curfman McInnes   starts[0] = 0;
2858965ea79SLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
2868965ea79SLois Curfman McInnes   for ( i=0; i<N; i++ ) {
2878965ea79SLois Curfman McInnes     svalues[starts[owner[i]]++] = rows[i];
2888965ea79SLois Curfman McInnes   }
2898965ea79SLois Curfman McInnes   ISRestoreIndices(is,&rows);
2908965ea79SLois Curfman McInnes 
2918965ea79SLois Curfman McInnes   starts[0] = 0;
2928965ea79SLois Curfman McInnes   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
2938965ea79SLois Curfman McInnes   count = 0;
2948965ea79SLois Curfman McInnes   for ( i=0; i<size; i++ ) {
2958965ea79SLois Curfman McInnes     if (procs[i]) {
2968965ea79SLois Curfman McInnes       MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
2978965ea79SLois Curfman McInnes     }
2988965ea79SLois Curfman McInnes   }
2990452661fSBarry Smith   PetscFree(starts);
3008965ea79SLois Curfman McInnes 
3018965ea79SLois Curfman McInnes   base = owners[rank];
3028965ea79SLois Curfman McInnes 
3038965ea79SLois Curfman McInnes   /*  wait on receives */
3040452661fSBarry Smith   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
3058965ea79SLois Curfman McInnes   source = lens + nrecvs;
3068965ea79SLois Curfman McInnes   count  = nrecvs; slen = 0;
3078965ea79SLois Curfman McInnes   while (count) {
3088965ea79SLois Curfman McInnes     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
3098965ea79SLois Curfman McInnes     /* unpack receives into our local space */
3108965ea79SLois Curfman McInnes     MPI_Get_count(&recv_status,MPI_INT,&n);
3118965ea79SLois Curfman McInnes     source[imdex]  = recv_status.MPI_SOURCE;
3128965ea79SLois Curfman McInnes     lens[imdex]  = n;
3138965ea79SLois Curfman McInnes     slen += n;
3148965ea79SLois Curfman McInnes     count--;
3158965ea79SLois Curfman McInnes   }
3160452661fSBarry Smith   PetscFree(recv_waits);
3178965ea79SLois Curfman McInnes 
3188965ea79SLois Curfman McInnes   /* move the data into the send scatter */
3190452661fSBarry Smith   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
3208965ea79SLois Curfman McInnes   count = 0;
3218965ea79SLois Curfman McInnes   for ( i=0; i<nrecvs; i++ ) {
3228965ea79SLois Curfman McInnes     values = rvalues + i*nmax;
3238965ea79SLois Curfman McInnes     for ( j=0; j<lens[i]; j++ ) {
3248965ea79SLois Curfman McInnes       lrows[count++] = values[j] - base;
3258965ea79SLois Curfman McInnes     }
3268965ea79SLois Curfman McInnes   }
3270452661fSBarry Smith   PetscFree(rvalues); PetscFree(lens);
3280452661fSBarry Smith   PetscFree(owner); PetscFree(nprocs);
3298965ea79SLois Curfman McInnes 
3308965ea79SLois Curfman McInnes   /* actually zap the local rows */
3318965ea79SLois Curfman McInnes   ierr = ISCreateSeq(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
3328965ea79SLois Curfman McInnes   PLogObjectParent(A,istmp);
3330452661fSBarry Smith   PetscFree(lrows);
3348965ea79SLois Curfman McInnes   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
3358965ea79SLois Curfman McInnes   ierr = ISDestroy(istmp); CHKERRQ(ierr);
3368965ea79SLois Curfman McInnes 
3378965ea79SLois Curfman McInnes   /* wait on sends */
3388965ea79SLois Curfman McInnes   if (nsends) {
3390452661fSBarry Smith     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
3408965ea79SLois Curfman McInnes     CHKPTRQ(send_status);
3418965ea79SLois Curfman McInnes     MPI_Waitall(nsends,send_waits,send_status);
3420452661fSBarry Smith     PetscFree(send_status);
3438965ea79SLois Curfman McInnes   }
3440452661fSBarry Smith   PetscFree(send_waits); PetscFree(svalues);
3458965ea79SLois Curfman McInnes 
3468965ea79SLois Curfman McInnes   return 0;
3478965ea79SLois Curfman McInnes }
3488965ea79SLois Curfman McInnes 
34939ddd567SLois Curfman McInnes static int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
3508965ea79SLois Curfman McInnes {
35139ddd567SLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
3528965ea79SLois Curfman McInnes   int          ierr;
35339ddd567SLois Curfman McInnes   if (!mdn->assembled)
35439ddd567SLois Curfman McInnes     SETERRQ(1,"MatMult_MPIDense:Must assemble matrix first");
35539ddd567SLois Curfman McInnes   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx);
35639ddd567SLois Curfman McInnes   CHKERRQ(ierr);
35739ddd567SLois Curfman McInnes   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx);
35839ddd567SLois Curfman McInnes   CHKERRQ(ierr);
35939ddd567SLois Curfman McInnes   ierr = MatMult(mdn->A,mdn->lvec,yy); CHKERRQ(ierr);
3608965ea79SLois Curfman McInnes   return 0;
3618965ea79SLois Curfman McInnes }
3628965ea79SLois Curfman McInnes 
36339ddd567SLois Curfman McInnes static int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
3648965ea79SLois Curfman McInnes {
36539ddd567SLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
3668965ea79SLois Curfman McInnes   int          ierr;
36739ddd567SLois Curfman McInnes   if (!mdn->assembled)
36839ddd567SLois Curfman McInnes     SETERRQ(1,"MatMultAdd_MPIDense:Must assemble matrix first");
3693501a2bdSLois Curfman McInnes   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx);
37039ddd567SLois Curfman McInnes   CHKERRQ(ierr);
3713501a2bdSLois Curfman McInnes   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx);
37239ddd567SLois Curfman McInnes   CHKERRQ(ierr);
37339ddd567SLois Curfman McInnes   ierr = MatMultAdd(mdn->A,mdn->lvec,yy,zz); CHKERRQ(ierr);
3748965ea79SLois Curfman McInnes   return 0;
3758965ea79SLois Curfman McInnes }
3768965ea79SLois Curfman McInnes 
377096963f5SLois Curfman McInnes static int MatMultTrans_MPIDense(Mat A,Vec xx,Vec yy)
378096963f5SLois Curfman McInnes {
379096963f5SLois Curfman McInnes   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
380096963f5SLois Curfman McInnes   int          ierr;
3813501a2bdSLois Curfman McInnes   Scalar       zero = 0.0;
382096963f5SLois Curfman McInnes 
383096963f5SLois Curfman McInnes   if (!a->assembled) SETERRQ(1,"MatMulTrans_MPIDense:must assemble matrix");
3843501a2bdSLois Curfman McInnes   ierr = VecSet(&zero,yy); CHKERRQ(ierr);
385096963f5SLois Curfman McInnes   ierr = MatMultTrans(a->A,xx,a->lvec); CHKERRQ(ierr);
386096963f5SLois Curfman McInnes   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,
387096963f5SLois Curfman McInnes          (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
388096963f5SLois Curfman McInnes   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,
389096963f5SLois Curfman McInnes          (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
390096963f5SLois Curfman McInnes   return 0;
391096963f5SLois Curfman McInnes }
392096963f5SLois Curfman McInnes 
393096963f5SLois Curfman McInnes static int MatMultTransAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
394096963f5SLois Curfman McInnes {
395096963f5SLois Curfman McInnes   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
396096963f5SLois Curfman McInnes   int          ierr;
397096963f5SLois Curfman McInnes 
398096963f5SLois Curfman McInnes   if (!a->assembled) SETERRQ(1,"MatMulTransAdd_MPIDense:must assemble matrix");
3993501a2bdSLois Curfman McInnes   ierr = VecCopy(yy,zz); CHKERRQ(ierr);
4003501a2bdSLois Curfman McInnes   ierr = MatMultTrans(a->A,xx,a->lvec); CHKERRQ(ierr);
401096963f5SLois Curfman McInnes   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,
402096963f5SLois Curfman McInnes          (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
403096963f5SLois Curfman McInnes   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,
404096963f5SLois Curfman McInnes          (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
405096963f5SLois Curfman McInnes   return 0;
406096963f5SLois Curfman McInnes }
407096963f5SLois Curfman McInnes 
40839ddd567SLois Curfman McInnes static int MatGetDiagonal_MPIDense(Mat A,Vec v)
4098965ea79SLois Curfman McInnes {
41039ddd567SLois Curfman McInnes   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
411096963f5SLois Curfman McInnes   Mat_SeqDense *aloc = (Mat_SeqDense *) a->A->data;
412096963f5SLois Curfman McInnes   int          ierr, i, n, m = a->m, radd;
413096963f5SLois Curfman McInnes   Scalar       *x;
414ed3cc1f0SBarry Smith 
41539ddd567SLois Curfman McInnes   if (!a->assembled) SETERRQ(1,"MatGetDiag_MPIDense:must assemble matrix");
416096963f5SLois Curfman McInnes   ierr = VecGetArray(v,&x); CHKERRQ(ierr);
417096963f5SLois Curfman McInnes   ierr = VecGetSize(v,&n); CHKERRQ(ierr);
418096963f5SLois Curfman McInnes   if (n != a->M) SETERRQ(1,"MatGetDiagonal_SeqDense:Nonconforming mat and vec");
419096963f5SLois Curfman McInnes   radd = a->rstart*m*m;
420096963f5SLois Curfman McInnes   for ( i=0; i<m; i++ ) {
421096963f5SLois Curfman McInnes     x[i] = aloc->v[radd + i*m + i];
422096963f5SLois Curfman McInnes   }
423096963f5SLois Curfman McInnes   return 0;
4248965ea79SLois Curfman McInnes }
4258965ea79SLois Curfman McInnes 
42639ddd567SLois Curfman McInnes static int MatDestroy_MPIDense(PetscObject obj)
4278965ea79SLois Curfman McInnes {
4288965ea79SLois Curfman McInnes   Mat          mat = (Mat) obj;
4293501a2bdSLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
4308965ea79SLois Curfman McInnes   int          ierr;
431ed3cc1f0SBarry Smith 
4328965ea79SLois Curfman McInnes #if defined(PETSC_LOG)
4333501a2bdSLois Curfman McInnes   PLogObjectState(obj,"Rows=%d, Cols=%d",mdn->M,mdn->N);
4348965ea79SLois Curfman McInnes #endif
4350452661fSBarry Smith   PetscFree(mdn->rowners);
4363501a2bdSLois Curfman McInnes   ierr = MatDestroy(mdn->A); CHKERRQ(ierr);
4373501a2bdSLois Curfman McInnes   if (mdn->lvec)   VecDestroy(mdn->lvec);
4383501a2bdSLois Curfman McInnes   if (mdn->Mvctx)  VecScatterDestroy(mdn->Mvctx);
439052cd425SLois Curfman McInnes   if (mdn->factor) PetscFree(mdn->factor);
4400452661fSBarry Smith   PetscFree(mdn);
4418965ea79SLois Curfman McInnes   PLogObjectDestroy(mat);
4420452661fSBarry Smith   PetscHeaderDestroy(mat);
4438965ea79SLois Curfman McInnes   return 0;
4448965ea79SLois Curfman McInnes }
44539ddd567SLois Curfman McInnes 
4468965ea79SLois Curfman McInnes #include "pinclude/pviewer.h"
4478965ea79SLois Curfman McInnes 
44839ddd567SLois Curfman McInnes static int MatView_MPIDense_Binary(Mat mat,Viewer viewer)
4498965ea79SLois Curfman McInnes {
45039ddd567SLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
4518965ea79SLois Curfman McInnes   int          ierr;
45239ddd567SLois Curfman McInnes   if (mdn->size == 1) {
45339ddd567SLois Curfman McInnes     ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
4548965ea79SLois Curfman McInnes   }
45539ddd567SLois Curfman McInnes   else SETERRQ(1,"MatView_MPIDense_Binary:Only uniprocessor output supported");
4568965ea79SLois Curfman McInnes   return 0;
4578965ea79SLois Curfman McInnes }
4588965ea79SLois Curfman McInnes 
45939ddd567SLois Curfman McInnes static int MatView_MPIDense_ASCII(Mat mat,Viewer viewer)
4608965ea79SLois Curfman McInnes {
46139ddd567SLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
46239ddd567SLois Curfman McInnes   int          ierr, format;
4638965ea79SLois Curfman McInnes   PetscObject  vobj = (PetscObject) viewer;
4648965ea79SLois Curfman McInnes   FILE         *fd;
4658965ea79SLois Curfman McInnes 
4663501a2bdSLois Curfman McInnes   ierr = ViewerFileGetPointer_Private(viewer,&fd); CHKERRQ(ierr);
4678965ea79SLois Curfman McInnes   if (vobj->type == ASCII_FILE_VIEWER || vobj->type == ASCII_FILES_VIEWER) {
4688965ea79SLois Curfman McInnes     ierr = ViewerFileGetFormat_Private(viewer,&format);
4693501a2bdSLois Curfman McInnes     if (format == FILE_FORMAT_INFO_DETAILED) {
470096963f5SLois Curfman McInnes       int nz, nzalloc, mem, rank;
471096963f5SLois Curfman McInnes       MPI_Comm_rank(mat->comm,&rank);
472096963f5SLois Curfman McInnes       ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem);
473096963f5SLois Curfman McInnes       MPIU_Seq_begin(mat->comm,1);
4743501a2bdSLois Curfman McInnes         fprintf(fd,"  [%d] local rows %d nz %d nz alloced %d mem %d \n",
475096963f5SLois Curfman McInnes             rank,mdn->m,nz,nzalloc,mem);
476096963f5SLois Curfman McInnes       fflush(fd);
477096963f5SLois Curfman McInnes       MPIU_Seq_end(mat->comm,1);
4783501a2bdSLois Curfman McInnes       ierr = VecScatterView(mdn->Mvctx,viewer); CHKERRQ(ierr);
4793501a2bdSLois Curfman McInnes       return 0;
4803501a2bdSLois Curfman McInnes     }
4813501a2bdSLois Curfman McInnes     else if (format == FILE_FORMAT_INFO) {
482096963f5SLois Curfman McInnes       return 0;
4838965ea79SLois Curfman McInnes     }
4848965ea79SLois Curfman McInnes   }
4858965ea79SLois Curfman McInnes   if (vobj->type == ASCII_FILE_VIEWER) {
4868965ea79SLois Curfman McInnes     MPIU_Seq_begin(mat->comm,1);
48739ddd567SLois Curfman McInnes     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d\n",
48839ddd567SLois Curfman McInnes              mdn->rank,mdn->m,mdn->rstart,mdn->rend,mdn->n);
48939ddd567SLois Curfman McInnes     ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
4908965ea79SLois Curfman McInnes     fflush(fd);
4918965ea79SLois Curfman McInnes     MPIU_Seq_end(mat->comm,1);
4928965ea79SLois Curfman McInnes   }
4938965ea79SLois Curfman McInnes   else {
49439ddd567SLois Curfman McInnes     int size = mdn->size, rank = mdn->rank;
4958965ea79SLois Curfman McInnes     if (size == 1) {
49639ddd567SLois Curfman McInnes       ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
4978965ea79SLois Curfman McInnes     }
4988965ea79SLois Curfman McInnes     else {
4998965ea79SLois Curfman McInnes       /* assemble the entire matrix onto first processor. */
5008965ea79SLois Curfman McInnes       Mat          A;
50139ddd567SLois Curfman McInnes       int          M = mdn->M, N = mdn->N,m,row,i, nz, *cols;
50239ddd567SLois Curfman McInnes       Scalar       *vals;
50339ddd567SLois Curfman McInnes       Mat_SeqDense *Amdn = (Mat_SeqDense*) mdn->A->data;
5048965ea79SLois Curfman McInnes 
5058965ea79SLois Curfman McInnes       if (!rank) {
506*ed2daf61SLois Curfman McInnes         ierr = MatCreateMPIDense(mat->comm,M,M,N,N,PetscNull,&A); CHKERRQ(ierr);
5078965ea79SLois Curfman McInnes       }
5088965ea79SLois Curfman McInnes       else {
509*ed2daf61SLois Curfman McInnes         ierr = MatCreateMPIDense(mat->comm,0,M,N,N,PetscNull,&A); CHKERRQ(ierr);
5108965ea79SLois Curfman McInnes       }
5118965ea79SLois Curfman McInnes       PLogObjectParent(mat,A);
5128965ea79SLois Curfman McInnes 
51339ddd567SLois Curfman McInnes       /* Copy the matrix ... This isn't the most efficient means,
51439ddd567SLois Curfman McInnes          but it's quick for now */
51539ddd567SLois Curfman McInnes       row = mdn->rstart; m = Amdn->m;
5168965ea79SLois Curfman McInnes       for ( i=0; i<m; i++ ) {
51739ddd567SLois Curfman McInnes         ierr = MatGetRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
51839ddd567SLois Curfman McInnes         ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES); CHKERRQ(ierr);
51939ddd567SLois Curfman McInnes         ierr = MatRestoreRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
52039ddd567SLois Curfman McInnes         row++;
5218965ea79SLois Curfman McInnes       }
5228965ea79SLois Curfman McInnes 
5238965ea79SLois Curfman McInnes       ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
5248965ea79SLois Curfman McInnes       ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
5258965ea79SLois Curfman McInnes       if (!rank) {
52639ddd567SLois Curfman McInnes         ierr = MatView(((Mat_MPIDense*)(A->data))->A,viewer); CHKERRQ(ierr);
5278965ea79SLois Curfman McInnes       }
5288965ea79SLois Curfman McInnes       ierr = MatDestroy(A); CHKERRQ(ierr);
5298965ea79SLois Curfman McInnes     }
5308965ea79SLois Curfman McInnes   }
5318965ea79SLois Curfman McInnes   return 0;
5328965ea79SLois Curfman McInnes }
5338965ea79SLois Curfman McInnes 
53439ddd567SLois Curfman McInnes static int MatView_MPIDense(PetscObject obj,Viewer viewer)
5358965ea79SLois Curfman McInnes {
5368965ea79SLois Curfman McInnes   Mat          mat = (Mat) obj;
53739ddd567SLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
5388965ea79SLois Curfman McInnes   PetscObject  vobj = (PetscObject) viewer;
53939ddd567SLois Curfman McInnes   int          ierr;
5408965ea79SLois Curfman McInnes 
54139ddd567SLois Curfman McInnes   if (!mdn->assembled) SETERRQ(1,"MatView_MPIDense:must assemble matrix");
5428965ea79SLois Curfman McInnes   if (!viewer) {
5438965ea79SLois Curfman McInnes     viewer = STDOUT_VIEWER_SELF; vobj = (PetscObject) viewer;
5448965ea79SLois Curfman McInnes   }
54539ddd567SLois Curfman McInnes   if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILE_VIEWER) {
54639ddd567SLois Curfman McInnes     ierr = MatView_MPIDense_ASCII(mat,viewer); CHKERRQ(ierr);
5478965ea79SLois Curfman McInnes   }
5488965ea79SLois Curfman McInnes   else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILES_VIEWER) {
54939ddd567SLois Curfman McInnes     ierr = MatView_MPIDense_ASCII(mat,viewer); CHKERRQ(ierr);
5508965ea79SLois Curfman McInnes   }
5518965ea79SLois Curfman McInnes   else if (vobj->type == BINARY_FILE_VIEWER) {
55239ddd567SLois Curfman McInnes     return MatView_MPIDense_Binary(mat,viewer);
5538965ea79SLois Curfman McInnes   }
5548965ea79SLois Curfman McInnes   return 0;
5558965ea79SLois Curfman McInnes }
5568965ea79SLois Curfman McInnes 
5573501a2bdSLois Curfman McInnes static int MatGetInfo_MPIDense(Mat A,MatInfoType flag,int *nz,
5588965ea79SLois Curfman McInnes                              int *nzalloc,int *mem)
5598965ea79SLois Curfman McInnes {
5603501a2bdSLois Curfman McInnes   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
5613501a2bdSLois Curfman McInnes   Mat          mdn = mat->A;
56239ddd567SLois Curfman McInnes   int          ierr, isend[3], irecv[3];
5638965ea79SLois Curfman McInnes 
5643501a2bdSLois Curfman McInnes   ierr = MatGetInfo(mdn,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr);
5658965ea79SLois Curfman McInnes   if (flag == MAT_LOCAL) {
5668965ea79SLois Curfman McInnes     *nz = isend[0]; *nzalloc = isend[1]; *mem = isend[2];
5678965ea79SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_MAX) {
5683501a2bdSLois Curfman McInnes     MPI_Allreduce(isend,irecv,3,MPI_INT,MPI_MAX,A->comm);
5698965ea79SLois Curfman McInnes     *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
5708965ea79SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_SUM) {
5713501a2bdSLois Curfman McInnes     MPI_Allreduce(isend,irecv,3,MPI_INT,MPI_SUM,A->comm);
5728965ea79SLois Curfman McInnes     *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
5738965ea79SLois Curfman McInnes   }
5748965ea79SLois Curfman McInnes   return 0;
5758965ea79SLois Curfman McInnes }
5768965ea79SLois Curfman McInnes 
57739ddd567SLois Curfman McInnes static int MatSetOption_MPIDense(Mat A,MatOption op)
5788965ea79SLois Curfman McInnes {
57939ddd567SLois Curfman McInnes   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
5808965ea79SLois Curfman McInnes 
5818965ea79SLois Curfman McInnes   if (op == NO_NEW_NONZERO_LOCATIONS ||
5828965ea79SLois Curfman McInnes       op == YES_NEW_NONZERO_LOCATIONS ||
5838965ea79SLois Curfman McInnes       op == COLUMNS_SORTED ||
5848965ea79SLois Curfman McInnes       op == ROW_ORIENTED) {
5858965ea79SLois Curfman McInnes         MatSetOption(a->A,op);
5868965ea79SLois Curfman McInnes   }
5878965ea79SLois Curfman McInnes   else if (op == ROWS_SORTED ||
5888965ea79SLois Curfman McInnes            op == SYMMETRIC_MATRIX ||
5898965ea79SLois Curfman McInnes            op == STRUCTURALLY_SYMMETRIC_MATRIX ||
5908965ea79SLois Curfman McInnes            op == YES_NEW_DIAGONALS)
59139ddd567SLois Curfman McInnes     PLogInfo((PetscObject)A,"Info:MatSetOption_MPIDense:Option ignored\n");
5928965ea79SLois Curfman McInnes   else if (op == COLUMN_ORIENTED)
59339ddd567SLois Curfman McInnes     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIDense:COLUMN_ORIENTED");}
5948965ea79SLois Curfman McInnes   else if (op == NO_NEW_DIAGONALS)
59539ddd567SLois Curfman McInnes     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIDense:NO_NEW_DIAGONALS");}
5968965ea79SLois Curfman McInnes   else
59739ddd567SLois Curfman McInnes     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIDense:unknown option");}
5988965ea79SLois Curfman McInnes   return 0;
5998965ea79SLois Curfman McInnes }
6008965ea79SLois Curfman McInnes 
6013501a2bdSLois Curfman McInnes static int MatGetSize_MPIDense(Mat A,int *m,int *n)
6028965ea79SLois Curfman McInnes {
6033501a2bdSLois Curfman McInnes   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
6048965ea79SLois Curfman McInnes   *m = mat->M; *n = mat->N;
6058965ea79SLois Curfman McInnes   return 0;
6068965ea79SLois Curfman McInnes }
6078965ea79SLois Curfman McInnes 
6083501a2bdSLois Curfman McInnes static int MatGetLocalSize_MPIDense(Mat A,int *m,int *n)
6098965ea79SLois Curfman McInnes {
6103501a2bdSLois Curfman McInnes   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
6118965ea79SLois Curfman McInnes   *m = mat->m; *n = mat->N;
6128965ea79SLois Curfman McInnes   return 0;
6138965ea79SLois Curfman McInnes }
6148965ea79SLois Curfman McInnes 
6153501a2bdSLois Curfman McInnes static int MatGetOwnershipRange_MPIDense(Mat A,int *m,int *n)
6168965ea79SLois Curfman McInnes {
6173501a2bdSLois Curfman McInnes   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
6188965ea79SLois Curfman McInnes   *m = mat->rstart; *n = mat->rend;
6198965ea79SLois Curfman McInnes   return 0;
6208965ea79SLois Curfman McInnes }
6218965ea79SLois Curfman McInnes 
6223501a2bdSLois Curfman McInnes static int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,Scalar **v)
6238965ea79SLois Curfman McInnes {
6243501a2bdSLois Curfman McInnes   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
62539ddd567SLois Curfman McInnes   int          lrow, rstart = mat->rstart, rend = mat->rend;
6268965ea79SLois Curfman McInnes 
62739ddd567SLois Curfman McInnes   if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIDense:only local rows")
6288965ea79SLois Curfman McInnes   lrow = row - rstart;
62939ddd567SLois Curfman McInnes   return MatGetRow(mat->A,lrow,nz,idx,v);
6308965ea79SLois Curfman McInnes }
6318965ea79SLois Curfman McInnes 
63239ddd567SLois Curfman McInnes static int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,Scalar **v)
6338965ea79SLois Curfman McInnes {
6340452661fSBarry Smith   if (idx) PetscFree(*idx);
6350452661fSBarry Smith   if (v) PetscFree(*v);
6368965ea79SLois Curfman McInnes   return 0;
6378965ea79SLois Curfman McInnes }
6388965ea79SLois Curfman McInnes 
639cddf8d76SBarry Smith static int MatNorm_MPIDense(Mat A,NormType type,double *norm)
640096963f5SLois Curfman McInnes {
6413501a2bdSLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *) A->data;
6423501a2bdSLois Curfman McInnes   Mat_SeqDense *mat = (Mat_SeqDense*) mdn->A->data;
6433501a2bdSLois Curfman McInnes   int          ierr, i, j;
6443501a2bdSLois Curfman McInnes   double       sum = 0.0;
6453501a2bdSLois Curfman McInnes   Scalar       *v = mat->v;
6463501a2bdSLois Curfman McInnes 
6473501a2bdSLois Curfman McInnes   if (!mdn->assembled) SETERRQ(1,"MatNorm_MPIDense:Must assemble matrix");
6483501a2bdSLois Curfman McInnes   if (mdn->size == 1) {
6493501a2bdSLois Curfman McInnes     ierr =  MatNorm(mdn->A,type,norm); CHKERRQ(ierr);
6503501a2bdSLois Curfman McInnes   } else {
6513501a2bdSLois Curfman McInnes     if (type == NORM_FROBENIUS) {
6523501a2bdSLois Curfman McInnes       for (i=0; i<mat->n*mat->m; i++ ) {
6533501a2bdSLois Curfman McInnes #if defined(PETSC_COMPLEX)
6543501a2bdSLois Curfman McInnes         sum += real(conj(*v)*(*v)); v++;
6553501a2bdSLois Curfman McInnes #else
6563501a2bdSLois Curfman McInnes         sum += (*v)*(*v); v++;
6573501a2bdSLois Curfman McInnes #endif
6583501a2bdSLois Curfman McInnes       }
6593501a2bdSLois Curfman McInnes       MPI_Allreduce((void*)&sum,(void*)norm,1,MPI_DOUBLE,MPI_SUM,A->comm);
6603501a2bdSLois Curfman McInnes       *norm = sqrt(*norm);
6613501a2bdSLois Curfman McInnes       PLogFlops(2*mat->n*mat->m);
6623501a2bdSLois Curfman McInnes     }
6633501a2bdSLois Curfman McInnes     else if (type == NORM_1) {
6643501a2bdSLois Curfman McInnes       double *tmp, *tmp2;
6650452661fSBarry Smith       tmp  = (double *) PetscMalloc( 2*mdn->N*sizeof(double) ); CHKPTRQ(tmp);
6663501a2bdSLois Curfman McInnes       tmp2 = tmp + mdn->N;
667cddf8d76SBarry Smith       PetscMemzero(tmp,2*mdn->N*sizeof(double));
668096963f5SLois Curfman McInnes       *norm = 0.0;
6693501a2bdSLois Curfman McInnes       v = mat->v;
6703501a2bdSLois Curfman McInnes       for ( j=0; j<mat->n; j++ ) {
6713501a2bdSLois Curfman McInnes         for ( i=0; i<mat->m; i++ ) {
67267e560aaSBarry Smith           tmp[j] += PetscAbsScalar(*v);  v++;
6733501a2bdSLois Curfman McInnes         }
6743501a2bdSLois Curfman McInnes       }
6753501a2bdSLois Curfman McInnes       MPI_Allreduce((void*)tmp,(void*)tmp2,mdn->N,MPI_DOUBLE,MPI_SUM,A->comm);
6763501a2bdSLois Curfman McInnes       for ( j=0; j<mdn->N; j++ ) {
6773501a2bdSLois Curfman McInnes         if (tmp2[j] > *norm) *norm = tmp2[j];
6783501a2bdSLois Curfman McInnes       }
6790452661fSBarry Smith       PetscFree(tmp);
6803501a2bdSLois Curfman McInnes       PLogFlops(mat->n*mat->m);
6813501a2bdSLois Curfman McInnes     }
6823501a2bdSLois Curfman McInnes     else if (type == NORM_INFINITY) { /* max row norm */
6833501a2bdSLois Curfman McInnes       double ntemp;
6843501a2bdSLois Curfman McInnes       ierr = MatNorm(mdn->A,type,&ntemp); CHKERRQ(ierr);
6853501a2bdSLois Curfman McInnes       MPI_Allreduce((void*)&ntemp,(void*)norm,1,MPI_DOUBLE,MPI_MAX,A->comm);
6863501a2bdSLois Curfman McInnes     }
6873501a2bdSLois Curfman McInnes     else {
6883501a2bdSLois Curfman McInnes       SETERRQ(1,"MatNorm_MPIDense:No support for two norm");
6893501a2bdSLois Curfman McInnes     }
6903501a2bdSLois Curfman McInnes   }
6913501a2bdSLois Curfman McInnes   return 0;
6923501a2bdSLois Curfman McInnes }
6933501a2bdSLois Curfman McInnes 
6943501a2bdSLois Curfman McInnes static int MatTranspose_MPIDense(Mat A,Mat *matout)
6953501a2bdSLois Curfman McInnes {
6963501a2bdSLois Curfman McInnes   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
6973501a2bdSLois Curfman McInnes   Mat_SeqDense *Aloc = (Mat_SeqDense *) a->A->data;
6983501a2bdSLois Curfman McInnes   Mat          B;
6993501a2bdSLois Curfman McInnes   int          M = a->M, N = a->N, m, n, *rwork, rstart = a->rstart;
7003501a2bdSLois Curfman McInnes   int          j, i, ierr;
7013501a2bdSLois Curfman McInnes   Scalar       *v;
7023501a2bdSLois Curfman McInnes 
7033501a2bdSLois Curfman McInnes   if (!matout && M != N)
7043501a2bdSLois Curfman McInnes     SETERRQ(1,"MatTranspose_MPIDense:Supports square matrix only in-place");
705*ed2daf61SLois Curfman McInnes   ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PetscNull,&B);
706*ed2daf61SLois Curfman McInnes          CHKERRQ(ierr);
7073501a2bdSLois Curfman McInnes 
7083501a2bdSLois Curfman McInnes   m = Aloc->m; n = Aloc->n; v = Aloc->v;
7090452661fSBarry Smith   rwork = (int *) PetscMalloc(n*sizeof(int)); CHKPTRQ(rwork);
7103501a2bdSLois Curfman McInnes   for ( j=0; j<n; j++ ) {
7113501a2bdSLois Curfman McInnes     for (i=0; i<m; i++) rwork[i] = rstart + i;
7123501a2bdSLois Curfman McInnes     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES); CHKERRQ(ierr);
7133501a2bdSLois Curfman McInnes     v += m;
7143501a2bdSLois Curfman McInnes   }
7150452661fSBarry Smith   PetscFree(rwork);
7163501a2bdSLois Curfman McInnes   ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
7173501a2bdSLois Curfman McInnes   ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
7183501a2bdSLois Curfman McInnes   if (matout) {
7193501a2bdSLois Curfman McInnes     *matout = B;
7203501a2bdSLois Curfman McInnes   } else {
7213501a2bdSLois Curfman McInnes     /* This isn't really an in-place transpose, but free data struct from a */
7220452661fSBarry Smith     PetscFree(a->rowners);
7233501a2bdSLois Curfman McInnes     ierr = MatDestroy(a->A); CHKERRQ(ierr);
7243501a2bdSLois Curfman McInnes     if (a->lvec) VecDestroy(a->lvec);
7253501a2bdSLois Curfman McInnes     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
7260452661fSBarry Smith     PetscFree(a);
7273501a2bdSLois Curfman McInnes     PetscMemcpy(A,B,sizeof(struct _Mat));
7280452661fSBarry Smith     PetscHeaderDestroy(B);
7293501a2bdSLois Curfman McInnes   }
730096963f5SLois Curfman McInnes   return 0;
731096963f5SLois Curfman McInnes }
732096963f5SLois Curfman McInnes 
73355659b69SBarry Smith static int MatCopyPrivate_MPIDense(Mat,Mat *,int);
7348965ea79SLois Curfman McInnes 
7358965ea79SLois Curfman McInnes /* -------------------------------------------------------------------*/
73639ddd567SLois Curfman McInnes static struct _MatOps MatOps = {MatSetValues_MPIDense,
73739ddd567SLois Curfman McInnes        MatGetRow_MPIDense,MatRestoreRow_MPIDense,
73839ddd567SLois Curfman McInnes        MatMult_MPIDense,MatMultAdd_MPIDense,
739096963f5SLois Curfman McInnes        MatMultTrans_MPIDense,MatMultTransAdd_MPIDense,
74039ddd567SLois Curfman McInnes        0,0,
74139ddd567SLois Curfman McInnes        0,0,0,
7423501a2bdSLois Curfman McInnes        0,0,MatTranspose_MPIDense,
74339ddd567SLois Curfman McInnes        MatGetInfo_MPIDense,0,
744096963f5SLois Curfman McInnes        MatGetDiagonal_MPIDense,0,MatNorm_MPIDense,
74539ddd567SLois Curfman McInnes        MatAssemblyBegin_MPIDense,MatAssemblyEnd_MPIDense,
7468965ea79SLois Curfman McInnes        0,
74739ddd567SLois Curfman McInnes        MatSetOption_MPIDense,MatZeroEntries_MPIDense,MatZeroRows_MPIDense,
74839ddd567SLois Curfman McInnes        0,
74939ddd567SLois Curfman McInnes        0,0,0,0,
75039ddd567SLois Curfman McInnes        MatGetSize_MPIDense,MatGetLocalSize_MPIDense,
75139ddd567SLois Curfman McInnes        MatGetOwnershipRange_MPIDense,
75239ddd567SLois Curfman McInnes        0,0,
753b49de8d1SLois Curfman McInnes        0,0,0,0,0,MatCopyPrivate_MPIDense,
754b49de8d1SLois Curfman McInnes        0,0,0,0,0,
755b49de8d1SLois Curfman McInnes        0,0,MatGetValues_MPIDense};
7568965ea79SLois Curfman McInnes 
7578965ea79SLois Curfman McInnes /*@C
75839ddd567SLois Curfman McInnes    MatCreateMPIDense - Creates a sparse parallel matrix in dense format.
7598965ea79SLois Curfman McInnes 
7608965ea79SLois Curfman McInnes    Input Parameters:
7618965ea79SLois Curfman McInnes .  comm - MPI communicator
7628965ea79SLois Curfman McInnes .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
7638965ea79SLois Curfman McInnes .  n - number of local columns (or PETSC_DECIDE to have calculated
7648965ea79SLois Curfman McInnes            if N is given)
7658965ea79SLois Curfman McInnes .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
7668965ea79SLois Curfman McInnes .  N - number of global columns (or PETSC_DECIDE to have calculated
7678965ea79SLois Curfman McInnes            if n is given)
768dfc5480cSLois Curfman McInnes .  data - optional location of matrix data.  Set data=PetscNull for PETSc
769dfc5480cSLois Curfman McInnes    to control all matrix memory allocation.
7708965ea79SLois Curfman McInnes 
7718965ea79SLois Curfman McInnes    Output Parameter:
7728965ea79SLois Curfman McInnes .  newmat - the matrix
7738965ea79SLois Curfman McInnes 
7748965ea79SLois Curfman McInnes    Notes:
77539ddd567SLois Curfman McInnes    The dense format is fully compatible with standard Fortran 77
77639ddd567SLois Curfman McInnes    storage by columns.
7778965ea79SLois Curfman McInnes 
77818f449edSLois Curfman McInnes    The data input variable is intended primarily for Fortran programmers
77918f449edSLois Curfman McInnes    who wish to allocate their own matrix memory space.  Most users should
780dfc5480cSLois Curfman McInnes    set data=PetscNull.
78118f449edSLois Curfman McInnes 
7828965ea79SLois Curfman McInnes    The user MUST specify either the local or global matrix dimensions
7838965ea79SLois Curfman McInnes    (possibly both).
7848965ea79SLois Curfman McInnes 
7853501a2bdSLois Curfman McInnes    Currently, the only parallel dense matrix decomposition is by rows,
7863501a2bdSLois Curfman McInnes    so that n=N and each submatrix owns all of the global columns.
7873501a2bdSLois Curfman McInnes 
78839ddd567SLois Curfman McInnes .keywords: matrix, dense, parallel
7898965ea79SLois Curfman McInnes 
79039ddd567SLois Curfman McInnes .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
7918965ea79SLois Curfman McInnes @*/
79218f449edSLois Curfman McInnes int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,Scalar *data,Mat *newmat)
7938965ea79SLois Curfman McInnes {
7948965ea79SLois Curfman McInnes   Mat          mat;
79539ddd567SLois Curfman McInnes   Mat_MPIDense *a;
79639ddd567SLois Curfman McInnes   int          ierr, i;
7978965ea79SLois Curfman McInnes 
798*ed2daf61SLois Curfman McInnes /* Note:  For now, when data is specified above, this assumes the user correctly
799*ed2daf61SLois Curfman McInnes    allocates the local dense storage space.  We should add error checking. */
80018f449edSLois Curfman McInnes 
8018965ea79SLois Curfman McInnes   *newmat         = 0;
8020452661fSBarry Smith   PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIDENSE,comm);
8038965ea79SLois Curfman McInnes   PLogObjectCreate(mat);
8040452661fSBarry Smith   mat->data       = (void *) (a = PetscNew(Mat_MPIDense)); CHKPTRQ(a);
8058965ea79SLois Curfman McInnes   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
80639ddd567SLois Curfman McInnes   mat->destroy    = MatDestroy_MPIDense;
80739ddd567SLois Curfman McInnes   mat->view       = MatView_MPIDense;
8088965ea79SLois Curfman McInnes   mat->factor     = 0;
8098965ea79SLois Curfman McInnes 
8108965ea79SLois Curfman McInnes   a->insertmode = NOT_SET_VALUES;
8118965ea79SLois Curfman McInnes   MPI_Comm_rank(comm,&a->rank);
8128965ea79SLois Curfman McInnes   MPI_Comm_size(comm,&a->size);
8138965ea79SLois Curfman McInnes 
81439ddd567SLois Curfman McInnes   if (M == PETSC_DECIDE) MPI_Allreduce(&m,&M,1,MPI_INT,MPI_SUM,comm);
8158965ea79SLois Curfman McInnes   if (m == PETSC_DECIDE) {m = M/a->size + ((M % a->size) > a->rank);}
81639ddd567SLois Curfman McInnes 
81739ddd567SLois Curfman McInnes   /* each row stores all columns */
81839ddd567SLois Curfman McInnes   if (N == PETSC_DECIDE) N = n;
819d7e8b826SBarry Smith   if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);}
820d7e8b826SBarry Smith   /*  if (n != N) SETERRQ(1,"MatCreateMPIDense:For now, only n=N is supported"); */
8218965ea79SLois Curfman McInnes   a->N = N;
8228965ea79SLois Curfman McInnes   a->M = M;
82339ddd567SLois Curfman McInnes   a->m = m;
82439ddd567SLois Curfman McInnes   a->n = n;
8258965ea79SLois Curfman McInnes 
8268965ea79SLois Curfman McInnes   /* build local table of row and column ownerships */
827d7e8b826SBarry Smith   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
828d7e8b826SBarry Smith   a->cowners = a->rowners + a->size + 1;
829d7e8b826SBarry Smith   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+
83039ddd567SLois Curfman McInnes                        sizeof(Mat_MPIDense));
8318965ea79SLois Curfman McInnes   MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);
8328965ea79SLois Curfman McInnes   a->rowners[0] = 0;
8338965ea79SLois Curfman McInnes   for ( i=2; i<=a->size; i++ ) {
8348965ea79SLois Curfman McInnes     a->rowners[i] += a->rowners[i-1];
8358965ea79SLois Curfman McInnes   }
8368965ea79SLois Curfman McInnes   a->rstart = a->rowners[a->rank];
8378965ea79SLois Curfman McInnes   a->rend   = a->rowners[a->rank+1];
838d7e8b826SBarry Smith   MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);
839d7e8b826SBarry Smith   a->cowners[0] = 0;
840d7e8b826SBarry Smith   for ( i=2; i<=a->size; i++ ) {
841d7e8b826SBarry Smith     a->cowners[i] += a->cowners[i-1];
842d7e8b826SBarry Smith   }
8438965ea79SLois Curfman McInnes 
84418f449edSLois Curfman McInnes   ierr = MatCreateSeqDense(MPI_COMM_SELF,m,N,data,&a->A); CHKERRQ(ierr);
8458965ea79SLois Curfman McInnes   PLogObjectParent(mat,a->A);
8468965ea79SLois Curfman McInnes 
8478965ea79SLois Curfman McInnes   /* build cache for off array entries formed */
8488965ea79SLois Curfman McInnes   ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr);
8498965ea79SLois Curfman McInnes 
8508965ea79SLois Curfman McInnes   /* stuff used for matrix vector multiply */
8518965ea79SLois Curfman McInnes   a->lvec      = 0;
8528965ea79SLois Curfman McInnes   a->Mvctx     = 0;
8538965ea79SLois Curfman McInnes   a->assembled = 0;
8548965ea79SLois Curfman McInnes 
8558965ea79SLois Curfman McInnes   *newmat = mat;
8568965ea79SLois Curfman McInnes   return 0;
8578965ea79SLois Curfman McInnes }
8588965ea79SLois Curfman McInnes 
8593501a2bdSLois Curfman McInnes static int MatCopyPrivate_MPIDense(Mat A,Mat *newmat,int cpvalues)
8608965ea79SLois Curfman McInnes {
8618965ea79SLois Curfman McInnes   Mat          mat;
8623501a2bdSLois Curfman McInnes   Mat_MPIDense *a,*oldmat = (Mat_MPIDense *) A->data;
86339ddd567SLois Curfman McInnes   int          ierr;
8648965ea79SLois Curfman McInnes 
86539ddd567SLois Curfman McInnes   if (!oldmat->assembled) SETERRQ(1,"MatCopyPrivate_MPIDense:Must assemble matrix");
8668965ea79SLois Curfman McInnes   *newmat       = 0;
8670452661fSBarry Smith   PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIDENSE,A->comm);
8688965ea79SLois Curfman McInnes   PLogObjectCreate(mat);
8690452661fSBarry Smith   mat->data     = (void *) (a = PetscNew(Mat_MPIDense)); CHKPTRQ(a);
8708965ea79SLois Curfman McInnes   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
87139ddd567SLois Curfman McInnes   mat->destroy  = MatDestroy_MPIDense;
87239ddd567SLois Curfman McInnes   mat->view     = MatView_MPIDense;
8733501a2bdSLois Curfman McInnes   mat->factor   = A->factor;
8748965ea79SLois Curfman McInnes 
8758965ea79SLois Curfman McInnes   a->m          = oldmat->m;
8768965ea79SLois Curfman McInnes   a->n          = oldmat->n;
8778965ea79SLois Curfman McInnes   a->M          = oldmat->M;
8788965ea79SLois Curfman McInnes   a->N          = oldmat->N;
8798965ea79SLois Curfman McInnes 
8808965ea79SLois Curfman McInnes   a->assembled  = 1;
8818965ea79SLois Curfman McInnes   a->rstart     = oldmat->rstart;
8828965ea79SLois Curfman McInnes   a->rend       = oldmat->rend;
8838965ea79SLois Curfman McInnes   a->size       = oldmat->size;
8848965ea79SLois Curfman McInnes   a->rank       = oldmat->rank;
8858965ea79SLois Curfman McInnes   a->insertmode = NOT_SET_VALUES;
8868965ea79SLois Curfman McInnes 
8870452661fSBarry Smith   a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners);
88839ddd567SLois Curfman McInnes   PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIDense));
8898965ea79SLois Curfman McInnes   PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));
8908965ea79SLois Curfman McInnes   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
8918965ea79SLois Curfman McInnes 
8928965ea79SLois Curfman McInnes   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
8938965ea79SLois Curfman McInnes   PLogObjectParent(mat,a->lvec);
89455659b69SBarry Smith   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
8958965ea79SLois Curfman McInnes   PLogObjectParent(mat,a->Mvctx);
8968965ea79SLois Curfman McInnes   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
8978965ea79SLois Curfman McInnes   PLogObjectParent(mat,a->A);
8988965ea79SLois Curfman McInnes   *newmat = mat;
8998965ea79SLois Curfman McInnes   return 0;
9008965ea79SLois Curfman McInnes }
9018965ea79SLois Curfman McInnes 
9028965ea79SLois Curfman McInnes #include "sysio.h"
9038965ea79SLois Curfman McInnes 
90439ddd567SLois Curfman McInnes int MatLoad_MPIDense(Viewer bview,MatType type,Mat *newmat)
9058965ea79SLois Curfman McInnes {
9068965ea79SLois Curfman McInnes   Mat          A;
9078965ea79SLois Curfman McInnes   int          i, nz, ierr, j,rstart, rend, fd;
9088965ea79SLois Curfman McInnes   Scalar       *vals,*svals;
9098965ea79SLois Curfman McInnes   PetscObject  vobj = (PetscObject) bview;
9108965ea79SLois Curfman McInnes   MPI_Comm     comm = vobj->comm;
9118965ea79SLois Curfman McInnes   MPI_Status   status;
9128965ea79SLois Curfman McInnes   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
9138965ea79SLois Curfman McInnes   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
9148965ea79SLois Curfman McInnes   int          tag = ((PetscObject)bview)->tag;
9158965ea79SLois Curfman McInnes 
9168965ea79SLois Curfman McInnes   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
9178965ea79SLois Curfman McInnes   if (!rank) {
9188965ea79SLois Curfman McInnes     ierr = ViewerFileGetDescriptor_Private(bview,&fd); CHKERRQ(ierr);
9198965ea79SLois Curfman McInnes     ierr = SYRead(fd,(char *)header,4,SYINT); CHKERRQ(ierr);
92039ddd567SLois Curfman McInnes     if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIDenseorMPIRow:not matrix object");
9218965ea79SLois Curfman McInnes   }
9228965ea79SLois Curfman McInnes 
9238965ea79SLois Curfman McInnes   MPI_Bcast(header+1,3,MPI_INT,0,comm);
9248965ea79SLois Curfman McInnes   M = header[1]; N = header[2];
9258965ea79SLois Curfman McInnes   /* determine ownership of all rows */
9268965ea79SLois Curfman McInnes   m = M/size + ((M % size) > rank);
9270452661fSBarry Smith   rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
9288965ea79SLois Curfman McInnes   MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
9298965ea79SLois Curfman McInnes   rowners[0] = 0;
9308965ea79SLois Curfman McInnes   for ( i=2; i<=size; i++ ) {
9318965ea79SLois Curfman McInnes     rowners[i] += rowners[i-1];
9328965ea79SLois Curfman McInnes   }
9338965ea79SLois Curfman McInnes   rstart = rowners[rank];
9348965ea79SLois Curfman McInnes   rend   = rowners[rank+1];
9358965ea79SLois Curfman McInnes 
9368965ea79SLois Curfman McInnes   /* distribute row lengths to all processors */
9370452661fSBarry Smith   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
9388965ea79SLois Curfman McInnes   offlens = ourlens + (rend-rstart);
9398965ea79SLois Curfman McInnes   if (!rank) {
9400452661fSBarry Smith     rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
9418965ea79SLois Curfman McInnes     ierr = SYRead(fd,rowlengths,M,SYINT); CHKERRQ(ierr);
9420452661fSBarry Smith     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
9438965ea79SLois Curfman McInnes     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
9448965ea79SLois Curfman McInnes     MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
9450452661fSBarry Smith     PetscFree(sndcounts);
9468965ea79SLois Curfman McInnes   }
9478965ea79SLois Curfman McInnes   else {
9488965ea79SLois Curfman McInnes     MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
9498965ea79SLois Curfman McInnes   }
9508965ea79SLois Curfman McInnes 
9518965ea79SLois Curfman McInnes   if (!rank) {
9528965ea79SLois Curfman McInnes     /* calculate the number of nonzeros on each processor */
9530452661fSBarry Smith     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
954cddf8d76SBarry Smith     PetscMemzero(procsnz,size*sizeof(int));
9558965ea79SLois Curfman McInnes     for ( i=0; i<size; i++ ) {
9568965ea79SLois Curfman McInnes       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
9578965ea79SLois Curfman McInnes         procsnz[i] += rowlengths[j];
9588965ea79SLois Curfman McInnes       }
9598965ea79SLois Curfman McInnes     }
9600452661fSBarry Smith     PetscFree(rowlengths);
9618965ea79SLois Curfman McInnes 
9628965ea79SLois Curfman McInnes     /* determine max buffer needed and allocate it */
9638965ea79SLois Curfman McInnes     maxnz = 0;
9648965ea79SLois Curfman McInnes     for ( i=0; i<size; i++ ) {
9650452661fSBarry Smith       maxnz = PetscMax(maxnz,procsnz[i]);
9668965ea79SLois Curfman McInnes     }
9670452661fSBarry Smith     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
9688965ea79SLois Curfman McInnes 
9698965ea79SLois Curfman McInnes     /* read in my part of the matrix column indices  */
9708965ea79SLois Curfman McInnes     nz = procsnz[0];
9710452661fSBarry Smith     mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
9728965ea79SLois Curfman McInnes     ierr = SYRead(fd,mycols,nz,SYINT); CHKERRQ(ierr);
9738965ea79SLois Curfman McInnes 
9748965ea79SLois Curfman McInnes     /* read in every one elses and ship off */
9758965ea79SLois Curfman McInnes     for ( i=1; i<size; i++ ) {
9768965ea79SLois Curfman McInnes       nz = procsnz[i];
9778965ea79SLois Curfman McInnes       ierr = SYRead(fd,cols,nz,SYINT); CHKERRQ(ierr);
9788965ea79SLois Curfman McInnes       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
9798965ea79SLois Curfman McInnes     }
9800452661fSBarry Smith     PetscFree(cols);
9818965ea79SLois Curfman McInnes   }
9828965ea79SLois Curfman McInnes   else {
9838965ea79SLois Curfman McInnes     /* determine buffer space needed for message */
9848965ea79SLois Curfman McInnes     nz = 0;
9858965ea79SLois Curfman McInnes     for ( i=0; i<m; i++ ) {
9868965ea79SLois Curfman McInnes       nz += ourlens[i];
9878965ea79SLois Curfman McInnes     }
9880452661fSBarry Smith     mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
9898965ea79SLois Curfman McInnes 
9908965ea79SLois Curfman McInnes     /* receive message of column indices*/
9918965ea79SLois Curfman McInnes     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
9928965ea79SLois Curfman McInnes     MPI_Get_count(&status,MPI_INT,&maxnz);
99339ddd567SLois Curfman McInnes     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIDenseorMPIRow:something is wrong with file");
9948965ea79SLois Curfman McInnes   }
9958965ea79SLois Curfman McInnes 
9968965ea79SLois Curfman McInnes   /* loop over local rows, determining number of off diagonal entries */
997cddf8d76SBarry Smith   PetscMemzero(offlens,m*sizeof(int));
9988965ea79SLois Curfman McInnes   jj = 0;
9998965ea79SLois Curfman McInnes   for ( i=0; i<m; i++ ) {
10008965ea79SLois Curfman McInnes     for ( j=0; j<ourlens[i]; j++ ) {
10018965ea79SLois Curfman McInnes       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
10028965ea79SLois Curfman McInnes       jj++;
10038965ea79SLois Curfman McInnes     }
10048965ea79SLois Curfman McInnes   }
10058965ea79SLois Curfman McInnes 
10068965ea79SLois Curfman McInnes   /* create our matrix */
10078965ea79SLois Curfman McInnes   for ( i=0; i<m; i++ ) {
10088965ea79SLois Curfman McInnes     ourlens[i] -= offlens[i];
10098965ea79SLois Curfman McInnes   }
101039ddd567SLois Curfman McInnes   if (type == MATMPIDENSE) {
1011*ed2daf61SLois Curfman McInnes     ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PetscNull,newmat); CHKERRQ(ierr);
10128965ea79SLois Curfman McInnes   }
10138965ea79SLois Curfman McInnes   A = *newmat;
10148965ea79SLois Curfman McInnes   for ( i=0; i<m; i++ ) {
10158965ea79SLois Curfman McInnes     ourlens[i] += offlens[i];
10168965ea79SLois Curfman McInnes   }
10178965ea79SLois Curfman McInnes 
10188965ea79SLois Curfman McInnes   if (!rank) {
10190452661fSBarry Smith     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);
10208965ea79SLois Curfman McInnes 
10218965ea79SLois Curfman McInnes     /* read in my part of the matrix numerical values  */
10228965ea79SLois Curfman McInnes     nz = procsnz[0];
10238965ea79SLois Curfman McInnes     ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr);
10248965ea79SLois Curfman McInnes 
10258965ea79SLois Curfman McInnes     /* insert into matrix */
10268965ea79SLois Curfman McInnes     jj      = rstart;
10278965ea79SLois Curfman McInnes     smycols = mycols;
10288965ea79SLois Curfman McInnes     svals   = vals;
10298965ea79SLois Curfman McInnes     for ( i=0; i<m; i++ ) {
10308965ea79SLois Curfman McInnes       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
10318965ea79SLois Curfman McInnes       smycols += ourlens[i];
10328965ea79SLois Curfman McInnes       svals   += ourlens[i];
10338965ea79SLois Curfman McInnes       jj++;
10348965ea79SLois Curfman McInnes     }
10358965ea79SLois Curfman McInnes 
10368965ea79SLois Curfman McInnes     /* read in other processors and ship out */
10378965ea79SLois Curfman McInnes     for ( i=1; i<size; i++ ) {
10388965ea79SLois Curfman McInnes       nz = procsnz[i];
10398965ea79SLois Curfman McInnes       ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr);
10408965ea79SLois Curfman McInnes       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
10418965ea79SLois Curfman McInnes     }
10420452661fSBarry Smith     PetscFree(procsnz);
10438965ea79SLois Curfman McInnes   }
10448965ea79SLois Curfman McInnes   else {
10458965ea79SLois Curfman McInnes     /* receive numeric values */
10460452661fSBarry Smith     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);
10478965ea79SLois Curfman McInnes 
10488965ea79SLois Curfman McInnes     /* receive message of values*/
10498965ea79SLois Curfman McInnes     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
10508965ea79SLois Curfman McInnes     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
105139ddd567SLois Curfman McInnes     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIDenseorMPIRow:something is wrong with file");
10528965ea79SLois Curfman McInnes 
10538965ea79SLois Curfman McInnes     /* insert into matrix */
10548965ea79SLois Curfman McInnes     jj      = rstart;
10558965ea79SLois Curfman McInnes     smycols = mycols;
10568965ea79SLois Curfman McInnes     svals   = vals;
10578965ea79SLois Curfman McInnes     for ( i=0; i<m; i++ ) {
10588965ea79SLois Curfman McInnes       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
10598965ea79SLois Curfman McInnes       smycols += ourlens[i];
10608965ea79SLois Curfman McInnes       svals   += ourlens[i];
10618965ea79SLois Curfman McInnes       jj++;
10628965ea79SLois Curfman McInnes     }
10638965ea79SLois Curfman McInnes   }
10640452661fSBarry Smith   PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);
10658965ea79SLois Curfman McInnes 
10668965ea79SLois Curfman McInnes   ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
10678965ea79SLois Curfman McInnes   ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
10688965ea79SLois Curfman McInnes   return 0;
10698965ea79SLois Curfman McInnes }
1070