xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 6d84be18fbb99ba69be7b8bdde5411a66955b7ea)
18965ea79SLois Curfman McInnes #ifndef lint
2*6d84be18SBarry Smith static char vcid[] = "$Id: mpidense.c,v 1.28 1996/02/01 18:52:35 curfman Exp bsmith $";
38965ea79SLois Curfman McInnes #endif
48965ea79SLois Curfman McInnes 
5ed3cc1f0SBarry Smith /*
6ed3cc1f0SBarry Smith    Basic functions for basic parallel dense matrices.
7ed3cc1f0SBarry Smith */
8ed3cc1f0SBarry Smith 
98965ea79SLois Curfman McInnes #include "mpidense.h"
108965ea79SLois Curfman McInnes #include "vec/vecimpl.h"
118965ea79SLois Curfman McInnes 
1239ddd567SLois Curfman McInnes static int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,
138965ea79SLois Curfman McInnes                                int *idxn,Scalar *v,InsertMode addv)
148965ea79SLois Curfman McInnes {
1539b7565bSBarry Smith   Mat_MPIDense *A = (Mat_MPIDense *) mat->data;
1639b7565bSBarry Smith   int          ierr, i, j, rstart = A->rstart, rend = A->rend, row;
1739b7565bSBarry Smith   int          roworiented = A->roworiented;
188965ea79SLois Curfman McInnes 
1939b7565bSBarry Smith   if (A->insertmode != NOT_SET_VALUES && A->insertmode != addv) {
2039ddd567SLois Curfman McInnes     SETERRQ(1,"MatSetValues_MPIDense:Cannot mix inserts and adds");
218965ea79SLois Curfman McInnes   }
2239b7565bSBarry Smith   A->insertmode = addv;
238965ea79SLois Curfman McInnes   for ( i=0; i<m; i++ ) {
2439ddd567SLois Curfman McInnes     if (idxm[i] < 0) SETERRQ(1,"MatSetValues_MPIDense:Negative row");
2539b7565bSBarry Smith     if (idxm[i] >= A->M) SETERRQ(1,"MatSetValues_MPIDense:Row too large");
268965ea79SLois Curfman McInnes     if (idxm[i] >= rstart && idxm[i] < rend) {
278965ea79SLois Curfman McInnes       row = idxm[i] - rstart;
2839b7565bSBarry Smith       if (roworiented) {
2939b7565bSBarry Smith         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv); CHKERRQ(ierr);
3039b7565bSBarry Smith       }
3139b7565bSBarry Smith       else {
328965ea79SLois Curfman McInnes         for ( j=0; j<n; j++ ) {
3339ddd567SLois Curfman McInnes           if (idxn[j] < 0) SETERRQ(1,"MatSetValues_MPIDense:Negative column");
3439b7565bSBarry Smith           if (idxn[j] >= A->N) SETERRQ(1,"MatSetValues_MPIDense:Column too large");
3539b7565bSBarry Smith           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv); CHKERRQ(ierr);
3639b7565bSBarry Smith         }
378965ea79SLois Curfman McInnes       }
388965ea79SLois Curfman McInnes     }
398965ea79SLois Curfman McInnes     else {
4039b7565bSBarry Smith       if (roworiented) {
4139b7565bSBarry Smith         ierr = StashValues_Private(&A->stash,idxm[i],n,idxn,v+i*n,addv); CHKERRQ(ierr);
4239b7565bSBarry Smith       }
4339b7565bSBarry Smith       else { /* must stash each seperately */
4439b7565bSBarry Smith         row = idxm[i];
4539b7565bSBarry Smith         for ( j=0; j<n; j++ ) {
4639b7565bSBarry Smith           ierr = StashValues_Private(&A->stash,row,1,&idxn[j],v+i+j*m,addv);
4739b7565bSBarry Smith                  CHKERRQ(ierr);
4839b7565bSBarry Smith         }
4939b7565bSBarry Smith       }
50b49de8d1SLois Curfman McInnes     }
51b49de8d1SLois Curfman McInnes   }
52b49de8d1SLois Curfman McInnes   return 0;
53b49de8d1SLois Curfman McInnes }
54b49de8d1SLois Curfman McInnes 
55b49de8d1SLois Curfman McInnes static int MatGetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
56b49de8d1SLois Curfman McInnes {
57b49de8d1SLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
58b49de8d1SLois Curfman McInnes   int          ierr, i, j, rstart = mdn->rstart, rend = mdn->rend, row;
59b49de8d1SLois Curfman McInnes 
60b49de8d1SLois Curfman McInnes   for ( i=0; i<m; i++ ) {
61b49de8d1SLois Curfman McInnes     if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIDense:Negative row");
62b49de8d1SLois Curfman McInnes     if (idxm[i] >= mdn->M) SETERRQ(1,"MatGetValues_MPIDense:Row too large");
63b49de8d1SLois Curfman McInnes     if (idxm[i] >= rstart && idxm[i] < rend) {
64b49de8d1SLois Curfman McInnes       row = idxm[i] - rstart;
65b49de8d1SLois Curfman McInnes       for ( j=0; j<n; j++ ) {
66b49de8d1SLois Curfman McInnes         if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIDense:Negative column");
67b49de8d1SLois Curfman McInnes         if (idxn[j] >= mdn->N)
68b49de8d1SLois Curfman McInnes           SETERRQ(1,"MatGetValues_MPIDense:Column too large");
69b49de8d1SLois Curfman McInnes         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j); CHKERRQ(ierr);
70b49de8d1SLois Curfman McInnes       }
71b49de8d1SLois Curfman McInnes     }
72b49de8d1SLois Curfman McInnes     else {
73b49de8d1SLois Curfman McInnes       SETERRQ(1,"MatGetValues_MPIDense:Only local values currently supported");
748965ea79SLois Curfman McInnes     }
758965ea79SLois Curfman McInnes   }
768965ea79SLois Curfman McInnes   return 0;
778965ea79SLois Curfman McInnes }
788965ea79SLois Curfman McInnes 
7939ddd567SLois Curfman McInnes static int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
808965ea79SLois Curfman McInnes {
8139ddd567SLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
828965ea79SLois Curfman McInnes   MPI_Comm     comm = mat->comm;
8339ddd567SLois Curfman McInnes   int          size = mdn->size, *owners = mdn->rowners, rank = mdn->rank;
848965ea79SLois Curfman McInnes   int          *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
8539ddd567SLois Curfman McInnes   int          tag = mat->tag, *owner,*starts,count,ierr;
868965ea79SLois Curfman McInnes   InsertMode   addv;
8739ddd567SLois Curfman McInnes   MPI_Request  *send_waits,*recv_waits;
888965ea79SLois Curfman McInnes   Scalar       *rvalues,*svalues;
898965ea79SLois Curfman McInnes 
908965ea79SLois Curfman McInnes   /* make sure all processors are either in INSERTMODE or ADDMODE */
91*6d84be18SBarry Smith   MPI_Allreduce(&mdn->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
9239ddd567SLois Curfman McInnes   if (addv == (ADD_VALUES|INSERT_VALUES)) { SETERRQ(1,
9339ddd567SLois Curfman McInnes     "MatAssemblyBegin_MPIDense:Cannot mix adds/inserts on different procs");
948965ea79SLois Curfman McInnes     }
9539ddd567SLois Curfman McInnes   mdn->insertmode = addv; /* in case this processor had no cache */
968965ea79SLois Curfman McInnes 
978965ea79SLois Curfman McInnes   /*  first count number of contributors to each processor */
980452661fSBarry Smith   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
99cddf8d76SBarry Smith   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
1000452661fSBarry Smith   owner = (int *) PetscMalloc( (mdn->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
10139ddd567SLois Curfman McInnes   for ( i=0; i<mdn->stash.n; i++ ) {
10239ddd567SLois Curfman McInnes     idx = mdn->stash.idx[i];
1038965ea79SLois Curfman McInnes     for ( j=0; j<size; j++ ) {
1048965ea79SLois Curfman McInnes       if (idx >= owners[j] && idx < owners[j+1]) {
1058965ea79SLois Curfman McInnes         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
1068965ea79SLois Curfman McInnes       }
1078965ea79SLois Curfman McInnes     }
1088965ea79SLois Curfman McInnes   }
1098965ea79SLois Curfman McInnes   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
1108965ea79SLois Curfman McInnes 
1118965ea79SLois Curfman McInnes   /* inform other processors of number of messages and max length*/
1120452661fSBarry Smith   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
113*6d84be18SBarry Smith   MPI_Allreduce(procs,work,size,MPI_INT,MPI_SUM,comm);
1148965ea79SLois Curfman McInnes   nreceives = work[rank];
115*6d84be18SBarry Smith   MPI_Allreduce(nprocs,work,size,MPI_INT,MPI_MAX,comm);
1168965ea79SLois Curfman McInnes   nmax = work[rank];
1170452661fSBarry Smith   PetscFree(work);
1188965ea79SLois Curfman McInnes 
1198965ea79SLois Curfman McInnes   /* post receives:
1208965ea79SLois Curfman McInnes        1) each message will consist of ordered pairs
1218965ea79SLois Curfman McInnes      (global index,value) we store the global index as a double
1228965ea79SLois Curfman McInnes      to simplify the message passing.
1238965ea79SLois Curfman McInnes        2) since we don't know how long each individual message is we
1248965ea79SLois Curfman McInnes      allocate the largest needed buffer for each receive. Potentially
1258965ea79SLois Curfman McInnes      this is a lot of wasted space.
1268965ea79SLois Curfman McInnes 
1278965ea79SLois Curfman McInnes        This could be done better.
1288965ea79SLois Curfman McInnes   */
1290452661fSBarry Smith   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
1308965ea79SLois Curfman McInnes   CHKPTRQ(rvalues);
1310452661fSBarry Smith   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
1328965ea79SLois Curfman McInnes   CHKPTRQ(recv_waits);
1338965ea79SLois Curfman McInnes   for ( i=0; i<nreceives; i++ ) {
134*6d84be18SBarry Smith     MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
1358965ea79SLois Curfman McInnes               comm,recv_waits+i);
1368965ea79SLois Curfman McInnes   }
1378965ea79SLois Curfman McInnes 
1388965ea79SLois Curfman McInnes   /* do sends:
1398965ea79SLois Curfman McInnes       1) starts[i] gives the starting index in svalues for stuff going to
1408965ea79SLois Curfman McInnes          the ith processor
1418965ea79SLois Curfman McInnes   */
1420452661fSBarry Smith   svalues = (Scalar *) PetscMalloc( 3*(mdn->stash.n+1)*sizeof(Scalar) );
14339ddd567SLois Curfman McInnes   CHKPTRQ(svalues);
1440452661fSBarry Smith   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
1458965ea79SLois Curfman McInnes   CHKPTRQ(send_waits);
1460452661fSBarry Smith   starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
1478965ea79SLois Curfman McInnes   starts[0] = 0;
1488965ea79SLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
14939ddd567SLois Curfman McInnes   for ( i=0; i<mdn->stash.n; i++ ) {
15039ddd567SLois Curfman McInnes     svalues[3*starts[owner[i]]]       = (Scalar)  mdn->stash.idx[i];
15139ddd567SLois Curfman McInnes     svalues[3*starts[owner[i]]+1]     = (Scalar)  mdn->stash.idy[i];
15239ddd567SLois Curfman McInnes     svalues[3*(starts[owner[i]]++)+2] =  mdn->stash.array[i];
1538965ea79SLois Curfman McInnes   }
1540452661fSBarry Smith   PetscFree(owner);
1558965ea79SLois Curfman McInnes   starts[0] = 0;
1568965ea79SLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
1578965ea79SLois Curfman McInnes   count = 0;
1588965ea79SLois Curfman McInnes   for ( i=0; i<size; i++ ) {
1598965ea79SLois Curfman McInnes     if (procs[i]) {
160*6d84be18SBarry Smith       MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
1618965ea79SLois Curfman McInnes                 comm,send_waits+count++);
1628965ea79SLois Curfman McInnes     }
1638965ea79SLois Curfman McInnes   }
1640452661fSBarry Smith   PetscFree(starts); PetscFree(nprocs);
1658965ea79SLois Curfman McInnes 
1668965ea79SLois Curfman McInnes   /* Free cache space */
16739ddd567SLois Curfman McInnes   ierr = StashDestroy_Private(&mdn->stash); CHKERRQ(ierr);
1688965ea79SLois Curfman McInnes 
16939ddd567SLois Curfman McInnes   mdn->svalues    = svalues;    mdn->rvalues = rvalues;
17039ddd567SLois Curfman McInnes   mdn->nsends     = nsends;     mdn->nrecvs = nreceives;
17139ddd567SLois Curfman McInnes   mdn->send_waits = send_waits; mdn->recv_waits = recv_waits;
17239ddd567SLois Curfman McInnes   mdn->rmax       = nmax;
1738965ea79SLois Curfman McInnes 
1748965ea79SLois Curfman McInnes   return 0;
1758965ea79SLois Curfman McInnes }
17639ddd567SLois Curfman McInnes extern int MatSetUpMultiply_MPIDense(Mat);
1778965ea79SLois Curfman McInnes 
17839ddd567SLois Curfman McInnes static int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
1798965ea79SLois Curfman McInnes {
18039ddd567SLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
1818965ea79SLois Curfman McInnes   MPI_Status   *send_status,recv_status;
18239ddd567SLois Curfman McInnes   int          imdex, nrecvs=mdn->nrecvs, count=nrecvs, i, n, ierr, row, col;
1838965ea79SLois Curfman McInnes   Scalar       *values,val;
18439ddd567SLois Curfman McInnes   InsertMode   addv = mdn->insertmode;
1858965ea79SLois Curfman McInnes 
1868965ea79SLois Curfman McInnes   /*  wait on receives */
1878965ea79SLois Curfman McInnes   while (count) {
18839ddd567SLois Curfman McInnes     MPI_Waitany(nrecvs,mdn->recv_waits,&imdex,&recv_status);
1898965ea79SLois Curfman McInnes     /* unpack receives into our local space */
19039ddd567SLois Curfman McInnes     values = mdn->rvalues + 3*imdex*mdn->rmax;
1918965ea79SLois Curfman McInnes     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
1928965ea79SLois Curfman McInnes     n = n/3;
1938965ea79SLois Curfman McInnes     for ( i=0; i<n; i++ ) {
194227d817aSBarry Smith       row = (int) PetscReal(values[3*i]) - mdn->rstart;
195227d817aSBarry Smith       col = (int) PetscReal(values[3*i+1]);
1968965ea79SLois Curfman McInnes       val = values[3*i+2];
19739ddd567SLois Curfman McInnes       if (col >= 0 && col < mdn->N) {
19839ddd567SLois Curfman McInnes         MatSetValues(mdn->A,1,&row,1,&col,&val,addv);
1998965ea79SLois Curfman McInnes       }
20039ddd567SLois Curfman McInnes       else {SETERRQ(1,"MatAssemblyEnd_MPIDense:Invalid column");}
2018965ea79SLois Curfman McInnes     }
2028965ea79SLois Curfman McInnes     count--;
2038965ea79SLois Curfman McInnes   }
2040452661fSBarry Smith   PetscFree(mdn->recv_waits); PetscFree(mdn->rvalues);
2058965ea79SLois Curfman McInnes 
2068965ea79SLois Curfman McInnes   /* wait on sends */
20739ddd567SLois Curfman McInnes   if (mdn->nsends) {
2080452661fSBarry Smith     send_status = (MPI_Status *) PetscMalloc( mdn->nsends*sizeof(MPI_Status) );
2098965ea79SLois Curfman McInnes     CHKPTRQ(send_status);
21039ddd567SLois Curfman McInnes     MPI_Waitall(mdn->nsends,mdn->send_waits,send_status);
2110452661fSBarry Smith     PetscFree(send_status);
2128965ea79SLois Curfman McInnes   }
2130452661fSBarry Smith   PetscFree(mdn->send_waits); PetscFree(mdn->svalues);
2148965ea79SLois Curfman McInnes 
21539ddd567SLois Curfman McInnes   mdn->insertmode = NOT_SET_VALUES;
21639ddd567SLois Curfman McInnes   ierr = MatAssemblyBegin(mdn->A,mode); CHKERRQ(ierr);
21739ddd567SLois Curfman McInnes   ierr = MatAssemblyEnd(mdn->A,mode); CHKERRQ(ierr);
2188965ea79SLois Curfman McInnes 
219227d817aSBarry Smith   if (!mat->was_assembled && mode == FINAL_ASSEMBLY) {
22039ddd567SLois Curfman McInnes     ierr = MatSetUpMultiply_MPIDense(mat); CHKERRQ(ierr);
2218965ea79SLois Curfman McInnes   }
2228965ea79SLois Curfman McInnes   return 0;
2238965ea79SLois Curfman McInnes }
2248965ea79SLois Curfman McInnes 
22539ddd567SLois Curfman McInnes static int MatZeroEntries_MPIDense(Mat A)
2268965ea79SLois Curfman McInnes {
22739ddd567SLois Curfman McInnes   Mat_MPIDense *l = (Mat_MPIDense *) A->data;
22839ddd567SLois Curfman McInnes   return MatZeroEntries(l->A);
2298965ea79SLois Curfman McInnes }
2308965ea79SLois Curfman McInnes 
2318965ea79SLois Curfman McInnes /* the code does not do the diagonal entries correctly unless the
2328965ea79SLois Curfman McInnes    matrix is square and the column and row owerships are identical.
2338965ea79SLois Curfman McInnes    This is a BUG. The only way to fix it seems to be to access
2343501a2bdSLois Curfman McInnes    mdn->A and mdn->B directly and not through the MatZeroRows()
2358965ea79SLois Curfman McInnes    routine.
2368965ea79SLois Curfman McInnes */
23739ddd567SLois Curfman McInnes static int MatZeroRows_MPIDense(Mat A,IS is,Scalar *diag)
2388965ea79SLois Curfman McInnes {
23939ddd567SLois Curfman McInnes   Mat_MPIDense   *l = (Mat_MPIDense *) A->data;
2408965ea79SLois Curfman McInnes   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
2418965ea79SLois Curfman McInnes   int            *procs,*nprocs,j,found,idx,nsends,*work;
2428965ea79SLois Curfman McInnes   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
2438965ea79SLois Curfman McInnes   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
2448965ea79SLois Curfman McInnes   int            *lens,imdex,*lrows,*values;
2458965ea79SLois Curfman McInnes   MPI_Comm       comm = A->comm;
2468965ea79SLois Curfman McInnes   MPI_Request    *send_waits,*recv_waits;
2478965ea79SLois Curfman McInnes   MPI_Status     recv_status,*send_status;
2488965ea79SLois Curfman McInnes   IS             istmp;
2498965ea79SLois Curfman McInnes 
2508965ea79SLois Curfman McInnes   ierr = ISGetLocalSize(is,&N); CHKERRQ(ierr);
2518965ea79SLois Curfman McInnes   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
2528965ea79SLois Curfman McInnes 
2538965ea79SLois Curfman McInnes   /*  first count number of contributors to each processor */
2540452661fSBarry Smith   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
255cddf8d76SBarry Smith   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
2560452661fSBarry Smith   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
2578965ea79SLois Curfman McInnes   for ( i=0; i<N; i++ ) {
2588965ea79SLois Curfman McInnes     idx = rows[i];
2598965ea79SLois Curfman McInnes     found = 0;
2608965ea79SLois Curfman McInnes     for ( j=0; j<size; j++ ) {
2618965ea79SLois Curfman McInnes       if (idx >= owners[j] && idx < owners[j+1]) {
2628965ea79SLois Curfman McInnes         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
2638965ea79SLois Curfman McInnes       }
2648965ea79SLois Curfman McInnes     }
26539ddd567SLois Curfman McInnes     if (!found) SETERRQ(1,"MatZeroRows_MPIDense:Index out of range");
2668965ea79SLois Curfman McInnes   }
2678965ea79SLois Curfman McInnes   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
2688965ea79SLois Curfman McInnes 
2698965ea79SLois Curfman McInnes   /* inform other processors of number of messages and max length*/
2700452661fSBarry Smith   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
2718965ea79SLois Curfman McInnes   MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
2728965ea79SLois Curfman McInnes   nrecvs = work[rank];
2738965ea79SLois Curfman McInnes   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
2748965ea79SLois Curfman McInnes   nmax = work[rank];
2750452661fSBarry Smith   PetscFree(work);
2768965ea79SLois Curfman McInnes 
2778965ea79SLois Curfman McInnes   /* post receives:   */
2780452661fSBarry Smith   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
2798965ea79SLois Curfman McInnes   CHKPTRQ(rvalues);
2800452661fSBarry Smith   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
2818965ea79SLois Curfman McInnes   CHKPTRQ(recv_waits);
2828965ea79SLois Curfman McInnes   for ( i=0; i<nrecvs; i++ ) {
2838965ea79SLois Curfman McInnes     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
2848965ea79SLois Curfman McInnes   }
2858965ea79SLois Curfman McInnes 
2868965ea79SLois Curfman McInnes   /* do sends:
2878965ea79SLois Curfman McInnes       1) starts[i] gives the starting index in svalues for stuff going to
2888965ea79SLois Curfman McInnes          the ith processor
2898965ea79SLois Curfman McInnes   */
2900452661fSBarry Smith   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
2910452661fSBarry Smith   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
2928965ea79SLois Curfman McInnes   CHKPTRQ(send_waits);
2930452661fSBarry Smith   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
2948965ea79SLois Curfman McInnes   starts[0] = 0;
2958965ea79SLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
2968965ea79SLois Curfman McInnes   for ( i=0; i<N; i++ ) {
2978965ea79SLois Curfman McInnes     svalues[starts[owner[i]]++] = rows[i];
2988965ea79SLois Curfman McInnes   }
2998965ea79SLois Curfman McInnes   ISRestoreIndices(is,&rows);
3008965ea79SLois Curfman McInnes 
3018965ea79SLois Curfman McInnes   starts[0] = 0;
3028965ea79SLois Curfman McInnes   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
3038965ea79SLois Curfman McInnes   count = 0;
3048965ea79SLois Curfman McInnes   for ( i=0; i<size; i++ ) {
3058965ea79SLois Curfman McInnes     if (procs[i]) {
3068965ea79SLois Curfman McInnes       MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
3078965ea79SLois Curfman McInnes     }
3088965ea79SLois Curfman McInnes   }
3090452661fSBarry Smith   PetscFree(starts);
3108965ea79SLois Curfman McInnes 
3118965ea79SLois Curfman McInnes   base = owners[rank];
3128965ea79SLois Curfman McInnes 
3138965ea79SLois Curfman McInnes   /*  wait on receives */
3140452661fSBarry Smith   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
3158965ea79SLois Curfman McInnes   source = lens + nrecvs;
3168965ea79SLois Curfman McInnes   count  = nrecvs; slen = 0;
3178965ea79SLois Curfman McInnes   while (count) {
3188965ea79SLois Curfman McInnes     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
3198965ea79SLois Curfman McInnes     /* unpack receives into our local space */
3208965ea79SLois Curfman McInnes     MPI_Get_count(&recv_status,MPI_INT,&n);
3218965ea79SLois Curfman McInnes     source[imdex]  = recv_status.MPI_SOURCE;
3228965ea79SLois Curfman McInnes     lens[imdex]  = n;
3238965ea79SLois Curfman McInnes     slen += n;
3248965ea79SLois Curfman McInnes     count--;
3258965ea79SLois Curfman McInnes   }
3260452661fSBarry Smith   PetscFree(recv_waits);
3278965ea79SLois Curfman McInnes 
3288965ea79SLois Curfman McInnes   /* move the data into the send scatter */
3290452661fSBarry Smith   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
3308965ea79SLois Curfman McInnes   count = 0;
3318965ea79SLois Curfman McInnes   for ( i=0; i<nrecvs; i++ ) {
3328965ea79SLois Curfman McInnes     values = rvalues + i*nmax;
3338965ea79SLois Curfman McInnes     for ( j=0; j<lens[i]; j++ ) {
3348965ea79SLois Curfman McInnes       lrows[count++] = values[j] - base;
3358965ea79SLois Curfman McInnes     }
3368965ea79SLois Curfman McInnes   }
3370452661fSBarry Smith   PetscFree(rvalues); PetscFree(lens);
3380452661fSBarry Smith   PetscFree(owner); PetscFree(nprocs);
3398965ea79SLois Curfman McInnes 
3408965ea79SLois Curfman McInnes   /* actually zap the local rows */
3418965ea79SLois Curfman McInnes   ierr = ISCreateSeq(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
3428965ea79SLois Curfman McInnes   PLogObjectParent(A,istmp);
3430452661fSBarry Smith   PetscFree(lrows);
3448965ea79SLois Curfman McInnes   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
3458965ea79SLois Curfman McInnes   ierr = ISDestroy(istmp); CHKERRQ(ierr);
3468965ea79SLois Curfman McInnes 
3478965ea79SLois Curfman McInnes   /* wait on sends */
3488965ea79SLois Curfman McInnes   if (nsends) {
3490452661fSBarry Smith     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
3508965ea79SLois Curfman McInnes     CHKPTRQ(send_status);
3518965ea79SLois Curfman McInnes     MPI_Waitall(nsends,send_waits,send_status);
3520452661fSBarry Smith     PetscFree(send_status);
3538965ea79SLois Curfman McInnes   }
3540452661fSBarry Smith   PetscFree(send_waits); PetscFree(svalues);
3558965ea79SLois Curfman McInnes 
3568965ea79SLois Curfman McInnes   return 0;
3578965ea79SLois Curfman McInnes }
3588965ea79SLois Curfman McInnes 
35939ddd567SLois Curfman McInnes static int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
3608965ea79SLois Curfman McInnes {
36139ddd567SLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
3628965ea79SLois Curfman McInnes   int          ierr;
363c456f294SBarry Smith 
36439ddd567SLois Curfman McInnes   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx);
36539ddd567SLois Curfman McInnes   CHKERRQ(ierr);
36639ddd567SLois Curfman McInnes   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx);
36739ddd567SLois Curfman McInnes   CHKERRQ(ierr);
36839ddd567SLois Curfman McInnes   ierr = MatMult(mdn->A,mdn->lvec,yy); CHKERRQ(ierr);
3698965ea79SLois Curfman McInnes   return 0;
3708965ea79SLois Curfman McInnes }
3718965ea79SLois Curfman McInnes 
37239ddd567SLois Curfman McInnes static int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
3738965ea79SLois Curfman McInnes {
37439ddd567SLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
3758965ea79SLois Curfman McInnes   int          ierr;
376c456f294SBarry Smith 
377c456f294SBarry Smith   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx);CHKERRQ(ierr);
378c456f294SBarry Smith   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx);CHKERRQ(ierr);
37939ddd567SLois Curfman McInnes   ierr = MatMultAdd(mdn->A,mdn->lvec,yy,zz); CHKERRQ(ierr);
3808965ea79SLois Curfman McInnes   return 0;
3818965ea79SLois Curfman McInnes }
3828965ea79SLois Curfman McInnes 
383096963f5SLois Curfman McInnes static int MatMultTrans_MPIDense(Mat A,Vec xx,Vec yy)
384096963f5SLois Curfman McInnes {
385096963f5SLois Curfman McInnes   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
386096963f5SLois Curfman McInnes   int          ierr;
3873501a2bdSLois Curfman McInnes   Scalar       zero = 0.0;
388096963f5SLois Curfman McInnes 
3893501a2bdSLois Curfman McInnes   ierr = VecSet(&zero,yy); CHKERRQ(ierr);
390096963f5SLois Curfman McInnes   ierr = MatMultTrans(a->A,xx,a->lvec); CHKERRQ(ierr);
391096963f5SLois Curfman McInnes   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,
392096963f5SLois Curfman McInnes          (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
393096963f5SLois Curfman McInnes   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,
394096963f5SLois Curfman McInnes          (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
395096963f5SLois Curfman McInnes   return 0;
396096963f5SLois Curfman McInnes }
397096963f5SLois Curfman McInnes 
398096963f5SLois Curfman McInnes static int MatMultTransAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
399096963f5SLois Curfman McInnes {
400096963f5SLois Curfman McInnes   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
401096963f5SLois Curfman McInnes   int          ierr;
402096963f5SLois Curfman McInnes 
4033501a2bdSLois Curfman McInnes   ierr = VecCopy(yy,zz); CHKERRQ(ierr);
4043501a2bdSLois Curfman McInnes   ierr = MatMultTrans(a->A,xx,a->lvec); CHKERRQ(ierr);
405096963f5SLois Curfman McInnes   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,
406096963f5SLois Curfman McInnes          (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
407096963f5SLois Curfman McInnes   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,
408096963f5SLois Curfman McInnes          (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
409096963f5SLois Curfman McInnes   return 0;
410096963f5SLois Curfman McInnes }
411096963f5SLois Curfman McInnes 
41239ddd567SLois Curfman McInnes static int MatGetDiagonal_MPIDense(Mat A,Vec v)
4138965ea79SLois Curfman McInnes {
41439ddd567SLois Curfman McInnes   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
415096963f5SLois Curfman McInnes   Mat_SeqDense *aloc = (Mat_SeqDense *) a->A->data;
416096963f5SLois Curfman McInnes   int          ierr, i, n, m = a->m, radd;
417096963f5SLois Curfman McInnes   Scalar       *x;
418ed3cc1f0SBarry Smith 
419096963f5SLois Curfman McInnes   ierr = VecGetArray(v,&x); CHKERRQ(ierr);
420096963f5SLois Curfman McInnes   ierr = VecGetSize(v,&n); CHKERRQ(ierr);
421096963f5SLois Curfman McInnes   if (n != a->M) SETERRQ(1,"MatGetDiagonal_SeqDense:Nonconforming mat and vec");
422096963f5SLois Curfman McInnes   radd = a->rstart*m*m;
423096963f5SLois Curfman McInnes   for ( i=0; i<m; i++ ) {
424096963f5SLois Curfman McInnes     x[i] = aloc->v[radd + i*m + i];
425096963f5SLois Curfman McInnes   }
426096963f5SLois Curfman McInnes   return 0;
4278965ea79SLois Curfman McInnes }
4288965ea79SLois Curfman McInnes 
42939ddd567SLois Curfman McInnes static int MatDestroy_MPIDense(PetscObject obj)
4308965ea79SLois Curfman McInnes {
4318965ea79SLois Curfman McInnes   Mat          mat = (Mat) obj;
4323501a2bdSLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
4338965ea79SLois Curfman McInnes   int          ierr;
434ed3cc1f0SBarry Smith 
4358965ea79SLois Curfman McInnes #if defined(PETSC_LOG)
4363501a2bdSLois Curfman McInnes   PLogObjectState(obj,"Rows=%d, Cols=%d",mdn->M,mdn->N);
4378965ea79SLois Curfman McInnes #endif
4380452661fSBarry Smith   PetscFree(mdn->rowners);
4393501a2bdSLois Curfman McInnes   ierr = MatDestroy(mdn->A); CHKERRQ(ierr);
4403501a2bdSLois Curfman McInnes   if (mdn->lvec)   VecDestroy(mdn->lvec);
4413501a2bdSLois Curfman McInnes   if (mdn->Mvctx)  VecScatterDestroy(mdn->Mvctx);
442622d7880SLois Curfman McInnes   if (mdn->factor) {
443622d7880SLois Curfman McInnes     if (mdn->factor->temp)   PetscFree(mdn->factor->temp);
444622d7880SLois Curfman McInnes     if (mdn->factor->tag)    PetscFree(mdn->factor->tag);
445622d7880SLois Curfman McInnes     if (mdn->factor->pivots) PetscFree(mdn->factor->pivots);
446622d7880SLois Curfman McInnes     PetscFree(mdn->factor);
447622d7880SLois Curfman McInnes   }
4480452661fSBarry Smith   PetscFree(mdn);
4498965ea79SLois Curfman McInnes   PLogObjectDestroy(mat);
4500452661fSBarry Smith   PetscHeaderDestroy(mat);
4518965ea79SLois Curfman McInnes   return 0;
4528965ea79SLois Curfman McInnes }
45339ddd567SLois Curfman McInnes 
4548965ea79SLois Curfman McInnes #include "pinclude/pviewer.h"
4558965ea79SLois Curfman McInnes 
45639ddd567SLois Curfman McInnes static int MatView_MPIDense_Binary(Mat mat,Viewer viewer)
4578965ea79SLois Curfman McInnes {
45839ddd567SLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
4598965ea79SLois Curfman McInnes   int          ierr;
46039ddd567SLois Curfman McInnes   if (mdn->size == 1) {
46139ddd567SLois Curfman McInnes     ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
4628965ea79SLois Curfman McInnes   }
46339ddd567SLois Curfman McInnes   else SETERRQ(1,"MatView_MPIDense_Binary:Only uniprocessor output supported");
4648965ea79SLois Curfman McInnes   return 0;
4658965ea79SLois Curfman McInnes }
4668965ea79SLois Curfman McInnes 
46739ddd567SLois Curfman McInnes static int MatView_MPIDense_ASCII(Mat mat,Viewer viewer)
4688965ea79SLois Curfman McInnes {
46939ddd567SLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
47039ddd567SLois Curfman McInnes   int          ierr, format;
4718965ea79SLois Curfman McInnes   PetscObject  vobj = (PetscObject) viewer;
4728965ea79SLois Curfman McInnes   FILE         *fd;
4738965ea79SLois Curfman McInnes 
474227d817aSBarry Smith   ierr = ViewerFileGetPointer(viewer,&fd); CHKERRQ(ierr);
4758965ea79SLois Curfman McInnes   if (vobj->type == ASCII_FILE_VIEWER || vobj->type == ASCII_FILES_VIEWER) {
4768965ea79SLois Curfman McInnes     ierr = ViewerFileGetFormat_Private(viewer,&format);
4773501a2bdSLois Curfman McInnes     if (format == FILE_FORMAT_INFO_DETAILED) {
478096963f5SLois Curfman McInnes       int nz, nzalloc, mem, rank;
479096963f5SLois Curfman McInnes       MPI_Comm_rank(mat->comm,&rank);
480096963f5SLois Curfman McInnes       ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem);
481096963f5SLois Curfman McInnes       MPIU_Seq_begin(mat->comm,1);
4823501a2bdSLois Curfman McInnes         fprintf(fd,"  [%d] local rows %d nz %d nz alloced %d mem %d \n",
483096963f5SLois Curfman McInnes             rank,mdn->m,nz,nzalloc,mem);
484096963f5SLois Curfman McInnes       fflush(fd);
485096963f5SLois Curfman McInnes       MPIU_Seq_end(mat->comm,1);
4863501a2bdSLois Curfman McInnes       ierr = VecScatterView(mdn->Mvctx,viewer); CHKERRQ(ierr);
4873501a2bdSLois Curfman McInnes       return 0;
4883501a2bdSLois Curfman McInnes     }
4893501a2bdSLois Curfman McInnes     else if (format == FILE_FORMAT_INFO) {
490096963f5SLois Curfman McInnes       return 0;
4918965ea79SLois Curfman McInnes     }
4928965ea79SLois Curfman McInnes   }
4938965ea79SLois Curfman McInnes   if (vobj->type == ASCII_FILE_VIEWER) {
4948965ea79SLois Curfman McInnes     MPIU_Seq_begin(mat->comm,1);
49539ddd567SLois Curfman McInnes     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d\n",
49639ddd567SLois Curfman McInnes              mdn->rank,mdn->m,mdn->rstart,mdn->rend,mdn->n);
49739ddd567SLois Curfman McInnes     ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
4988965ea79SLois Curfman McInnes     fflush(fd);
4998965ea79SLois Curfman McInnes     MPIU_Seq_end(mat->comm,1);
5008965ea79SLois Curfman McInnes   }
5018965ea79SLois Curfman McInnes   else {
50239ddd567SLois Curfman McInnes     int size = mdn->size, rank = mdn->rank;
5038965ea79SLois Curfman McInnes     if (size == 1) {
50439ddd567SLois Curfman McInnes       ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
5058965ea79SLois Curfman McInnes     }
5068965ea79SLois Curfman McInnes     else {
5078965ea79SLois Curfman McInnes       /* assemble the entire matrix onto first processor. */
5088965ea79SLois Curfman McInnes       Mat          A;
50939ddd567SLois Curfman McInnes       int          M = mdn->M, N = mdn->N,m,row,i, nz, *cols;
51039ddd567SLois Curfman McInnes       Scalar       *vals;
51139ddd567SLois Curfman McInnes       Mat_SeqDense *Amdn = (Mat_SeqDense*) mdn->A->data;
5128965ea79SLois Curfman McInnes 
5138965ea79SLois Curfman McInnes       if (!rank) {
514b4fd4287SBarry Smith         ierr = MatCreateMPIDense(mat->comm,M,M,N,N,PETSC_NULL,&A); CHKERRQ(ierr);
5158965ea79SLois Curfman McInnes       }
5168965ea79SLois Curfman McInnes       else {
517b4fd4287SBarry Smith         ierr = MatCreateMPIDense(mat->comm,0,M,N,N,PETSC_NULL,&A); CHKERRQ(ierr);
5188965ea79SLois Curfman McInnes       }
5198965ea79SLois Curfman McInnes       PLogObjectParent(mat,A);
5208965ea79SLois Curfman McInnes 
52139ddd567SLois Curfman McInnes       /* Copy the matrix ... This isn't the most efficient means,
52239ddd567SLois Curfman McInnes          but it's quick for now */
52339ddd567SLois Curfman McInnes       row = mdn->rstart; m = Amdn->m;
5248965ea79SLois Curfman McInnes       for ( i=0; i<m; i++ ) {
52539ddd567SLois Curfman McInnes         ierr = MatGetRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
52639ddd567SLois Curfman McInnes         ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES); CHKERRQ(ierr);
52739ddd567SLois Curfman McInnes         ierr = MatRestoreRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
52839ddd567SLois Curfman McInnes         row++;
5298965ea79SLois Curfman McInnes       }
5308965ea79SLois Curfman McInnes 
5318965ea79SLois Curfman McInnes       ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
5328965ea79SLois Curfman McInnes       ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
5338965ea79SLois Curfman McInnes       if (!rank) {
53439ddd567SLois Curfman McInnes         ierr = MatView(((Mat_MPIDense*)(A->data))->A,viewer); CHKERRQ(ierr);
5358965ea79SLois Curfman McInnes       }
5368965ea79SLois Curfman McInnes       ierr = MatDestroy(A); CHKERRQ(ierr);
5378965ea79SLois Curfman McInnes     }
5388965ea79SLois Curfman McInnes   }
5398965ea79SLois Curfman McInnes   return 0;
5408965ea79SLois Curfman McInnes }
5418965ea79SLois Curfman McInnes 
54239ddd567SLois Curfman McInnes static int MatView_MPIDense(PetscObject obj,Viewer viewer)
5438965ea79SLois Curfman McInnes {
5448965ea79SLois Curfman McInnes   Mat          mat = (Mat) obj;
5458965ea79SLois Curfman McInnes   PetscObject  vobj = (PetscObject) viewer;
54639ddd567SLois Curfman McInnes   int          ierr;
5478965ea79SLois Curfman McInnes 
5488965ea79SLois Curfman McInnes   if (!viewer) {
5498965ea79SLois Curfman McInnes     viewer = STDOUT_VIEWER_SELF; vobj = (PetscObject) viewer;
5508965ea79SLois Curfman McInnes   }
55139ddd567SLois Curfman McInnes   if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILE_VIEWER) {
55239ddd567SLois Curfman McInnes     ierr = MatView_MPIDense_ASCII(mat,viewer); CHKERRQ(ierr);
5538965ea79SLois Curfman McInnes   }
5548965ea79SLois Curfman McInnes   else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILES_VIEWER) {
55539ddd567SLois Curfman McInnes     ierr = MatView_MPIDense_ASCII(mat,viewer); CHKERRQ(ierr);
5568965ea79SLois Curfman McInnes   }
5578965ea79SLois Curfman McInnes   else if (vobj->type == BINARY_FILE_VIEWER) {
55839ddd567SLois Curfman McInnes     return MatView_MPIDense_Binary(mat,viewer);
5598965ea79SLois Curfman McInnes   }
5608965ea79SLois Curfman McInnes   return 0;
5618965ea79SLois Curfman McInnes }
5628965ea79SLois Curfman McInnes 
5633501a2bdSLois Curfman McInnes static int MatGetInfo_MPIDense(Mat A,MatInfoType flag,int *nz,
5648965ea79SLois Curfman McInnes                              int *nzalloc,int *mem)
5658965ea79SLois Curfman McInnes {
5663501a2bdSLois Curfman McInnes   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
5673501a2bdSLois Curfman McInnes   Mat          mdn = mat->A;
56839ddd567SLois Curfman McInnes   int          ierr, isend[3], irecv[3];
5698965ea79SLois Curfman McInnes 
5703501a2bdSLois Curfman McInnes   ierr = MatGetInfo(mdn,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr);
5718965ea79SLois Curfman McInnes   if (flag == MAT_LOCAL) {
5728965ea79SLois Curfman McInnes     *nz = isend[0]; *nzalloc = isend[1]; *mem = isend[2];
5738965ea79SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_MAX) {
5743501a2bdSLois Curfman McInnes     MPI_Allreduce(isend,irecv,3,MPI_INT,MPI_MAX,A->comm);
5758965ea79SLois Curfman McInnes     *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
5768965ea79SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_SUM) {
5773501a2bdSLois Curfman McInnes     MPI_Allreduce(isend,irecv,3,MPI_INT,MPI_SUM,A->comm);
5788965ea79SLois Curfman McInnes     *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
5798965ea79SLois Curfman McInnes   }
5808965ea79SLois Curfman McInnes   return 0;
5818965ea79SLois Curfman McInnes }
5828965ea79SLois Curfman McInnes 
5838c469469SLois Curfman McInnes /* extern int MatLUFactorSymbolic_MPIDense(Mat,IS,IS,double,Mat*);
5848aaee692SLois Curfman McInnes    extern int MatLUFactorNumeric_MPIDense(Mat,Mat*);
5858aaee692SLois Curfman McInnes    extern int MatLUFactor_MPIDense(Mat,IS,IS,double);
5868aaee692SLois Curfman McInnes    extern int MatSolve_MPIDense(Mat,Vec,Vec);
5878c469469SLois Curfman McInnes    extern int MatSolveAdd_MPIDense(Mat,Vec,Vec,Vec);
5888aaee692SLois Curfman McInnes    extern int MatSolveTrans_MPIDense(Mat,Vec,Vec);
5898aaee692SLois Curfman McInnes    extern int MatSolveTransAdd_MPIDense(Mat,Vec,Vec,Vec); */
5908aaee692SLois Curfman McInnes 
59139ddd567SLois Curfman McInnes static int MatSetOption_MPIDense(Mat A,MatOption op)
5928965ea79SLois Curfman McInnes {
59339ddd567SLois Curfman McInnes   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
5948965ea79SLois Curfman McInnes 
5958965ea79SLois Curfman McInnes   if (op == NO_NEW_NONZERO_LOCATIONS ||
5968965ea79SLois Curfman McInnes       op == YES_NEW_NONZERO_LOCATIONS ||
5978965ea79SLois Curfman McInnes       op == COLUMNS_SORTED ||
5988965ea79SLois Curfman McInnes       op == ROW_ORIENTED) {
5998965ea79SLois Curfman McInnes         MatSetOption(a->A,op);
6008965ea79SLois Curfman McInnes   }
6018965ea79SLois Curfman McInnes   else if (op == ROWS_SORTED ||
6028965ea79SLois Curfman McInnes            op == SYMMETRIC_MATRIX ||
6038965ea79SLois Curfman McInnes            op == STRUCTURALLY_SYMMETRIC_MATRIX ||
6048965ea79SLois Curfman McInnes            op == YES_NEW_DIAGONALS)
60539ddd567SLois Curfman McInnes     PLogInfo((PetscObject)A,"Info:MatSetOption_MPIDense:Option ignored\n");
6068965ea79SLois Curfman McInnes   else if (op == COLUMN_ORIENTED)
60739b7565bSBarry Smith     { a->roworiented = 0; MatSetOption(a->A,op);}
6088965ea79SLois Curfman McInnes   else if (op == NO_NEW_DIAGONALS)
60939ddd567SLois Curfman McInnes     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIDense:NO_NEW_DIAGONALS");}
6108965ea79SLois Curfman McInnes   else
61139ddd567SLois Curfman McInnes     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIDense:unknown option");}
6128965ea79SLois Curfman McInnes   return 0;
6138965ea79SLois Curfman McInnes }
6148965ea79SLois Curfman McInnes 
6153501a2bdSLois Curfman McInnes static int MatGetSize_MPIDense(Mat A,int *m,int *n)
6168965ea79SLois Curfman McInnes {
6173501a2bdSLois Curfman McInnes   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
6188965ea79SLois Curfman McInnes   *m = mat->M; *n = mat->N;
6198965ea79SLois Curfman McInnes   return 0;
6208965ea79SLois Curfman McInnes }
6218965ea79SLois Curfman McInnes 
6223501a2bdSLois Curfman McInnes static int MatGetLocalSize_MPIDense(Mat A,int *m,int *n)
6238965ea79SLois Curfman McInnes {
6243501a2bdSLois Curfman McInnes   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
6258965ea79SLois Curfman McInnes   *m = mat->m; *n = mat->N;
6268965ea79SLois Curfman McInnes   return 0;
6278965ea79SLois Curfman McInnes }
6288965ea79SLois Curfman McInnes 
6293501a2bdSLois Curfman McInnes static int MatGetOwnershipRange_MPIDense(Mat A,int *m,int *n)
6308965ea79SLois Curfman McInnes {
6313501a2bdSLois Curfman McInnes   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
6328965ea79SLois Curfman McInnes   *m = mat->rstart; *n = mat->rend;
6338965ea79SLois Curfman McInnes   return 0;
6348965ea79SLois Curfman McInnes }
6358965ea79SLois Curfman McInnes 
6363501a2bdSLois Curfman McInnes static int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,Scalar **v)
6378965ea79SLois Curfman McInnes {
6383501a2bdSLois Curfman McInnes   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
63939ddd567SLois Curfman McInnes   int          lrow, rstart = mat->rstart, rend = mat->rend;
6408965ea79SLois Curfman McInnes 
64139ddd567SLois Curfman McInnes   if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIDense:only local rows")
6428965ea79SLois Curfman McInnes   lrow = row - rstart;
64339ddd567SLois Curfman McInnes   return MatGetRow(mat->A,lrow,nz,idx,v);
6448965ea79SLois Curfman McInnes }
6458965ea79SLois Curfman McInnes 
64639ddd567SLois Curfman McInnes static int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,Scalar **v)
6478965ea79SLois Curfman McInnes {
6480452661fSBarry Smith   if (idx) PetscFree(*idx);
6490452661fSBarry Smith   if (v) PetscFree(*v);
6508965ea79SLois Curfman McInnes   return 0;
6518965ea79SLois Curfman McInnes }
6528965ea79SLois Curfman McInnes 
653cddf8d76SBarry Smith static int MatNorm_MPIDense(Mat A,NormType type,double *norm)
654096963f5SLois Curfman McInnes {
6553501a2bdSLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *) A->data;
6563501a2bdSLois Curfman McInnes   Mat_SeqDense *mat = (Mat_SeqDense*) mdn->A->data;
6573501a2bdSLois Curfman McInnes   int          ierr, i, j;
6583501a2bdSLois Curfman McInnes   double       sum = 0.0;
6593501a2bdSLois Curfman McInnes   Scalar       *v = mat->v;
6603501a2bdSLois Curfman McInnes 
6613501a2bdSLois Curfman McInnes   if (mdn->size == 1) {
6623501a2bdSLois Curfman McInnes     ierr =  MatNorm(mdn->A,type,norm); CHKERRQ(ierr);
6633501a2bdSLois Curfman McInnes   } else {
6643501a2bdSLois Curfman McInnes     if (type == NORM_FROBENIUS) {
6653501a2bdSLois Curfman McInnes       for (i=0; i<mat->n*mat->m; i++ ) {
6663501a2bdSLois Curfman McInnes #if defined(PETSC_COMPLEX)
6673501a2bdSLois Curfman McInnes         sum += real(conj(*v)*(*v)); v++;
6683501a2bdSLois Curfman McInnes #else
6693501a2bdSLois Curfman McInnes         sum += (*v)*(*v); v++;
6703501a2bdSLois Curfman McInnes #endif
6713501a2bdSLois Curfman McInnes       }
672*6d84be18SBarry Smith       MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,A->comm);
6733501a2bdSLois Curfman McInnes       *norm = sqrt(*norm);
6743501a2bdSLois Curfman McInnes       PLogFlops(2*mat->n*mat->m);
6753501a2bdSLois Curfman McInnes     }
6763501a2bdSLois Curfman McInnes     else if (type == NORM_1) {
6773501a2bdSLois Curfman McInnes       double *tmp, *tmp2;
6780452661fSBarry Smith       tmp  = (double *) PetscMalloc( 2*mdn->N*sizeof(double) ); CHKPTRQ(tmp);
6793501a2bdSLois Curfman McInnes       tmp2 = tmp + mdn->N;
680cddf8d76SBarry Smith       PetscMemzero(tmp,2*mdn->N*sizeof(double));
681096963f5SLois Curfman McInnes       *norm = 0.0;
6823501a2bdSLois Curfman McInnes       v = mat->v;
6833501a2bdSLois Curfman McInnes       for ( j=0; j<mat->n; j++ ) {
6843501a2bdSLois Curfman McInnes         for ( i=0; i<mat->m; i++ ) {
68567e560aaSBarry Smith           tmp[j] += PetscAbsScalar(*v);  v++;
6863501a2bdSLois Curfman McInnes         }
6873501a2bdSLois Curfman McInnes       }
688*6d84be18SBarry Smith       MPI_Allreduce(tmp,tmp2,mdn->N,MPI_DOUBLE,MPI_SUM,A->comm);
6893501a2bdSLois Curfman McInnes       for ( j=0; j<mdn->N; j++ ) {
6903501a2bdSLois Curfman McInnes         if (tmp2[j] > *norm) *norm = tmp2[j];
6913501a2bdSLois Curfman McInnes       }
6920452661fSBarry Smith       PetscFree(tmp);
6933501a2bdSLois Curfman McInnes       PLogFlops(mat->n*mat->m);
6943501a2bdSLois Curfman McInnes     }
6953501a2bdSLois Curfman McInnes     else if (type == NORM_INFINITY) { /* max row norm */
6963501a2bdSLois Curfman McInnes       double ntemp;
6973501a2bdSLois Curfman McInnes       ierr = MatNorm(mdn->A,type,&ntemp); CHKERRQ(ierr);
698*6d84be18SBarry Smith       MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,A->comm);
6993501a2bdSLois Curfman McInnes     }
7003501a2bdSLois Curfman McInnes     else {
7013501a2bdSLois Curfman McInnes       SETERRQ(1,"MatNorm_MPIDense:No support for two norm");
7023501a2bdSLois Curfman McInnes     }
7033501a2bdSLois Curfman McInnes   }
7043501a2bdSLois Curfman McInnes   return 0;
7053501a2bdSLois Curfman McInnes }
7063501a2bdSLois Curfman McInnes 
7073501a2bdSLois Curfman McInnes static int MatTranspose_MPIDense(Mat A,Mat *matout)
7083501a2bdSLois Curfman McInnes {
7093501a2bdSLois Curfman McInnes   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
7103501a2bdSLois Curfman McInnes   Mat_SeqDense *Aloc = (Mat_SeqDense *) a->A->data;
7113501a2bdSLois Curfman McInnes   Mat          B;
7123501a2bdSLois Curfman McInnes   int          M = a->M, N = a->N, m, n, *rwork, rstart = a->rstart;
7133501a2bdSLois Curfman McInnes   int          j, i, ierr;
7143501a2bdSLois Curfman McInnes   Scalar       *v;
7153501a2bdSLois Curfman McInnes 
7163638b69dSLois Curfman McInnes   if (matout == PETSC_NULL && M != N)
7173501a2bdSLois Curfman McInnes     SETERRQ(1,"MatTranspose_MPIDense:Supports square matrix only in-place");
718b4fd4287SBarry Smith   ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);
719ed2daf61SLois Curfman McInnes          CHKERRQ(ierr);
7203501a2bdSLois Curfman McInnes 
7213501a2bdSLois Curfman McInnes   m = Aloc->m; n = Aloc->n; v = Aloc->v;
7220452661fSBarry Smith   rwork = (int *) PetscMalloc(n*sizeof(int)); CHKPTRQ(rwork);
7233501a2bdSLois Curfman McInnes   for ( j=0; j<n; j++ ) {
7243501a2bdSLois Curfman McInnes     for (i=0; i<m; i++) rwork[i] = rstart + i;
7253501a2bdSLois Curfman McInnes     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES); CHKERRQ(ierr);
7263501a2bdSLois Curfman McInnes     v += m;
7273501a2bdSLois Curfman McInnes   }
7280452661fSBarry Smith   PetscFree(rwork);
7293501a2bdSLois Curfman McInnes   ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
7303501a2bdSLois Curfman McInnes   ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
7313638b69dSLois Curfman McInnes   if (matout != PETSC_NULL) {
7323501a2bdSLois Curfman McInnes     *matout = B;
7333501a2bdSLois Curfman McInnes   } else {
7343501a2bdSLois Curfman McInnes     /* This isn't really an in-place transpose, but free data struct from a */
7350452661fSBarry Smith     PetscFree(a->rowners);
7363501a2bdSLois Curfman McInnes     ierr = MatDestroy(a->A); CHKERRQ(ierr);
7373501a2bdSLois Curfman McInnes     if (a->lvec) VecDestroy(a->lvec);
7383501a2bdSLois Curfman McInnes     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
7390452661fSBarry Smith     PetscFree(a);
7403501a2bdSLois Curfman McInnes     PetscMemcpy(A,B,sizeof(struct _Mat));
7410452661fSBarry Smith     PetscHeaderDestroy(B);
7423501a2bdSLois Curfman McInnes   }
743096963f5SLois Curfman McInnes   return 0;
744096963f5SLois Curfman McInnes }
745096963f5SLois Curfman McInnes 
7463d1612f7SBarry Smith static int MatConvertSameType_MPIDense(Mat,Mat *,int);
7478965ea79SLois Curfman McInnes 
7488965ea79SLois Curfman McInnes /* -------------------------------------------------------------------*/
74939ddd567SLois Curfman McInnes static struct _MatOps MatOps = {MatSetValues_MPIDense,
75039ddd567SLois Curfman McInnes        MatGetRow_MPIDense,MatRestoreRow_MPIDense,
75139ddd567SLois Curfman McInnes        MatMult_MPIDense,MatMultAdd_MPIDense,
752096963f5SLois Curfman McInnes        MatMultTrans_MPIDense,MatMultTransAdd_MPIDense,
753e7ca0642SLois Curfman McInnes /*       MatSolve_MPIDense,0, */
754e7ca0642SLois Curfman McInnes        0,0,
75539ddd567SLois Curfman McInnes        0,0,
7568c469469SLois Curfman McInnes        0,0,
7578c469469SLois Curfman McInnes /*       MatLUFactor_MPIDense,0, */
7588aaee692SLois Curfman McInnes        0,MatTranspose_MPIDense,
75939ddd567SLois Curfman McInnes        MatGetInfo_MPIDense,0,
760096963f5SLois Curfman McInnes        MatGetDiagonal_MPIDense,0,MatNorm_MPIDense,
76139ddd567SLois Curfman McInnes        MatAssemblyBegin_MPIDense,MatAssemblyEnd_MPIDense,
7628965ea79SLois Curfman McInnes        0,
76339ddd567SLois Curfman McInnes        MatSetOption_MPIDense,MatZeroEntries_MPIDense,MatZeroRows_MPIDense,
7648c469469SLois Curfman McInnes        0,0,0,
7658c469469SLois Curfman McInnes /*       0,MatLUFactorSymbolic_MPIDense,MatLUFactorNumeric_MPIDense, */
7668aaee692SLois Curfman McInnes        0,0,
76739ddd567SLois Curfman McInnes        MatGetSize_MPIDense,MatGetLocalSize_MPIDense,
76839ddd567SLois Curfman McInnes        MatGetOwnershipRange_MPIDense,
76939ddd567SLois Curfman McInnes        0,0,
7703d1612f7SBarry Smith        0,0,0,0,0,MatConvertSameType_MPIDense,
771b49de8d1SLois Curfman McInnes        0,0,0,0,0,
772b49de8d1SLois Curfman McInnes        0,0,MatGetValues_MPIDense};
7738965ea79SLois Curfman McInnes 
7748965ea79SLois Curfman McInnes /*@C
77539ddd567SLois Curfman McInnes    MatCreateMPIDense - Creates a sparse parallel matrix in dense format.
7768965ea79SLois Curfman McInnes 
7778965ea79SLois Curfman McInnes    Input Parameters:
7788965ea79SLois Curfman McInnes .  comm - MPI communicator
7798965ea79SLois Curfman McInnes .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
7808965ea79SLois Curfman McInnes .  n - number of local columns (or PETSC_DECIDE to have calculated
7818965ea79SLois Curfman McInnes            if N is given)
7828965ea79SLois Curfman McInnes .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
7838965ea79SLois Curfman McInnes .  N - number of global columns (or PETSC_DECIDE to have calculated
7848965ea79SLois Curfman McInnes            if n is given)
785b4fd4287SBarry Smith .  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
786dfc5480cSLois Curfman McInnes    to control all matrix memory allocation.
7878965ea79SLois Curfman McInnes 
7888965ea79SLois Curfman McInnes    Output Parameter:
7898965ea79SLois Curfman McInnes .  newmat - the matrix
7908965ea79SLois Curfman McInnes 
7918965ea79SLois Curfman McInnes    Notes:
79239ddd567SLois Curfman McInnes    The dense format is fully compatible with standard Fortran 77
79339ddd567SLois Curfman McInnes    storage by columns.
7948965ea79SLois Curfman McInnes 
79518f449edSLois Curfman McInnes    The data input variable is intended primarily for Fortran programmers
79618f449edSLois Curfman McInnes    who wish to allocate their own matrix memory space.  Most users should
797b4fd4287SBarry Smith    set data=PETSC_NULL.
79818f449edSLois Curfman McInnes 
7998965ea79SLois Curfman McInnes    The user MUST specify either the local or global matrix dimensions
8008965ea79SLois Curfman McInnes    (possibly both).
8018965ea79SLois Curfman McInnes 
8023501a2bdSLois Curfman McInnes    Currently, the only parallel dense matrix decomposition is by rows,
8033501a2bdSLois Curfman McInnes    so that n=N and each submatrix owns all of the global columns.
8043501a2bdSLois Curfman McInnes 
80539ddd567SLois Curfman McInnes .keywords: matrix, dense, parallel
8068965ea79SLois Curfman McInnes 
80739ddd567SLois Curfman McInnes .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
8088965ea79SLois Curfman McInnes @*/
80918f449edSLois Curfman McInnes int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,Scalar *data,Mat *newmat)
8108965ea79SLois Curfman McInnes {
8118965ea79SLois Curfman McInnes   Mat          mat;
81239ddd567SLois Curfman McInnes   Mat_MPIDense *a;
81325cdf11fSBarry Smith   int          ierr, i,flg;
8148965ea79SLois Curfman McInnes 
815ed2daf61SLois Curfman McInnes /* Note:  For now, when data is specified above, this assumes the user correctly
816ed2daf61SLois Curfman McInnes    allocates the local dense storage space.  We should add error checking. */
81718f449edSLois Curfman McInnes 
8188965ea79SLois Curfman McInnes   *newmat         = 0;
8190452661fSBarry Smith   PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIDENSE,comm);
8208965ea79SLois Curfman McInnes   PLogObjectCreate(mat);
8210452661fSBarry Smith   mat->data       = (void *) (a = PetscNew(Mat_MPIDense)); CHKPTRQ(a);
8228965ea79SLois Curfman McInnes   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
82339ddd567SLois Curfman McInnes   mat->destroy    = MatDestroy_MPIDense;
82439ddd567SLois Curfman McInnes   mat->view       = MatView_MPIDense;
8258965ea79SLois Curfman McInnes   mat->factor     = 0;
8268965ea79SLois Curfman McInnes 
827622d7880SLois Curfman McInnes   a->factor = 0;
8288965ea79SLois Curfman McInnes   a->insertmode = NOT_SET_VALUES;
8298965ea79SLois Curfman McInnes   MPI_Comm_rank(comm,&a->rank);
8308965ea79SLois Curfman McInnes   MPI_Comm_size(comm,&a->size);
8318965ea79SLois Curfman McInnes 
83239ddd567SLois Curfman McInnes   if (M == PETSC_DECIDE) MPI_Allreduce(&m,&M,1,MPI_INT,MPI_SUM,comm);
8338965ea79SLois Curfman McInnes   if (m == PETSC_DECIDE) {m = M/a->size + ((M % a->size) > a->rank);}
83439ddd567SLois Curfman McInnes 
83539ddd567SLois Curfman McInnes   /* each row stores all columns */
83639ddd567SLois Curfman McInnes   if (N == PETSC_DECIDE) N = n;
837d7e8b826SBarry Smith   if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);}
838d7e8b826SBarry Smith   /*  if (n != N) SETERRQ(1,"MatCreateMPIDense:For now, only n=N is supported"); */
8398965ea79SLois Curfman McInnes   a->N = N;
8408965ea79SLois Curfman McInnes   a->M = M;
84139ddd567SLois Curfman McInnes   a->m = m;
84239ddd567SLois Curfman McInnes   a->n = n;
8438965ea79SLois Curfman McInnes 
8448965ea79SLois Curfman McInnes   /* build local table of row and column ownerships */
845d7e8b826SBarry Smith   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
846d7e8b826SBarry Smith   a->cowners = a->rowners + a->size + 1;
847d7e8b826SBarry Smith   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+
84839ddd567SLois Curfman McInnes                        sizeof(Mat_MPIDense));
8498965ea79SLois Curfman McInnes   MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);
8508965ea79SLois Curfman McInnes   a->rowners[0] = 0;
8518965ea79SLois Curfman McInnes   for ( i=2; i<=a->size; i++ ) {
8528965ea79SLois Curfman McInnes     a->rowners[i] += a->rowners[i-1];
8538965ea79SLois Curfman McInnes   }
8548965ea79SLois Curfman McInnes   a->rstart = a->rowners[a->rank];
8558965ea79SLois Curfman McInnes   a->rend   = a->rowners[a->rank+1];
856d7e8b826SBarry Smith   MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);
857d7e8b826SBarry Smith   a->cowners[0] = 0;
858d7e8b826SBarry Smith   for ( i=2; i<=a->size; i++ ) {
859d7e8b826SBarry Smith     a->cowners[i] += a->cowners[i-1];
860d7e8b826SBarry Smith   }
8618965ea79SLois Curfman McInnes 
86218f449edSLois Curfman McInnes   ierr = MatCreateSeqDense(MPI_COMM_SELF,m,N,data,&a->A); CHKERRQ(ierr);
8638965ea79SLois Curfman McInnes   PLogObjectParent(mat,a->A);
8648965ea79SLois Curfman McInnes 
8658965ea79SLois Curfman McInnes   /* build cache for off array entries formed */
8668965ea79SLois Curfman McInnes   ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr);
8678965ea79SLois Curfman McInnes 
8688965ea79SLois Curfman McInnes   /* stuff used for matrix vector multiply */
8698965ea79SLois Curfman McInnes   a->lvec        = 0;
8708965ea79SLois Curfman McInnes   a->Mvctx       = 0;
87139b7565bSBarry Smith   a->roworiented = 1;
8728965ea79SLois Curfman McInnes 
8738965ea79SLois Curfman McInnes   *newmat = mat;
87425cdf11fSBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
87525cdf11fSBarry Smith   if (flg) {
8768c469469SLois Curfman McInnes     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
8778c469469SLois Curfman McInnes   }
8788965ea79SLois Curfman McInnes   return 0;
8798965ea79SLois Curfman McInnes }
8808965ea79SLois Curfman McInnes 
8813d1612f7SBarry Smith static int MatConvertSameType_MPIDense(Mat A,Mat *newmat,int cpvalues)
8828965ea79SLois Curfman McInnes {
8838965ea79SLois Curfman McInnes   Mat          mat;
8843501a2bdSLois Curfman McInnes   Mat_MPIDense *a,*oldmat = (Mat_MPIDense *) A->data;
88539ddd567SLois Curfman McInnes   int          ierr;
8862ba99913SLois Curfman McInnes   FactorCtx    *factor;
8878965ea79SLois Curfman McInnes 
8888965ea79SLois Curfman McInnes   *newmat       = 0;
8890452661fSBarry Smith   PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIDENSE,A->comm);
8908965ea79SLois Curfman McInnes   PLogObjectCreate(mat);
8910452661fSBarry Smith   mat->data     = (void *) (a = PetscNew(Mat_MPIDense)); CHKPTRQ(a);
8928965ea79SLois Curfman McInnes   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
89339ddd567SLois Curfman McInnes   mat->destroy  = MatDestroy_MPIDense;
89439ddd567SLois Curfman McInnes   mat->view     = MatView_MPIDense;
8953501a2bdSLois Curfman McInnes   mat->factor   = A->factor;
896c456f294SBarry Smith   mat->assembled  = PETSC_TRUE;
8978965ea79SLois Curfman McInnes 
8988965ea79SLois Curfman McInnes   a->m          = oldmat->m;
8998965ea79SLois Curfman McInnes   a->n          = oldmat->n;
9008965ea79SLois Curfman McInnes   a->M          = oldmat->M;
9018965ea79SLois Curfman McInnes   a->N          = oldmat->N;
9022ba99913SLois Curfman McInnes   if (oldmat->factor) {
9032ba99913SLois Curfman McInnes     a->factor = (FactorCtx *) (factor = PetscNew(FactorCtx)); CHKPTRQ(factor);
9042ba99913SLois Curfman McInnes     /* copy factor contents ... add this code! */
9052ba99913SLois Curfman McInnes   } else a->factor = 0;
9068965ea79SLois Curfman McInnes 
9078965ea79SLois Curfman McInnes   a->rstart     = oldmat->rstart;
9088965ea79SLois Curfman McInnes   a->rend       = oldmat->rend;
9098965ea79SLois Curfman McInnes   a->size       = oldmat->size;
9108965ea79SLois Curfman McInnes   a->rank       = oldmat->rank;
9118965ea79SLois Curfman McInnes   a->insertmode = NOT_SET_VALUES;
9128965ea79SLois Curfman McInnes 
9130452661fSBarry Smith   a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners);
91439ddd567SLois Curfman McInnes   PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIDense));
9158965ea79SLois Curfman McInnes   PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));
9168965ea79SLois Curfman McInnes   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
9178965ea79SLois Curfman McInnes 
9188965ea79SLois Curfman McInnes   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
9198965ea79SLois Curfman McInnes   PLogObjectParent(mat,a->lvec);
92055659b69SBarry Smith   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
9218965ea79SLois Curfman McInnes   PLogObjectParent(mat,a->Mvctx);
9228965ea79SLois Curfman McInnes   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
9238965ea79SLois Curfman McInnes   PLogObjectParent(mat,a->A);
9248965ea79SLois Curfman McInnes   *newmat = mat;
9258965ea79SLois Curfman McInnes   return 0;
9268965ea79SLois Curfman McInnes }
9278965ea79SLois Curfman McInnes 
9288965ea79SLois Curfman McInnes #include "sysio.h"
9298965ea79SLois Curfman McInnes 
93039ddd567SLois Curfman McInnes int MatLoad_MPIDense(Viewer bview,MatType type,Mat *newmat)
9318965ea79SLois Curfman McInnes {
9328965ea79SLois Curfman McInnes   Mat          A;
9338965ea79SLois Curfman McInnes   int          i, nz, ierr, j,rstart, rend, fd;
9348965ea79SLois Curfman McInnes   Scalar       *vals,*svals;
9358965ea79SLois Curfman McInnes   PetscObject  vobj = (PetscObject) bview;
9368965ea79SLois Curfman McInnes   MPI_Comm     comm = vobj->comm;
9378965ea79SLois Curfman McInnes   MPI_Status   status;
9388965ea79SLois Curfman McInnes   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
9398965ea79SLois Curfman McInnes   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
9408965ea79SLois Curfman McInnes   int          tag = ((PetscObject)bview)->tag;
9418965ea79SLois Curfman McInnes 
9428965ea79SLois Curfman McInnes   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
9438965ea79SLois Curfman McInnes   if (!rank) {
9448965ea79SLois Curfman McInnes     ierr = ViewerFileGetDescriptor_Private(bview,&fd); CHKERRQ(ierr);
9458965ea79SLois Curfman McInnes     ierr = SYRead(fd,(char *)header,4,SYINT); CHKERRQ(ierr);
94639ddd567SLois Curfman McInnes     if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIDenseorMPIRow:not matrix object");
9478965ea79SLois Curfman McInnes   }
9488965ea79SLois Curfman McInnes 
9498965ea79SLois Curfman McInnes   MPI_Bcast(header+1,3,MPI_INT,0,comm);
9508965ea79SLois Curfman McInnes   M = header[1]; N = header[2];
9518965ea79SLois Curfman McInnes   /* determine ownership of all rows */
9528965ea79SLois Curfman McInnes   m = M/size + ((M % size) > rank);
9530452661fSBarry Smith   rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
9548965ea79SLois Curfman McInnes   MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
9558965ea79SLois Curfman McInnes   rowners[0] = 0;
9568965ea79SLois Curfman McInnes   for ( i=2; i<=size; i++ ) {
9578965ea79SLois Curfman McInnes     rowners[i] += rowners[i-1];
9588965ea79SLois Curfman McInnes   }
9598965ea79SLois Curfman McInnes   rstart = rowners[rank];
9608965ea79SLois Curfman McInnes   rend   = rowners[rank+1];
9618965ea79SLois Curfman McInnes 
9628965ea79SLois Curfman McInnes   /* distribute row lengths to all processors */
9630452661fSBarry Smith   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
9648965ea79SLois Curfman McInnes   offlens = ourlens + (rend-rstart);
9658965ea79SLois Curfman McInnes   if (!rank) {
9660452661fSBarry Smith     rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
9678965ea79SLois Curfman McInnes     ierr = SYRead(fd,rowlengths,M,SYINT); CHKERRQ(ierr);
9680452661fSBarry Smith     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
9698965ea79SLois Curfman McInnes     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
9708965ea79SLois Curfman McInnes     MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
9710452661fSBarry Smith     PetscFree(sndcounts);
9728965ea79SLois Curfman McInnes   }
9738965ea79SLois Curfman McInnes   else {
9748965ea79SLois Curfman McInnes     MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
9758965ea79SLois Curfman McInnes   }
9768965ea79SLois Curfman McInnes 
9778965ea79SLois Curfman McInnes   if (!rank) {
9788965ea79SLois Curfman McInnes     /* calculate the number of nonzeros on each processor */
9790452661fSBarry Smith     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
980cddf8d76SBarry Smith     PetscMemzero(procsnz,size*sizeof(int));
9818965ea79SLois Curfman McInnes     for ( i=0; i<size; i++ ) {
9828965ea79SLois Curfman McInnes       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
9838965ea79SLois Curfman McInnes         procsnz[i] += rowlengths[j];
9848965ea79SLois Curfman McInnes       }
9858965ea79SLois Curfman McInnes     }
9860452661fSBarry Smith     PetscFree(rowlengths);
9878965ea79SLois Curfman McInnes 
9888965ea79SLois Curfman McInnes     /* determine max buffer needed and allocate it */
9898965ea79SLois Curfman McInnes     maxnz = 0;
9908965ea79SLois Curfman McInnes     for ( i=0; i<size; i++ ) {
9910452661fSBarry Smith       maxnz = PetscMax(maxnz,procsnz[i]);
9928965ea79SLois Curfman McInnes     }
9930452661fSBarry Smith     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
9948965ea79SLois Curfman McInnes 
9958965ea79SLois Curfman McInnes     /* read in my part of the matrix column indices  */
9968965ea79SLois Curfman McInnes     nz = procsnz[0];
9970452661fSBarry Smith     mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
9988965ea79SLois Curfman McInnes     ierr = SYRead(fd,mycols,nz,SYINT); CHKERRQ(ierr);
9998965ea79SLois Curfman McInnes 
10008965ea79SLois Curfman McInnes     /* read in every one elses and ship off */
10018965ea79SLois Curfman McInnes     for ( i=1; i<size; i++ ) {
10028965ea79SLois Curfman McInnes       nz = procsnz[i];
10038965ea79SLois Curfman McInnes       ierr = SYRead(fd,cols,nz,SYINT); CHKERRQ(ierr);
10048965ea79SLois Curfman McInnes       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
10058965ea79SLois Curfman McInnes     }
10060452661fSBarry Smith     PetscFree(cols);
10078965ea79SLois Curfman McInnes   }
10088965ea79SLois Curfman McInnes   else {
10098965ea79SLois Curfman McInnes     /* determine buffer space needed for message */
10108965ea79SLois Curfman McInnes     nz = 0;
10118965ea79SLois Curfman McInnes     for ( i=0; i<m; i++ ) {
10128965ea79SLois Curfman McInnes       nz += ourlens[i];
10138965ea79SLois Curfman McInnes     }
10140452661fSBarry Smith     mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
10158965ea79SLois Curfman McInnes 
10168965ea79SLois Curfman McInnes     /* receive message of column indices*/
10178965ea79SLois Curfman McInnes     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
10188965ea79SLois Curfman McInnes     MPI_Get_count(&status,MPI_INT,&maxnz);
101939ddd567SLois Curfman McInnes     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIDenseorMPIRow:something is wrong with file");
10208965ea79SLois Curfman McInnes   }
10218965ea79SLois Curfman McInnes 
10228965ea79SLois Curfman McInnes   /* loop over local rows, determining number of off diagonal entries */
1023cddf8d76SBarry Smith   PetscMemzero(offlens,m*sizeof(int));
10248965ea79SLois Curfman McInnes   jj = 0;
10258965ea79SLois Curfman McInnes   for ( i=0; i<m; i++ ) {
10268965ea79SLois Curfman McInnes     for ( j=0; j<ourlens[i]; j++ ) {
10278965ea79SLois Curfman McInnes       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
10288965ea79SLois Curfman McInnes       jj++;
10298965ea79SLois Curfman McInnes     }
10308965ea79SLois Curfman McInnes   }
10318965ea79SLois Curfman McInnes 
10328965ea79SLois Curfman McInnes   /* create our matrix */
10338965ea79SLois Curfman McInnes   for ( i=0; i<m; i++ ) {
10348965ea79SLois Curfman McInnes     ourlens[i] -= offlens[i];
10358965ea79SLois Curfman McInnes   }
103639ddd567SLois Curfman McInnes   if (type == MATMPIDENSE) {
1037b4fd4287SBarry Smith     ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
10388965ea79SLois Curfman McInnes   }
10398965ea79SLois Curfman McInnes   A = *newmat;
10408965ea79SLois Curfman McInnes   for ( i=0; i<m; i++ ) {
10418965ea79SLois Curfman McInnes     ourlens[i] += offlens[i];
10428965ea79SLois Curfman McInnes   }
10438965ea79SLois Curfman McInnes 
10448965ea79SLois Curfman McInnes   if (!rank) {
10450452661fSBarry Smith     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);
10468965ea79SLois Curfman McInnes 
10478965ea79SLois Curfman McInnes     /* read in my part of the matrix numerical values  */
10488965ea79SLois Curfman McInnes     nz = procsnz[0];
10498965ea79SLois Curfman McInnes     ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr);
10508965ea79SLois Curfman McInnes 
10518965ea79SLois Curfman McInnes     /* insert into matrix */
10528965ea79SLois Curfman McInnes     jj      = rstart;
10538965ea79SLois Curfman McInnes     smycols = mycols;
10548965ea79SLois Curfman McInnes     svals   = vals;
10558965ea79SLois Curfman McInnes     for ( i=0; i<m; i++ ) {
10568965ea79SLois Curfman McInnes       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
10578965ea79SLois Curfman McInnes       smycols += ourlens[i];
10588965ea79SLois Curfman McInnes       svals   += ourlens[i];
10598965ea79SLois Curfman McInnes       jj++;
10608965ea79SLois Curfman McInnes     }
10618965ea79SLois Curfman McInnes 
10628965ea79SLois Curfman McInnes     /* read in other processors and ship out */
10638965ea79SLois Curfman McInnes     for ( i=1; i<size; i++ ) {
10648965ea79SLois Curfman McInnes       nz = procsnz[i];
10658965ea79SLois Curfman McInnes       ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr);
10668965ea79SLois Curfman McInnes       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
10678965ea79SLois Curfman McInnes     }
10680452661fSBarry Smith     PetscFree(procsnz);
10698965ea79SLois Curfman McInnes   }
10708965ea79SLois Curfman McInnes   else {
10718965ea79SLois Curfman McInnes     /* receive numeric values */
10720452661fSBarry Smith     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);
10738965ea79SLois Curfman McInnes 
10748965ea79SLois Curfman McInnes     /* receive message of values*/
10758965ea79SLois Curfman McInnes     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
10768965ea79SLois Curfman McInnes     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
107739ddd567SLois Curfman McInnes     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIDenseorMPIRow:something is wrong with file");
10788965ea79SLois Curfman McInnes 
10798965ea79SLois Curfman McInnes     /* insert into matrix */
10808965ea79SLois Curfman McInnes     jj      = rstart;
10818965ea79SLois Curfman McInnes     smycols = mycols;
10828965ea79SLois Curfman McInnes     svals   = vals;
10838965ea79SLois Curfman McInnes     for ( i=0; i<m; i++ ) {
10848965ea79SLois Curfman McInnes       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
10858965ea79SLois Curfman McInnes       smycols += ourlens[i];
10868965ea79SLois Curfman McInnes       svals   += ourlens[i];
10878965ea79SLois Curfman McInnes       jj++;
10888965ea79SLois Curfman McInnes     }
10898965ea79SLois Curfman McInnes   }
10900452661fSBarry Smith   PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);
10918965ea79SLois Curfman McInnes 
10928965ea79SLois Curfman McInnes   ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
10938965ea79SLois Curfman McInnes   ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
10948965ea79SLois Curfman McInnes   return 0;
10958965ea79SLois Curfman McInnes }
1096