xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 25cdf11f00656cd6142ccc252ca9e2f99af554b2)
18965ea79SLois Curfman McInnes #ifndef lint
2*25cdf11fSBarry Smith static char vcid[] = "$Id: mpidense.c,v 1.24 1996/01/09 01:13:54 curfman Exp bsmith $";
38965ea79SLois Curfman McInnes #endif
48965ea79SLois Curfman McInnes 
5ed3cc1f0SBarry Smith /*
6ed3cc1f0SBarry Smith    Basic functions for basic parallel dense matrices.
7ed3cc1f0SBarry Smith */
8ed3cc1f0SBarry Smith 
98965ea79SLois Curfman McInnes #include "mpidense.h"
108965ea79SLois Curfman McInnes #include "vec/vecimpl.h"
118965ea79SLois Curfman McInnes #include "inline/spops.h"
128965ea79SLois Curfman McInnes 
1339ddd567SLois Curfman McInnes static int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,
148965ea79SLois Curfman McInnes                                int *idxn,Scalar *v,InsertMode addv)
158965ea79SLois Curfman McInnes {
1639b7565bSBarry Smith   Mat_MPIDense *A = (Mat_MPIDense *) mat->data;
1739b7565bSBarry Smith   int          ierr, i, j, rstart = A->rstart, rend = A->rend, row;
1839b7565bSBarry Smith   int          roworiented = A->roworiented;
198965ea79SLois Curfman McInnes 
2039b7565bSBarry Smith   if (A->insertmode != NOT_SET_VALUES && A->insertmode != addv) {
2139ddd567SLois Curfman McInnes     SETERRQ(1,"MatSetValues_MPIDense:Cannot mix inserts and adds");
228965ea79SLois Curfman McInnes   }
2339b7565bSBarry Smith   A->insertmode = addv;
248965ea79SLois Curfman McInnes   for ( i=0; i<m; i++ ) {
2539ddd567SLois Curfman McInnes     if (idxm[i] < 0) SETERRQ(1,"MatSetValues_MPIDense:Negative row");
2639b7565bSBarry Smith     if (idxm[i] >= A->M) SETERRQ(1,"MatSetValues_MPIDense:Row too large");
278965ea79SLois Curfman McInnes     if (idxm[i] >= rstart && idxm[i] < rend) {
288965ea79SLois Curfman McInnes       row = idxm[i] - rstart;
2939b7565bSBarry Smith       if (roworiented) {
3039b7565bSBarry Smith         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv); CHKERRQ(ierr);
3139b7565bSBarry Smith       }
3239b7565bSBarry Smith       else {
338965ea79SLois Curfman McInnes         for ( j=0; j<n; j++ ) {
3439ddd567SLois Curfman McInnes           if (idxn[j] < 0) SETERRQ(1,"MatSetValues_MPIDense:Negative column");
3539b7565bSBarry Smith           if (idxn[j] >= A->N) SETERRQ(1,"MatSetValues_MPIDense:Column too large");
3639b7565bSBarry Smith           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv); CHKERRQ(ierr);
3739b7565bSBarry Smith         }
388965ea79SLois Curfman McInnes       }
398965ea79SLois Curfman McInnes     }
408965ea79SLois Curfman McInnes     else {
4139b7565bSBarry Smith       if (roworiented) {
4239b7565bSBarry Smith         ierr = StashValues_Private(&A->stash,idxm[i],n,idxn,v+i*n,addv); CHKERRQ(ierr);
4339b7565bSBarry Smith       }
4439b7565bSBarry Smith       else { /* must stash each seperately */
4539b7565bSBarry Smith         row = idxm[i];
4639b7565bSBarry Smith         for ( j=0; j<n; j++ ) {
4739b7565bSBarry Smith           ierr = StashValues_Private(&A->stash,row,1,&idxn[j],v+i+j*m,addv);
4839b7565bSBarry Smith                  CHKERRQ(ierr);
4939b7565bSBarry Smith         }
5039b7565bSBarry Smith       }
51b49de8d1SLois Curfman McInnes     }
52b49de8d1SLois Curfman McInnes   }
53b49de8d1SLois Curfman McInnes   return 0;
54b49de8d1SLois Curfman McInnes }
55b49de8d1SLois Curfman McInnes 
56b49de8d1SLois Curfman McInnes static int MatGetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
57b49de8d1SLois Curfman McInnes {
58b49de8d1SLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
59b49de8d1SLois Curfman McInnes   int          ierr, i, j, rstart = mdn->rstart, rend = mdn->rend, row;
60b49de8d1SLois Curfman McInnes 
61b49de8d1SLois Curfman McInnes   if (!mdn->assembled) SETERRQ(1,"MatGetValues_MPIDense:Not for unassembled matrix");
62b49de8d1SLois Curfman McInnes   for ( i=0; i<m; i++ ) {
63b49de8d1SLois Curfman McInnes     if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIDense:Negative row");
64b49de8d1SLois Curfman McInnes     if (idxm[i] >= mdn->M) SETERRQ(1,"MatGetValues_MPIDense:Row too large");
65b49de8d1SLois Curfman McInnes     if (idxm[i] >= rstart && idxm[i] < rend) {
66b49de8d1SLois Curfman McInnes       row = idxm[i] - rstart;
67b49de8d1SLois Curfman McInnes       for ( j=0; j<n; j++ ) {
68b49de8d1SLois Curfman McInnes         if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIDense:Negative column");
69b49de8d1SLois Curfman McInnes         if (idxn[j] >= mdn->N)
70b49de8d1SLois Curfman McInnes           SETERRQ(1,"MatGetValues_MPIDense:Column too large");
71b49de8d1SLois Curfman McInnes         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j); CHKERRQ(ierr);
72b49de8d1SLois Curfman McInnes       }
73b49de8d1SLois Curfman McInnes     }
74b49de8d1SLois Curfman McInnes     else {
75b49de8d1SLois Curfman McInnes       SETERRQ(1,"MatGetValues_MPIDense:Only local values currently supported");
768965ea79SLois Curfman McInnes     }
778965ea79SLois Curfman McInnes   }
788965ea79SLois Curfman McInnes   return 0;
798965ea79SLois Curfman McInnes }
808965ea79SLois Curfman McInnes 
8139ddd567SLois Curfman McInnes static int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
828965ea79SLois Curfman McInnes {
8339ddd567SLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
848965ea79SLois Curfman McInnes   MPI_Comm     comm = mat->comm;
8539ddd567SLois Curfman McInnes   int          size = mdn->size, *owners = mdn->rowners, rank = mdn->rank;
868965ea79SLois Curfman McInnes   int          *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
8739ddd567SLois Curfman McInnes   int          tag = mat->tag, *owner,*starts,count,ierr;
888965ea79SLois Curfman McInnes   InsertMode   addv;
8939ddd567SLois Curfman McInnes   MPI_Request  *send_waits,*recv_waits;
908965ea79SLois Curfman McInnes   Scalar       *rvalues,*svalues;
918965ea79SLois Curfman McInnes 
928965ea79SLois Curfman McInnes   /* make sure all processors are either in INSERTMODE or ADDMODE */
9339ddd567SLois Curfman McInnes   MPI_Allreduce((void *) &mdn->insertmode,(void *) &addv,1,MPI_INT,
9439ddd567SLois Curfman McInnes                 MPI_BOR,comm);
9539ddd567SLois Curfman McInnes   if (addv == (ADD_VALUES|INSERT_VALUES)) { SETERRQ(1,
9639ddd567SLois Curfman McInnes     "MatAssemblyBegin_MPIDense:Cannot mix adds/inserts on different procs");
978965ea79SLois Curfman McInnes     }
9839ddd567SLois Curfman McInnes   mdn->insertmode = addv; /* in case this processor had no cache */
998965ea79SLois Curfman McInnes 
1008965ea79SLois Curfman McInnes   /*  first count number of contributors to each processor */
1010452661fSBarry Smith   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
102cddf8d76SBarry Smith   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
1030452661fSBarry Smith   owner = (int *) PetscMalloc( (mdn->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
10439ddd567SLois Curfman McInnes   for ( i=0; i<mdn->stash.n; i++ ) {
10539ddd567SLois Curfman McInnes     idx = mdn->stash.idx[i];
1068965ea79SLois Curfman McInnes     for ( j=0; j<size; j++ ) {
1078965ea79SLois Curfman McInnes       if (idx >= owners[j] && idx < owners[j+1]) {
1088965ea79SLois Curfman McInnes         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
1098965ea79SLois Curfman McInnes       }
1108965ea79SLois Curfman McInnes     }
1118965ea79SLois Curfman McInnes   }
1128965ea79SLois Curfman McInnes   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
1138965ea79SLois Curfman McInnes 
1148965ea79SLois Curfman McInnes   /* inform other processors of number of messages and max length*/
1150452661fSBarry Smith   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
11639ddd567SLois Curfman McInnes   MPI_Allreduce((void *) procs,(void *) work,size,MPI_INT,MPI_SUM,comm);
1178965ea79SLois Curfman McInnes   nreceives = work[rank];
11839ddd567SLois Curfman McInnes   MPI_Allreduce((void *) nprocs,(void *) work,size,MPI_INT,MPI_MAX,comm);
1198965ea79SLois Curfman McInnes   nmax = work[rank];
1200452661fSBarry Smith   PetscFree(work);
1218965ea79SLois Curfman McInnes 
1228965ea79SLois Curfman McInnes   /* post receives:
1238965ea79SLois Curfman McInnes        1) each message will consist of ordered pairs
1248965ea79SLois Curfman McInnes      (global index,value) we store the global index as a double
1258965ea79SLois Curfman McInnes      to simplify the message passing.
1268965ea79SLois Curfman McInnes        2) since we don't know how long each individual message is we
1278965ea79SLois Curfman McInnes      allocate the largest needed buffer for each receive. Potentially
1288965ea79SLois Curfman McInnes      this is a lot of wasted space.
1298965ea79SLois Curfman McInnes 
1308965ea79SLois Curfman McInnes        This could be done better.
1318965ea79SLois Curfman McInnes   */
1320452661fSBarry Smith   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
1338965ea79SLois Curfman McInnes   CHKPTRQ(rvalues);
1340452661fSBarry Smith   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
1358965ea79SLois Curfman McInnes   CHKPTRQ(recv_waits);
1368965ea79SLois Curfman McInnes   for ( i=0; i<nreceives; i++ ) {
13739ddd567SLois Curfman McInnes     MPI_Irecv((void *)(rvalues+3*nmax*i),3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
1388965ea79SLois Curfman McInnes               comm,recv_waits+i);
1398965ea79SLois Curfman McInnes   }
1408965ea79SLois Curfman McInnes 
1418965ea79SLois Curfman McInnes   /* do sends:
1428965ea79SLois Curfman McInnes       1) starts[i] gives the starting index in svalues for stuff going to
1438965ea79SLois Curfman McInnes          the ith processor
1448965ea79SLois Curfman McInnes   */
1450452661fSBarry Smith   svalues = (Scalar *) PetscMalloc( 3*(mdn->stash.n+1)*sizeof(Scalar) );
14639ddd567SLois Curfman McInnes   CHKPTRQ(svalues);
1470452661fSBarry Smith   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
1488965ea79SLois Curfman McInnes   CHKPTRQ(send_waits);
1490452661fSBarry Smith   starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
1508965ea79SLois Curfman McInnes   starts[0] = 0;
1518965ea79SLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
15239ddd567SLois Curfman McInnes   for ( i=0; i<mdn->stash.n; i++ ) {
15339ddd567SLois Curfman McInnes     svalues[3*starts[owner[i]]]       = (Scalar)  mdn->stash.idx[i];
15439ddd567SLois Curfman McInnes     svalues[3*starts[owner[i]]+1]     = (Scalar)  mdn->stash.idy[i];
15539ddd567SLois Curfman McInnes     svalues[3*(starts[owner[i]]++)+2] =  mdn->stash.array[i];
1568965ea79SLois Curfman McInnes   }
1570452661fSBarry Smith   PetscFree(owner);
1588965ea79SLois Curfman McInnes   starts[0] = 0;
1598965ea79SLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
1608965ea79SLois Curfman McInnes   count = 0;
1618965ea79SLois Curfman McInnes   for ( i=0; i<size; i++ ) {
1628965ea79SLois Curfman McInnes     if (procs[i]) {
16339ddd567SLois Curfman McInnes       MPI_Isend((void*)(svalues+3*starts[i]),3*nprocs[i],MPIU_SCALAR,i,tag,
1648965ea79SLois Curfman McInnes                 comm,send_waits+count++);
1658965ea79SLois Curfman McInnes     }
1668965ea79SLois Curfman McInnes   }
1670452661fSBarry Smith   PetscFree(starts); PetscFree(nprocs);
1688965ea79SLois Curfman McInnes 
1698965ea79SLois Curfman McInnes   /* Free cache space */
17039ddd567SLois Curfman McInnes   ierr = StashDestroy_Private(&mdn->stash); CHKERRQ(ierr);
1718965ea79SLois Curfman McInnes 
17239ddd567SLois Curfman McInnes   mdn->svalues    = svalues;    mdn->rvalues = rvalues;
17339ddd567SLois Curfman McInnes   mdn->nsends     = nsends;     mdn->nrecvs = nreceives;
17439ddd567SLois Curfman McInnes   mdn->send_waits = send_waits; mdn->recv_waits = recv_waits;
17539ddd567SLois Curfman McInnes   mdn->rmax       = nmax;
1768965ea79SLois Curfman McInnes 
1778965ea79SLois Curfman McInnes   return 0;
1788965ea79SLois Curfman McInnes }
17939ddd567SLois Curfman McInnes extern int MatSetUpMultiply_MPIDense(Mat);
1808965ea79SLois Curfman McInnes 
18139ddd567SLois Curfman McInnes static int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
1828965ea79SLois Curfman McInnes {
18339ddd567SLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
1848965ea79SLois Curfman McInnes   MPI_Status   *send_status,recv_status;
18539ddd567SLois Curfman McInnes   int          imdex, nrecvs=mdn->nrecvs, count=nrecvs, i, n, ierr, row, col;
1868965ea79SLois Curfman McInnes   Scalar       *values,val;
18739ddd567SLois Curfman McInnes   InsertMode   addv = mdn->insertmode;
1888965ea79SLois Curfman McInnes 
1898965ea79SLois Curfman McInnes   /*  wait on receives */
1908965ea79SLois Curfman McInnes   while (count) {
19139ddd567SLois Curfman McInnes     MPI_Waitany(nrecvs,mdn->recv_waits,&imdex,&recv_status);
1928965ea79SLois Curfman McInnes     /* unpack receives into our local space */
19339ddd567SLois Curfman McInnes     values = mdn->rvalues + 3*imdex*mdn->rmax;
1948965ea79SLois Curfman McInnes     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
1958965ea79SLois Curfman McInnes     n = n/3;
1968965ea79SLois Curfman McInnes     for ( i=0; i<n; i++ ) {
19739ddd567SLois Curfman McInnes       row = (int) PETSCREAL(values[3*i]) - mdn->rstart;
1988965ea79SLois Curfman McInnes       col = (int) PETSCREAL(values[3*i+1]);
1998965ea79SLois Curfman McInnes       val = values[3*i+2];
20039ddd567SLois Curfman McInnes       if (col >= 0 && col < mdn->N) {
20139ddd567SLois Curfman McInnes         MatSetValues(mdn->A,1,&row,1,&col,&val,addv);
2028965ea79SLois Curfman McInnes       }
20339ddd567SLois Curfman McInnes       else {SETERRQ(1,"MatAssemblyEnd_MPIDense:Invalid column");}
2048965ea79SLois Curfman McInnes     }
2058965ea79SLois Curfman McInnes     count--;
2068965ea79SLois Curfman McInnes   }
2070452661fSBarry Smith   PetscFree(mdn->recv_waits); PetscFree(mdn->rvalues);
2088965ea79SLois Curfman McInnes 
2098965ea79SLois Curfman McInnes   /* wait on sends */
21039ddd567SLois Curfman McInnes   if (mdn->nsends) {
2110452661fSBarry Smith     send_status = (MPI_Status *) PetscMalloc( mdn->nsends*sizeof(MPI_Status) );
2128965ea79SLois Curfman McInnes     CHKPTRQ(send_status);
21339ddd567SLois Curfman McInnes     MPI_Waitall(mdn->nsends,mdn->send_waits,send_status);
2140452661fSBarry Smith     PetscFree(send_status);
2158965ea79SLois Curfman McInnes   }
2160452661fSBarry Smith   PetscFree(mdn->send_waits); PetscFree(mdn->svalues);
2178965ea79SLois Curfman McInnes 
21839ddd567SLois Curfman McInnes   mdn->insertmode = NOT_SET_VALUES;
21939ddd567SLois Curfman McInnes   ierr = MatAssemblyBegin(mdn->A,mode); CHKERRQ(ierr);
22039ddd567SLois Curfman McInnes   ierr = MatAssemblyEnd(mdn->A,mode); CHKERRQ(ierr);
2218965ea79SLois Curfman McInnes 
22239ddd567SLois Curfman McInnes   if (!mdn->assembled && mode == FINAL_ASSEMBLY) {
22339ddd567SLois Curfman McInnes     ierr = MatSetUpMultiply_MPIDense(mat); CHKERRQ(ierr);
2248965ea79SLois Curfman McInnes   }
22539ddd567SLois Curfman McInnes   mdn->assembled = 1;
2268965ea79SLois Curfman McInnes   return 0;
2278965ea79SLois Curfman McInnes }
2288965ea79SLois Curfman McInnes 
22939ddd567SLois Curfman McInnes static int MatZeroEntries_MPIDense(Mat A)
2308965ea79SLois Curfman McInnes {
23139ddd567SLois Curfman McInnes   Mat_MPIDense *l = (Mat_MPIDense *) A->data;
23239ddd567SLois Curfman McInnes   return MatZeroEntries(l->A);
2338965ea79SLois Curfman McInnes }
2348965ea79SLois Curfman McInnes 
2358965ea79SLois Curfman McInnes /* the code does not do the diagonal entries correctly unless the
2368965ea79SLois Curfman McInnes    matrix is square and the column and row owerships are identical.
2378965ea79SLois Curfman McInnes    This is a BUG. The only way to fix it seems to be to access
2383501a2bdSLois Curfman McInnes    mdn->A and mdn->B directly and not through the MatZeroRows()
2398965ea79SLois Curfman McInnes    routine.
2408965ea79SLois Curfman McInnes */
24139ddd567SLois Curfman McInnes static int MatZeroRows_MPIDense(Mat A,IS is,Scalar *diag)
2428965ea79SLois Curfman McInnes {
24339ddd567SLois Curfman McInnes   Mat_MPIDense   *l = (Mat_MPIDense *) A->data;
2448965ea79SLois Curfman McInnes   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
2458965ea79SLois Curfman McInnes   int            *procs,*nprocs,j,found,idx,nsends,*work;
2468965ea79SLois Curfman McInnes   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
2478965ea79SLois Curfman McInnes   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
2488965ea79SLois Curfman McInnes   int            *lens,imdex,*lrows,*values;
2498965ea79SLois Curfman McInnes   MPI_Comm       comm = A->comm;
2508965ea79SLois Curfman McInnes   MPI_Request    *send_waits,*recv_waits;
2518965ea79SLois Curfman McInnes   MPI_Status     recv_status,*send_status;
2528965ea79SLois Curfman McInnes   IS             istmp;
2538965ea79SLois Curfman McInnes 
25439ddd567SLois Curfman McInnes   if (!l->assembled) SETERRQ(1,"MatZeroRows_MPIDense:Must assemble matrix");
2558965ea79SLois Curfman McInnes   ierr = ISGetLocalSize(is,&N); CHKERRQ(ierr);
2568965ea79SLois Curfman McInnes   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
2578965ea79SLois Curfman McInnes 
2588965ea79SLois Curfman McInnes   /*  first count number of contributors to each processor */
2590452661fSBarry Smith   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
260cddf8d76SBarry Smith   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
2610452661fSBarry Smith   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
2628965ea79SLois Curfman McInnes   for ( i=0; i<N; i++ ) {
2638965ea79SLois Curfman McInnes     idx = rows[i];
2648965ea79SLois Curfman McInnes     found = 0;
2658965ea79SLois Curfman McInnes     for ( j=0; j<size; j++ ) {
2668965ea79SLois Curfman McInnes       if (idx >= owners[j] && idx < owners[j+1]) {
2678965ea79SLois Curfman McInnes         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
2688965ea79SLois Curfman McInnes       }
2698965ea79SLois Curfman McInnes     }
27039ddd567SLois Curfman McInnes     if (!found) SETERRQ(1,"MatZeroRows_MPIDense:Index out of range");
2718965ea79SLois Curfman McInnes   }
2728965ea79SLois Curfman McInnes   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
2738965ea79SLois Curfman McInnes 
2748965ea79SLois Curfman McInnes   /* inform other processors of number of messages and max length*/
2750452661fSBarry Smith   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
2768965ea79SLois Curfman McInnes   MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
2778965ea79SLois Curfman McInnes   nrecvs = work[rank];
2788965ea79SLois Curfman McInnes   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
2798965ea79SLois Curfman McInnes   nmax = work[rank];
2800452661fSBarry Smith   PetscFree(work);
2818965ea79SLois Curfman McInnes 
2828965ea79SLois Curfman McInnes   /* post receives:   */
2830452661fSBarry Smith   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
2848965ea79SLois Curfman McInnes   CHKPTRQ(rvalues);
2850452661fSBarry Smith   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
2868965ea79SLois Curfman McInnes   CHKPTRQ(recv_waits);
2878965ea79SLois Curfman McInnes   for ( i=0; i<nrecvs; i++ ) {
2888965ea79SLois Curfman McInnes     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
2898965ea79SLois Curfman McInnes   }
2908965ea79SLois Curfman McInnes 
2918965ea79SLois Curfman McInnes   /* do sends:
2928965ea79SLois Curfman McInnes       1) starts[i] gives the starting index in svalues for stuff going to
2938965ea79SLois Curfman McInnes          the ith processor
2948965ea79SLois Curfman McInnes   */
2950452661fSBarry Smith   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
2960452661fSBarry Smith   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
2978965ea79SLois Curfman McInnes   CHKPTRQ(send_waits);
2980452661fSBarry Smith   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
2998965ea79SLois Curfman McInnes   starts[0] = 0;
3008965ea79SLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
3018965ea79SLois Curfman McInnes   for ( i=0; i<N; i++ ) {
3028965ea79SLois Curfman McInnes     svalues[starts[owner[i]]++] = rows[i];
3038965ea79SLois Curfman McInnes   }
3048965ea79SLois Curfman McInnes   ISRestoreIndices(is,&rows);
3058965ea79SLois Curfman McInnes 
3068965ea79SLois Curfman McInnes   starts[0] = 0;
3078965ea79SLois Curfman McInnes   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
3088965ea79SLois Curfman McInnes   count = 0;
3098965ea79SLois Curfman McInnes   for ( i=0; i<size; i++ ) {
3108965ea79SLois Curfman McInnes     if (procs[i]) {
3118965ea79SLois Curfman McInnes       MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
3128965ea79SLois Curfman McInnes     }
3138965ea79SLois Curfman McInnes   }
3140452661fSBarry Smith   PetscFree(starts);
3158965ea79SLois Curfman McInnes 
3168965ea79SLois Curfman McInnes   base = owners[rank];
3178965ea79SLois Curfman McInnes 
3188965ea79SLois Curfman McInnes   /*  wait on receives */
3190452661fSBarry Smith   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
3208965ea79SLois Curfman McInnes   source = lens + nrecvs;
3218965ea79SLois Curfman McInnes   count  = nrecvs; slen = 0;
3228965ea79SLois Curfman McInnes   while (count) {
3238965ea79SLois Curfman McInnes     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
3248965ea79SLois Curfman McInnes     /* unpack receives into our local space */
3258965ea79SLois Curfman McInnes     MPI_Get_count(&recv_status,MPI_INT,&n);
3268965ea79SLois Curfman McInnes     source[imdex]  = recv_status.MPI_SOURCE;
3278965ea79SLois Curfman McInnes     lens[imdex]  = n;
3288965ea79SLois Curfman McInnes     slen += n;
3298965ea79SLois Curfman McInnes     count--;
3308965ea79SLois Curfman McInnes   }
3310452661fSBarry Smith   PetscFree(recv_waits);
3328965ea79SLois Curfman McInnes 
3338965ea79SLois Curfman McInnes   /* move the data into the send scatter */
3340452661fSBarry Smith   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
3358965ea79SLois Curfman McInnes   count = 0;
3368965ea79SLois Curfman McInnes   for ( i=0; i<nrecvs; i++ ) {
3378965ea79SLois Curfman McInnes     values = rvalues + i*nmax;
3388965ea79SLois Curfman McInnes     for ( j=0; j<lens[i]; j++ ) {
3398965ea79SLois Curfman McInnes       lrows[count++] = values[j] - base;
3408965ea79SLois Curfman McInnes     }
3418965ea79SLois Curfman McInnes   }
3420452661fSBarry Smith   PetscFree(rvalues); PetscFree(lens);
3430452661fSBarry Smith   PetscFree(owner); PetscFree(nprocs);
3448965ea79SLois Curfman McInnes 
3458965ea79SLois Curfman McInnes   /* actually zap the local rows */
3468965ea79SLois Curfman McInnes   ierr = ISCreateSeq(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
3478965ea79SLois Curfman McInnes   PLogObjectParent(A,istmp);
3480452661fSBarry Smith   PetscFree(lrows);
3498965ea79SLois Curfman McInnes   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
3508965ea79SLois Curfman McInnes   ierr = ISDestroy(istmp); CHKERRQ(ierr);
3518965ea79SLois Curfman McInnes 
3528965ea79SLois Curfman McInnes   /* wait on sends */
3538965ea79SLois Curfman McInnes   if (nsends) {
3540452661fSBarry Smith     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
3558965ea79SLois Curfman McInnes     CHKPTRQ(send_status);
3568965ea79SLois Curfman McInnes     MPI_Waitall(nsends,send_waits,send_status);
3570452661fSBarry Smith     PetscFree(send_status);
3588965ea79SLois Curfman McInnes   }
3590452661fSBarry Smith   PetscFree(send_waits); PetscFree(svalues);
3608965ea79SLois Curfman McInnes 
3618965ea79SLois Curfman McInnes   return 0;
3628965ea79SLois Curfman McInnes }
3638965ea79SLois Curfman McInnes 
36439ddd567SLois Curfman McInnes static int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
3658965ea79SLois Curfman McInnes {
36639ddd567SLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
3678965ea79SLois Curfman McInnes   int          ierr;
36839ddd567SLois Curfman McInnes   if (!mdn->assembled)
36939ddd567SLois Curfman McInnes     SETERRQ(1,"MatMult_MPIDense:Must assemble matrix first");
37039ddd567SLois Curfman McInnes   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx);
37139ddd567SLois Curfman McInnes   CHKERRQ(ierr);
37239ddd567SLois Curfman McInnes   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx);
37339ddd567SLois Curfman McInnes   CHKERRQ(ierr);
37439ddd567SLois Curfman McInnes   ierr = MatMult(mdn->A,mdn->lvec,yy); CHKERRQ(ierr);
3758965ea79SLois Curfman McInnes   return 0;
3768965ea79SLois Curfman McInnes }
3778965ea79SLois Curfman McInnes 
37839ddd567SLois Curfman McInnes static int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
3798965ea79SLois Curfman McInnes {
38039ddd567SLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
3818965ea79SLois Curfman McInnes   int          ierr;
38239ddd567SLois Curfman McInnes   if (!mdn->assembled)
38339ddd567SLois Curfman McInnes     SETERRQ(1,"MatMultAdd_MPIDense:Must assemble matrix first");
3843501a2bdSLois Curfman McInnes   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx);
38539ddd567SLois Curfman McInnes   CHKERRQ(ierr);
3863501a2bdSLois Curfman McInnes   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx);
38739ddd567SLois Curfman McInnes   CHKERRQ(ierr);
38839ddd567SLois Curfman McInnes   ierr = MatMultAdd(mdn->A,mdn->lvec,yy,zz); CHKERRQ(ierr);
3898965ea79SLois Curfman McInnes   return 0;
3908965ea79SLois Curfman McInnes }
3918965ea79SLois Curfman McInnes 
392096963f5SLois Curfman McInnes static int MatMultTrans_MPIDense(Mat A,Vec xx,Vec yy)
393096963f5SLois Curfman McInnes {
394096963f5SLois Curfman McInnes   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
395096963f5SLois Curfman McInnes   int          ierr;
3963501a2bdSLois Curfman McInnes   Scalar       zero = 0.0;
397096963f5SLois Curfman McInnes 
398096963f5SLois Curfman McInnes   if (!a->assembled) SETERRQ(1,"MatMulTrans_MPIDense:must assemble matrix");
3993501a2bdSLois Curfman McInnes   ierr = VecSet(&zero,yy); CHKERRQ(ierr);
400096963f5SLois Curfman McInnes   ierr = MatMultTrans(a->A,xx,a->lvec); CHKERRQ(ierr);
401096963f5SLois Curfman McInnes   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,
402096963f5SLois Curfman McInnes          (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
403096963f5SLois Curfman McInnes   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,
404096963f5SLois Curfman McInnes          (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
405096963f5SLois Curfman McInnes   return 0;
406096963f5SLois Curfman McInnes }
407096963f5SLois Curfman McInnes 
408096963f5SLois Curfman McInnes static int MatMultTransAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
409096963f5SLois Curfman McInnes {
410096963f5SLois Curfman McInnes   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
411096963f5SLois Curfman McInnes   int          ierr;
412096963f5SLois Curfman McInnes 
413096963f5SLois Curfman McInnes   if (!a->assembled) SETERRQ(1,"MatMulTransAdd_MPIDense:must assemble matrix");
4143501a2bdSLois Curfman McInnes   ierr = VecCopy(yy,zz); CHKERRQ(ierr);
4153501a2bdSLois Curfman McInnes   ierr = MatMultTrans(a->A,xx,a->lvec); CHKERRQ(ierr);
416096963f5SLois Curfman McInnes   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,
417096963f5SLois Curfman McInnes          (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
418096963f5SLois Curfman McInnes   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,
419096963f5SLois Curfman McInnes          (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
420096963f5SLois Curfman McInnes   return 0;
421096963f5SLois Curfman McInnes }
422096963f5SLois Curfman McInnes 
42339ddd567SLois Curfman McInnes static int MatGetDiagonal_MPIDense(Mat A,Vec v)
4248965ea79SLois Curfman McInnes {
42539ddd567SLois Curfman McInnes   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
426096963f5SLois Curfman McInnes   Mat_SeqDense *aloc = (Mat_SeqDense *) a->A->data;
427096963f5SLois Curfman McInnes   int          ierr, i, n, m = a->m, radd;
428096963f5SLois Curfman McInnes   Scalar       *x;
429ed3cc1f0SBarry Smith 
43039ddd567SLois Curfman McInnes   if (!a->assembled) SETERRQ(1,"MatGetDiag_MPIDense:must assemble matrix");
431096963f5SLois Curfman McInnes   ierr = VecGetArray(v,&x); CHKERRQ(ierr);
432096963f5SLois Curfman McInnes   ierr = VecGetSize(v,&n); CHKERRQ(ierr);
433096963f5SLois Curfman McInnes   if (n != a->M) SETERRQ(1,"MatGetDiagonal_SeqDense:Nonconforming mat and vec");
434096963f5SLois Curfman McInnes   radd = a->rstart*m*m;
435096963f5SLois Curfman McInnes   for ( i=0; i<m; i++ ) {
436096963f5SLois Curfman McInnes     x[i] = aloc->v[radd + i*m + i];
437096963f5SLois Curfman McInnes   }
438096963f5SLois Curfman McInnes   return 0;
4398965ea79SLois Curfman McInnes }
4408965ea79SLois Curfman McInnes 
44139ddd567SLois Curfman McInnes static int MatDestroy_MPIDense(PetscObject obj)
4428965ea79SLois Curfman McInnes {
4438965ea79SLois Curfman McInnes   Mat          mat = (Mat) obj;
4443501a2bdSLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
4458965ea79SLois Curfman McInnes   int          ierr;
446ed3cc1f0SBarry Smith 
4478965ea79SLois Curfman McInnes #if defined(PETSC_LOG)
4483501a2bdSLois Curfman McInnes   PLogObjectState(obj,"Rows=%d, Cols=%d",mdn->M,mdn->N);
4498965ea79SLois Curfman McInnes #endif
4500452661fSBarry Smith   PetscFree(mdn->rowners);
4513501a2bdSLois Curfman McInnes   ierr = MatDestroy(mdn->A); CHKERRQ(ierr);
4523501a2bdSLois Curfman McInnes   if (mdn->lvec)   VecDestroy(mdn->lvec);
4533501a2bdSLois Curfman McInnes   if (mdn->Mvctx)  VecScatterDestroy(mdn->Mvctx);
454622d7880SLois Curfman McInnes   if (mdn->factor) {
455622d7880SLois Curfman McInnes     if (mdn->factor->temp)   PetscFree(mdn->factor->temp);
456622d7880SLois Curfman McInnes     if (mdn->factor->tag)    PetscFree(mdn->factor->tag);
457622d7880SLois Curfman McInnes     if (mdn->factor->pivots) PetscFree(mdn->factor->pivots);
458622d7880SLois Curfman McInnes     PetscFree(mdn->factor);
459622d7880SLois Curfman McInnes   }
4600452661fSBarry Smith   PetscFree(mdn);
4618965ea79SLois Curfman McInnes   PLogObjectDestroy(mat);
4620452661fSBarry Smith   PetscHeaderDestroy(mat);
4638965ea79SLois Curfman McInnes   return 0;
4648965ea79SLois Curfman McInnes }
46539ddd567SLois Curfman McInnes 
4668965ea79SLois Curfman McInnes #include "pinclude/pviewer.h"
4678965ea79SLois Curfman McInnes 
46839ddd567SLois Curfman McInnes static int MatView_MPIDense_Binary(Mat mat,Viewer viewer)
4698965ea79SLois Curfman McInnes {
47039ddd567SLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
4718965ea79SLois Curfman McInnes   int          ierr;
47239ddd567SLois Curfman McInnes   if (mdn->size == 1) {
47339ddd567SLois Curfman McInnes     ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
4748965ea79SLois Curfman McInnes   }
47539ddd567SLois Curfman McInnes   else SETERRQ(1,"MatView_MPIDense_Binary:Only uniprocessor output supported");
4768965ea79SLois Curfman McInnes   return 0;
4778965ea79SLois Curfman McInnes }
4788965ea79SLois Curfman McInnes 
47939ddd567SLois Curfman McInnes static int MatView_MPIDense_ASCII(Mat mat,Viewer viewer)
4808965ea79SLois Curfman McInnes {
48139ddd567SLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
48239ddd567SLois Curfman McInnes   int          ierr, format;
4838965ea79SLois Curfman McInnes   PetscObject  vobj = (PetscObject) viewer;
4848965ea79SLois Curfman McInnes   FILE         *fd;
4858965ea79SLois Curfman McInnes 
4863501a2bdSLois Curfman McInnes   ierr = ViewerFileGetPointer_Private(viewer,&fd); CHKERRQ(ierr);
4878965ea79SLois Curfman McInnes   if (vobj->type == ASCII_FILE_VIEWER || vobj->type == ASCII_FILES_VIEWER) {
4888965ea79SLois Curfman McInnes     ierr = ViewerFileGetFormat_Private(viewer,&format);
4893501a2bdSLois Curfman McInnes     if (format == FILE_FORMAT_INFO_DETAILED) {
490096963f5SLois Curfman McInnes       int nz, nzalloc, mem, rank;
491096963f5SLois Curfman McInnes       MPI_Comm_rank(mat->comm,&rank);
492096963f5SLois Curfman McInnes       ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem);
493096963f5SLois Curfman McInnes       MPIU_Seq_begin(mat->comm,1);
4943501a2bdSLois Curfman McInnes         fprintf(fd,"  [%d] local rows %d nz %d nz alloced %d mem %d \n",
495096963f5SLois Curfman McInnes             rank,mdn->m,nz,nzalloc,mem);
496096963f5SLois Curfman McInnes       fflush(fd);
497096963f5SLois Curfman McInnes       MPIU_Seq_end(mat->comm,1);
4983501a2bdSLois Curfman McInnes       ierr = VecScatterView(mdn->Mvctx,viewer); CHKERRQ(ierr);
4993501a2bdSLois Curfman McInnes       return 0;
5003501a2bdSLois Curfman McInnes     }
5013501a2bdSLois Curfman McInnes     else if (format == FILE_FORMAT_INFO) {
502096963f5SLois Curfman McInnes       return 0;
5038965ea79SLois Curfman McInnes     }
5048965ea79SLois Curfman McInnes   }
5058965ea79SLois Curfman McInnes   if (vobj->type == ASCII_FILE_VIEWER) {
5068965ea79SLois Curfman McInnes     MPIU_Seq_begin(mat->comm,1);
50739ddd567SLois Curfman McInnes     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d\n",
50839ddd567SLois Curfman McInnes              mdn->rank,mdn->m,mdn->rstart,mdn->rend,mdn->n);
50939ddd567SLois Curfman McInnes     ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
5108965ea79SLois Curfman McInnes     fflush(fd);
5118965ea79SLois Curfman McInnes     MPIU_Seq_end(mat->comm,1);
5128965ea79SLois Curfman McInnes   }
5138965ea79SLois Curfman McInnes   else {
51439ddd567SLois Curfman McInnes     int size = mdn->size, rank = mdn->rank;
5158965ea79SLois Curfman McInnes     if (size == 1) {
51639ddd567SLois Curfman McInnes       ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
5178965ea79SLois Curfman McInnes     }
5188965ea79SLois Curfman McInnes     else {
5198965ea79SLois Curfman McInnes       /* assemble the entire matrix onto first processor. */
5208965ea79SLois Curfman McInnes       Mat          A;
52139ddd567SLois Curfman McInnes       int          M = mdn->M, N = mdn->N,m,row,i, nz, *cols;
52239ddd567SLois Curfman McInnes       Scalar       *vals;
52339ddd567SLois Curfman McInnes       Mat_SeqDense *Amdn = (Mat_SeqDense*) mdn->A->data;
5248965ea79SLois Curfman McInnes 
5258965ea79SLois Curfman McInnes       if (!rank) {
526b4fd4287SBarry Smith         ierr = MatCreateMPIDense(mat->comm,M,M,N,N,PETSC_NULL,&A); CHKERRQ(ierr);
5278965ea79SLois Curfman McInnes       }
5288965ea79SLois Curfman McInnes       else {
529b4fd4287SBarry Smith         ierr = MatCreateMPIDense(mat->comm,0,M,N,N,PETSC_NULL,&A); CHKERRQ(ierr);
5308965ea79SLois Curfman McInnes       }
5318965ea79SLois Curfman McInnes       PLogObjectParent(mat,A);
5328965ea79SLois Curfman McInnes 
53339ddd567SLois Curfman McInnes       /* Copy the matrix ... This isn't the most efficient means,
53439ddd567SLois Curfman McInnes          but it's quick for now */
53539ddd567SLois Curfman McInnes       row = mdn->rstart; m = Amdn->m;
5368965ea79SLois Curfman McInnes       for ( i=0; i<m; i++ ) {
53739ddd567SLois Curfman McInnes         ierr = MatGetRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
53839ddd567SLois Curfman McInnes         ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES); CHKERRQ(ierr);
53939ddd567SLois Curfman McInnes         ierr = MatRestoreRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
54039ddd567SLois Curfman McInnes         row++;
5418965ea79SLois Curfman McInnes       }
5428965ea79SLois Curfman McInnes 
5438965ea79SLois Curfman McInnes       ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
5448965ea79SLois Curfman McInnes       ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
5458965ea79SLois Curfman McInnes       if (!rank) {
54639ddd567SLois Curfman McInnes         ierr = MatView(((Mat_MPIDense*)(A->data))->A,viewer); CHKERRQ(ierr);
5478965ea79SLois Curfman McInnes       }
5488965ea79SLois Curfman McInnes       ierr = MatDestroy(A); CHKERRQ(ierr);
5498965ea79SLois Curfman McInnes     }
5508965ea79SLois Curfman McInnes   }
5518965ea79SLois Curfman McInnes   return 0;
5528965ea79SLois Curfman McInnes }
5538965ea79SLois Curfman McInnes 
55439ddd567SLois Curfman McInnes static int MatView_MPIDense(PetscObject obj,Viewer viewer)
5558965ea79SLois Curfman McInnes {
5568965ea79SLois Curfman McInnes   Mat          mat = (Mat) obj;
55739ddd567SLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
5588965ea79SLois Curfman McInnes   PetscObject  vobj = (PetscObject) viewer;
55939ddd567SLois Curfman McInnes   int          ierr;
5608965ea79SLois Curfman McInnes 
56139ddd567SLois Curfman McInnes   if (!mdn->assembled) SETERRQ(1,"MatView_MPIDense:must assemble matrix");
5628965ea79SLois Curfman McInnes   if (!viewer) {
5638965ea79SLois Curfman McInnes     viewer = STDOUT_VIEWER_SELF; vobj = (PetscObject) viewer;
5648965ea79SLois Curfman McInnes   }
56539ddd567SLois Curfman McInnes   if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILE_VIEWER) {
56639ddd567SLois Curfman McInnes     ierr = MatView_MPIDense_ASCII(mat,viewer); CHKERRQ(ierr);
5678965ea79SLois Curfman McInnes   }
5688965ea79SLois Curfman McInnes   else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILES_VIEWER) {
56939ddd567SLois Curfman McInnes     ierr = MatView_MPIDense_ASCII(mat,viewer); CHKERRQ(ierr);
5708965ea79SLois Curfman McInnes   }
5718965ea79SLois Curfman McInnes   else if (vobj->type == BINARY_FILE_VIEWER) {
57239ddd567SLois Curfman McInnes     return MatView_MPIDense_Binary(mat,viewer);
5738965ea79SLois Curfman McInnes   }
5748965ea79SLois Curfman McInnes   return 0;
5758965ea79SLois Curfman McInnes }
5768965ea79SLois Curfman McInnes 
5773501a2bdSLois Curfman McInnes static int MatGetInfo_MPIDense(Mat A,MatInfoType flag,int *nz,
5788965ea79SLois Curfman McInnes                              int *nzalloc,int *mem)
5798965ea79SLois Curfman McInnes {
5803501a2bdSLois Curfman McInnes   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
5813501a2bdSLois Curfman McInnes   Mat          mdn = mat->A;
58239ddd567SLois Curfman McInnes   int          ierr, isend[3], irecv[3];
5838965ea79SLois Curfman McInnes 
5843501a2bdSLois Curfman McInnes   ierr = MatGetInfo(mdn,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr);
5858965ea79SLois Curfman McInnes   if (flag == MAT_LOCAL) {
5868965ea79SLois Curfman McInnes     *nz = isend[0]; *nzalloc = isend[1]; *mem = isend[2];
5878965ea79SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_MAX) {
5883501a2bdSLois Curfman McInnes     MPI_Allreduce(isend,irecv,3,MPI_INT,MPI_MAX,A->comm);
5898965ea79SLois Curfman McInnes     *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
5908965ea79SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_SUM) {
5913501a2bdSLois Curfman McInnes     MPI_Allreduce(isend,irecv,3,MPI_INT,MPI_SUM,A->comm);
5928965ea79SLois Curfman McInnes     *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
5938965ea79SLois Curfman McInnes   }
5948965ea79SLois Curfman McInnes   return 0;
5958965ea79SLois Curfman McInnes }
5968965ea79SLois Curfman McInnes 
5978c469469SLois Curfman McInnes /* extern int MatLUFactorSymbolic_MPIDense(Mat,IS,IS,double,Mat*);
5988aaee692SLois Curfman McInnes    extern int MatLUFactorNumeric_MPIDense(Mat,Mat*);
5998aaee692SLois Curfman McInnes    extern int MatLUFactor_MPIDense(Mat,IS,IS,double);
6008aaee692SLois Curfman McInnes    extern int MatSolve_MPIDense(Mat,Vec,Vec);
6018c469469SLois Curfman McInnes    extern int MatSolveAdd_MPIDense(Mat,Vec,Vec,Vec);
6028aaee692SLois Curfman McInnes    extern int MatSolveTrans_MPIDense(Mat,Vec,Vec);
6038aaee692SLois Curfman McInnes    extern int MatSolveTransAdd_MPIDense(Mat,Vec,Vec,Vec); */
6048aaee692SLois Curfman McInnes 
60539ddd567SLois Curfman McInnes static int MatSetOption_MPIDense(Mat A,MatOption op)
6068965ea79SLois Curfman McInnes {
60739ddd567SLois Curfman McInnes   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
6088965ea79SLois Curfman McInnes 
6098965ea79SLois Curfman McInnes   if (op == NO_NEW_NONZERO_LOCATIONS ||
6108965ea79SLois Curfman McInnes       op == YES_NEW_NONZERO_LOCATIONS ||
6118965ea79SLois Curfman McInnes       op == COLUMNS_SORTED ||
6128965ea79SLois Curfman McInnes       op == ROW_ORIENTED) {
6138965ea79SLois Curfman McInnes         MatSetOption(a->A,op);
6148965ea79SLois Curfman McInnes   }
6158965ea79SLois Curfman McInnes   else if (op == ROWS_SORTED ||
6168965ea79SLois Curfman McInnes            op == SYMMETRIC_MATRIX ||
6178965ea79SLois Curfman McInnes            op == STRUCTURALLY_SYMMETRIC_MATRIX ||
6188965ea79SLois Curfman McInnes            op == YES_NEW_DIAGONALS)
61939ddd567SLois Curfman McInnes     PLogInfo((PetscObject)A,"Info:MatSetOption_MPIDense:Option ignored\n");
6208965ea79SLois Curfman McInnes   else if (op == COLUMN_ORIENTED)
62139b7565bSBarry Smith     { a->roworiented = 0; MatSetOption(a->A,op);}
6228965ea79SLois Curfman McInnes   else if (op == NO_NEW_DIAGONALS)
62339ddd567SLois Curfman McInnes     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIDense:NO_NEW_DIAGONALS");}
6248965ea79SLois Curfman McInnes   else
62539ddd567SLois Curfman McInnes     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIDense:unknown option");}
6268965ea79SLois Curfman McInnes   return 0;
6278965ea79SLois Curfman McInnes }
6288965ea79SLois Curfman McInnes 
6293501a2bdSLois Curfman McInnes static int MatGetSize_MPIDense(Mat A,int *m,int *n)
6308965ea79SLois Curfman McInnes {
6313501a2bdSLois Curfman McInnes   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
6328965ea79SLois Curfman McInnes   *m = mat->M; *n = mat->N;
6338965ea79SLois Curfman McInnes   return 0;
6348965ea79SLois Curfman McInnes }
6358965ea79SLois Curfman McInnes 
6363501a2bdSLois Curfman McInnes static int MatGetLocalSize_MPIDense(Mat A,int *m,int *n)
6378965ea79SLois Curfman McInnes {
6383501a2bdSLois Curfman McInnes   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
6398965ea79SLois Curfman McInnes   *m = mat->m; *n = mat->N;
6408965ea79SLois Curfman McInnes   return 0;
6418965ea79SLois Curfman McInnes }
6428965ea79SLois Curfman McInnes 
6433501a2bdSLois Curfman McInnes static int MatGetOwnershipRange_MPIDense(Mat A,int *m,int *n)
6448965ea79SLois Curfman McInnes {
6453501a2bdSLois Curfman McInnes   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
6468965ea79SLois Curfman McInnes   *m = mat->rstart; *n = mat->rend;
6478965ea79SLois Curfman McInnes   return 0;
6488965ea79SLois Curfman McInnes }
6498965ea79SLois Curfman McInnes 
6503501a2bdSLois Curfman McInnes static int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,Scalar **v)
6518965ea79SLois Curfman McInnes {
6523501a2bdSLois Curfman McInnes   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
65339ddd567SLois Curfman McInnes   int          lrow, rstart = mat->rstart, rend = mat->rend;
6548965ea79SLois Curfman McInnes 
65539ddd567SLois Curfman McInnes   if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIDense:only local rows")
6568965ea79SLois Curfman McInnes   lrow = row - rstart;
65739ddd567SLois Curfman McInnes   return MatGetRow(mat->A,lrow,nz,idx,v);
6588965ea79SLois Curfman McInnes }
6598965ea79SLois Curfman McInnes 
66039ddd567SLois Curfman McInnes static int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,Scalar **v)
6618965ea79SLois Curfman McInnes {
6620452661fSBarry Smith   if (idx) PetscFree(*idx);
6630452661fSBarry Smith   if (v) PetscFree(*v);
6648965ea79SLois Curfman McInnes   return 0;
6658965ea79SLois Curfman McInnes }
6668965ea79SLois Curfman McInnes 
667cddf8d76SBarry Smith static int MatNorm_MPIDense(Mat A,NormType type,double *norm)
668096963f5SLois Curfman McInnes {
6693501a2bdSLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *) A->data;
6703501a2bdSLois Curfman McInnes   Mat_SeqDense *mat = (Mat_SeqDense*) mdn->A->data;
6713501a2bdSLois Curfman McInnes   int          ierr, i, j;
6723501a2bdSLois Curfman McInnes   double       sum = 0.0;
6733501a2bdSLois Curfman McInnes   Scalar       *v = mat->v;
6743501a2bdSLois Curfman McInnes 
6753501a2bdSLois Curfman McInnes   if (!mdn->assembled) SETERRQ(1,"MatNorm_MPIDense:Must assemble matrix");
6763501a2bdSLois Curfman McInnes   if (mdn->size == 1) {
6773501a2bdSLois Curfman McInnes     ierr =  MatNorm(mdn->A,type,norm); CHKERRQ(ierr);
6783501a2bdSLois Curfman McInnes   } else {
6793501a2bdSLois Curfman McInnes     if (type == NORM_FROBENIUS) {
6803501a2bdSLois Curfman McInnes       for (i=0; i<mat->n*mat->m; i++ ) {
6813501a2bdSLois Curfman McInnes #if defined(PETSC_COMPLEX)
6823501a2bdSLois Curfman McInnes         sum += real(conj(*v)*(*v)); v++;
6833501a2bdSLois Curfman McInnes #else
6843501a2bdSLois Curfman McInnes         sum += (*v)*(*v); v++;
6853501a2bdSLois Curfman McInnes #endif
6863501a2bdSLois Curfman McInnes       }
6873501a2bdSLois Curfman McInnes       MPI_Allreduce((void*)&sum,(void*)norm,1,MPI_DOUBLE,MPI_SUM,A->comm);
6883501a2bdSLois Curfman McInnes       *norm = sqrt(*norm);
6893501a2bdSLois Curfman McInnes       PLogFlops(2*mat->n*mat->m);
6903501a2bdSLois Curfman McInnes     }
6913501a2bdSLois Curfman McInnes     else if (type == NORM_1) {
6923501a2bdSLois Curfman McInnes       double *tmp, *tmp2;
6930452661fSBarry Smith       tmp  = (double *) PetscMalloc( 2*mdn->N*sizeof(double) ); CHKPTRQ(tmp);
6943501a2bdSLois Curfman McInnes       tmp2 = tmp + mdn->N;
695cddf8d76SBarry Smith       PetscMemzero(tmp,2*mdn->N*sizeof(double));
696096963f5SLois Curfman McInnes       *norm = 0.0;
6973501a2bdSLois Curfman McInnes       v = mat->v;
6983501a2bdSLois Curfman McInnes       for ( j=0; j<mat->n; j++ ) {
6993501a2bdSLois Curfman McInnes         for ( i=0; i<mat->m; i++ ) {
70067e560aaSBarry Smith           tmp[j] += PetscAbsScalar(*v);  v++;
7013501a2bdSLois Curfman McInnes         }
7023501a2bdSLois Curfman McInnes       }
7033501a2bdSLois Curfman McInnes       MPI_Allreduce((void*)tmp,(void*)tmp2,mdn->N,MPI_DOUBLE,MPI_SUM,A->comm);
7043501a2bdSLois Curfman McInnes       for ( j=0; j<mdn->N; j++ ) {
7053501a2bdSLois Curfman McInnes         if (tmp2[j] > *norm) *norm = tmp2[j];
7063501a2bdSLois Curfman McInnes       }
7070452661fSBarry Smith       PetscFree(tmp);
7083501a2bdSLois Curfman McInnes       PLogFlops(mat->n*mat->m);
7093501a2bdSLois Curfman McInnes     }
7103501a2bdSLois Curfman McInnes     else if (type == NORM_INFINITY) { /* max row norm */
7113501a2bdSLois Curfman McInnes       double ntemp;
7123501a2bdSLois Curfman McInnes       ierr = MatNorm(mdn->A,type,&ntemp); CHKERRQ(ierr);
7133501a2bdSLois Curfman McInnes       MPI_Allreduce((void*)&ntemp,(void*)norm,1,MPI_DOUBLE,MPI_MAX,A->comm);
7143501a2bdSLois Curfman McInnes     }
7153501a2bdSLois Curfman McInnes     else {
7163501a2bdSLois Curfman McInnes       SETERRQ(1,"MatNorm_MPIDense:No support for two norm");
7173501a2bdSLois Curfman McInnes     }
7183501a2bdSLois Curfman McInnes   }
7193501a2bdSLois Curfman McInnes   return 0;
7203501a2bdSLois Curfman McInnes }
7213501a2bdSLois Curfman McInnes 
7223501a2bdSLois Curfman McInnes static int MatTranspose_MPIDense(Mat A,Mat *matout)
7233501a2bdSLois Curfman McInnes {
7243501a2bdSLois Curfman McInnes   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
7253501a2bdSLois Curfman McInnes   Mat_SeqDense *Aloc = (Mat_SeqDense *) a->A->data;
7263501a2bdSLois Curfman McInnes   Mat          B;
7273501a2bdSLois Curfman McInnes   int          M = a->M, N = a->N, m, n, *rwork, rstart = a->rstart;
7283501a2bdSLois Curfman McInnes   int          j, i, ierr;
7293501a2bdSLois Curfman McInnes   Scalar       *v;
7303501a2bdSLois Curfman McInnes 
7313501a2bdSLois Curfman McInnes   if (!matout && M != N)
7323501a2bdSLois Curfman McInnes     SETERRQ(1,"MatTranspose_MPIDense:Supports square matrix only in-place");
733b4fd4287SBarry Smith   ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);
734ed2daf61SLois Curfman McInnes          CHKERRQ(ierr);
7353501a2bdSLois Curfman McInnes 
7363501a2bdSLois Curfman McInnes   m = Aloc->m; n = Aloc->n; v = Aloc->v;
7370452661fSBarry Smith   rwork = (int *) PetscMalloc(n*sizeof(int)); CHKPTRQ(rwork);
7383501a2bdSLois Curfman McInnes   for ( j=0; j<n; j++ ) {
7393501a2bdSLois Curfman McInnes     for (i=0; i<m; i++) rwork[i] = rstart + i;
7403501a2bdSLois Curfman McInnes     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES); CHKERRQ(ierr);
7413501a2bdSLois Curfman McInnes     v += m;
7423501a2bdSLois Curfman McInnes   }
7430452661fSBarry Smith   PetscFree(rwork);
7443501a2bdSLois Curfman McInnes   ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
7453501a2bdSLois Curfman McInnes   ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
7463501a2bdSLois Curfman McInnes   if (matout) {
7473501a2bdSLois Curfman McInnes     *matout = B;
7483501a2bdSLois Curfman McInnes   } else {
7493501a2bdSLois Curfman McInnes     /* This isn't really an in-place transpose, but free data struct from a */
7500452661fSBarry Smith     PetscFree(a->rowners);
7513501a2bdSLois Curfman McInnes     ierr = MatDestroy(a->A); CHKERRQ(ierr);
7523501a2bdSLois Curfman McInnes     if (a->lvec) VecDestroy(a->lvec);
7533501a2bdSLois Curfman McInnes     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
7540452661fSBarry Smith     PetscFree(a);
7553501a2bdSLois Curfman McInnes     PetscMemcpy(A,B,sizeof(struct _Mat));
7560452661fSBarry Smith     PetscHeaderDestroy(B);
7573501a2bdSLois Curfman McInnes   }
758096963f5SLois Curfman McInnes   return 0;
759096963f5SLois Curfman McInnes }
760096963f5SLois Curfman McInnes 
7613d1612f7SBarry Smith static int MatConvertSameType_MPIDense(Mat,Mat *,int);
7628965ea79SLois Curfman McInnes 
7638965ea79SLois Curfman McInnes /* -------------------------------------------------------------------*/
76439ddd567SLois Curfman McInnes static struct _MatOps MatOps = {MatSetValues_MPIDense,
76539ddd567SLois Curfman McInnes        MatGetRow_MPIDense,MatRestoreRow_MPIDense,
76639ddd567SLois Curfman McInnes        MatMult_MPIDense,MatMultAdd_MPIDense,
767096963f5SLois Curfman McInnes        MatMultTrans_MPIDense,MatMultTransAdd_MPIDense,
768e7ca0642SLois Curfman McInnes /*       MatSolve_MPIDense,0, */
769e7ca0642SLois Curfman McInnes        0,0,
77039ddd567SLois Curfman McInnes        0,0,
7718c469469SLois Curfman McInnes        0,0,
7728c469469SLois Curfman McInnes /*       MatLUFactor_MPIDense,0, */
7738aaee692SLois Curfman McInnes        0,MatTranspose_MPIDense,
77439ddd567SLois Curfman McInnes        MatGetInfo_MPIDense,0,
775096963f5SLois Curfman McInnes        MatGetDiagonal_MPIDense,0,MatNorm_MPIDense,
77639ddd567SLois Curfman McInnes        MatAssemblyBegin_MPIDense,MatAssemblyEnd_MPIDense,
7778965ea79SLois Curfman McInnes        0,
77839ddd567SLois Curfman McInnes        MatSetOption_MPIDense,MatZeroEntries_MPIDense,MatZeroRows_MPIDense,
7798c469469SLois Curfman McInnes        0,0,0,
7808c469469SLois Curfman McInnes /*       0,MatLUFactorSymbolic_MPIDense,MatLUFactorNumeric_MPIDense, */
7818aaee692SLois Curfman McInnes        0,0,
78239ddd567SLois Curfman McInnes        MatGetSize_MPIDense,MatGetLocalSize_MPIDense,
78339ddd567SLois Curfman McInnes        MatGetOwnershipRange_MPIDense,
78439ddd567SLois Curfman McInnes        0,0,
7853d1612f7SBarry Smith        0,0,0,0,0,MatConvertSameType_MPIDense,
786b49de8d1SLois Curfman McInnes        0,0,0,0,0,
787b49de8d1SLois Curfman McInnes        0,0,MatGetValues_MPIDense};
7888965ea79SLois Curfman McInnes 
7898965ea79SLois Curfman McInnes /*@C
79039ddd567SLois Curfman McInnes    MatCreateMPIDense - Creates a sparse parallel matrix in dense format.
7918965ea79SLois Curfman McInnes 
7928965ea79SLois Curfman McInnes    Input Parameters:
7938965ea79SLois Curfman McInnes .  comm - MPI communicator
7948965ea79SLois Curfman McInnes .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
7958965ea79SLois Curfman McInnes .  n - number of local columns (or PETSC_DECIDE to have calculated
7968965ea79SLois Curfman McInnes            if N is given)
7978965ea79SLois Curfman McInnes .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
7988965ea79SLois Curfman McInnes .  N - number of global columns (or PETSC_DECIDE to have calculated
7998965ea79SLois Curfman McInnes            if n is given)
800b4fd4287SBarry Smith .  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
801dfc5480cSLois Curfman McInnes    to control all matrix memory allocation.
8028965ea79SLois Curfman McInnes 
8038965ea79SLois Curfman McInnes    Output Parameter:
8048965ea79SLois Curfman McInnes .  newmat - the matrix
8058965ea79SLois Curfman McInnes 
8068965ea79SLois Curfman McInnes    Notes:
80739ddd567SLois Curfman McInnes    The dense format is fully compatible with standard Fortran 77
80839ddd567SLois Curfman McInnes    storage by columns.
8098965ea79SLois Curfman McInnes 
81018f449edSLois Curfman McInnes    The data input variable is intended primarily for Fortran programmers
81118f449edSLois Curfman McInnes    who wish to allocate their own matrix memory space.  Most users should
812b4fd4287SBarry Smith    set data=PETSC_NULL.
81318f449edSLois Curfman McInnes 
8148965ea79SLois Curfman McInnes    The user MUST specify either the local or global matrix dimensions
8158965ea79SLois Curfman McInnes    (possibly both).
8168965ea79SLois Curfman McInnes 
8173501a2bdSLois Curfman McInnes    Currently, the only parallel dense matrix decomposition is by rows,
8183501a2bdSLois Curfman McInnes    so that n=N and each submatrix owns all of the global columns.
8193501a2bdSLois Curfman McInnes 
82039ddd567SLois Curfman McInnes .keywords: matrix, dense, parallel
8218965ea79SLois Curfman McInnes 
82239ddd567SLois Curfman McInnes .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
8238965ea79SLois Curfman McInnes @*/
82418f449edSLois Curfman McInnes int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,Scalar *data,Mat *newmat)
8258965ea79SLois Curfman McInnes {
8268965ea79SLois Curfman McInnes   Mat          mat;
82739ddd567SLois Curfman McInnes   Mat_MPIDense *a;
828*25cdf11fSBarry Smith   int          ierr, i,flg;
8298965ea79SLois Curfman McInnes 
830ed2daf61SLois Curfman McInnes /* Note:  For now, when data is specified above, this assumes the user correctly
831ed2daf61SLois Curfman McInnes    allocates the local dense storage space.  We should add error checking. */
83218f449edSLois Curfman McInnes 
8338965ea79SLois Curfman McInnes   *newmat         = 0;
8340452661fSBarry Smith   PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIDENSE,comm);
8358965ea79SLois Curfman McInnes   PLogObjectCreate(mat);
8360452661fSBarry Smith   mat->data       = (void *) (a = PetscNew(Mat_MPIDense)); CHKPTRQ(a);
8378965ea79SLois Curfman McInnes   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
83839ddd567SLois Curfman McInnes   mat->destroy    = MatDestroy_MPIDense;
83939ddd567SLois Curfman McInnes   mat->view       = MatView_MPIDense;
8408965ea79SLois Curfman McInnes   mat->factor     = 0;
8418965ea79SLois Curfman McInnes 
842622d7880SLois Curfman McInnes   a->factor = 0;
8438965ea79SLois Curfman McInnes   a->insertmode = NOT_SET_VALUES;
8448965ea79SLois Curfman McInnes   MPI_Comm_rank(comm,&a->rank);
8458965ea79SLois Curfman McInnes   MPI_Comm_size(comm,&a->size);
8468965ea79SLois Curfman McInnes 
84739ddd567SLois Curfman McInnes   if (M == PETSC_DECIDE) MPI_Allreduce(&m,&M,1,MPI_INT,MPI_SUM,comm);
8488965ea79SLois Curfman McInnes   if (m == PETSC_DECIDE) {m = M/a->size + ((M % a->size) > a->rank);}
84939ddd567SLois Curfman McInnes 
85039ddd567SLois Curfman McInnes   /* each row stores all columns */
85139ddd567SLois Curfman McInnes   if (N == PETSC_DECIDE) N = n;
852d7e8b826SBarry Smith   if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);}
853d7e8b826SBarry Smith   /*  if (n != N) SETERRQ(1,"MatCreateMPIDense:For now, only n=N is supported"); */
8548965ea79SLois Curfman McInnes   a->N = N;
8558965ea79SLois Curfman McInnes   a->M = M;
85639ddd567SLois Curfman McInnes   a->m = m;
85739ddd567SLois Curfman McInnes   a->n = n;
8588965ea79SLois Curfman McInnes 
8598965ea79SLois Curfman McInnes   /* build local table of row and column ownerships */
860d7e8b826SBarry Smith   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
861d7e8b826SBarry Smith   a->cowners = a->rowners + a->size + 1;
862d7e8b826SBarry Smith   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+
86339ddd567SLois Curfman McInnes                        sizeof(Mat_MPIDense));
8648965ea79SLois Curfman McInnes   MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);
8658965ea79SLois Curfman McInnes   a->rowners[0] = 0;
8668965ea79SLois Curfman McInnes   for ( i=2; i<=a->size; i++ ) {
8678965ea79SLois Curfman McInnes     a->rowners[i] += a->rowners[i-1];
8688965ea79SLois Curfman McInnes   }
8698965ea79SLois Curfman McInnes   a->rstart = a->rowners[a->rank];
8708965ea79SLois Curfman McInnes   a->rend   = a->rowners[a->rank+1];
871d7e8b826SBarry Smith   MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);
872d7e8b826SBarry Smith   a->cowners[0] = 0;
873d7e8b826SBarry Smith   for ( i=2; i<=a->size; i++ ) {
874d7e8b826SBarry Smith     a->cowners[i] += a->cowners[i-1];
875d7e8b826SBarry Smith   }
8768965ea79SLois Curfman McInnes 
87718f449edSLois Curfman McInnes   ierr = MatCreateSeqDense(MPI_COMM_SELF,m,N,data,&a->A); CHKERRQ(ierr);
8788965ea79SLois Curfman McInnes   PLogObjectParent(mat,a->A);
8798965ea79SLois Curfman McInnes 
8808965ea79SLois Curfman McInnes   /* build cache for off array entries formed */
8818965ea79SLois Curfman McInnes   ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr);
8828965ea79SLois Curfman McInnes 
8838965ea79SLois Curfman McInnes   /* stuff used for matrix vector multiply */
8848965ea79SLois Curfman McInnes   a->lvec        = 0;
8858965ea79SLois Curfman McInnes   a->Mvctx       = 0;
8868965ea79SLois Curfman McInnes   a->assembled   = 0;
88739b7565bSBarry Smith   a->roworiented = 1;
8888965ea79SLois Curfman McInnes 
8898965ea79SLois Curfman McInnes   *newmat = mat;
890*25cdf11fSBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
891*25cdf11fSBarry Smith   if (flg) {
8928c469469SLois Curfman McInnes     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
8938c469469SLois Curfman McInnes   }
8948965ea79SLois Curfman McInnes   return 0;
8958965ea79SLois Curfman McInnes }
8968965ea79SLois Curfman McInnes 
8973d1612f7SBarry Smith static int MatConvertSameType_MPIDense(Mat A,Mat *newmat,int cpvalues)
8988965ea79SLois Curfman McInnes {
8998965ea79SLois Curfman McInnes   Mat          mat;
9003501a2bdSLois Curfman McInnes   Mat_MPIDense *a,*oldmat = (Mat_MPIDense *) A->data;
90139ddd567SLois Curfman McInnes   int          ierr;
9022ba99913SLois Curfman McInnes   FactorCtx    *factor;
9038965ea79SLois Curfman McInnes 
9043d1612f7SBarry Smith   if (!oldmat->assembled) SETERRQ(1,"MatConvertSameType_MPIDense:Must assemble matrix");
9058965ea79SLois Curfman McInnes   *newmat       = 0;
9060452661fSBarry Smith   PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIDENSE,A->comm);
9078965ea79SLois Curfman McInnes   PLogObjectCreate(mat);
9080452661fSBarry Smith   mat->data     = (void *) (a = PetscNew(Mat_MPIDense)); CHKPTRQ(a);
9098965ea79SLois Curfman McInnes   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
91039ddd567SLois Curfman McInnes   mat->destroy  = MatDestroy_MPIDense;
91139ddd567SLois Curfman McInnes   mat->view     = MatView_MPIDense;
9123501a2bdSLois Curfman McInnes   mat->factor   = A->factor;
9138965ea79SLois Curfman McInnes 
9148965ea79SLois Curfman McInnes   a->m          = oldmat->m;
9158965ea79SLois Curfman McInnes   a->n          = oldmat->n;
9168965ea79SLois Curfman McInnes   a->M          = oldmat->M;
9178965ea79SLois Curfman McInnes   a->N          = oldmat->N;
9182ba99913SLois Curfman McInnes   if (oldmat->factor) {
9192ba99913SLois Curfman McInnes     a->factor = (FactorCtx *) (factor = PetscNew(FactorCtx)); CHKPTRQ(factor);
9202ba99913SLois Curfman McInnes     /* copy factor contents ... add this code! */
9212ba99913SLois Curfman McInnes   } else a->factor = 0;
9228965ea79SLois Curfman McInnes 
9238965ea79SLois Curfman McInnes   a->assembled  = 1;
9248965ea79SLois Curfman McInnes   a->rstart     = oldmat->rstart;
9258965ea79SLois Curfman McInnes   a->rend       = oldmat->rend;
9268965ea79SLois Curfman McInnes   a->size       = oldmat->size;
9278965ea79SLois Curfman McInnes   a->rank       = oldmat->rank;
9288965ea79SLois Curfman McInnes   a->insertmode = NOT_SET_VALUES;
9298965ea79SLois Curfman McInnes 
9300452661fSBarry Smith   a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners);
93139ddd567SLois Curfman McInnes   PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIDense));
9328965ea79SLois Curfman McInnes   PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));
9338965ea79SLois Curfman McInnes   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
9348965ea79SLois Curfman McInnes 
9358965ea79SLois Curfman McInnes   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
9368965ea79SLois Curfman McInnes   PLogObjectParent(mat,a->lvec);
93755659b69SBarry Smith   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
9388965ea79SLois Curfman McInnes   PLogObjectParent(mat,a->Mvctx);
9398965ea79SLois Curfman McInnes   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
9408965ea79SLois Curfman McInnes   PLogObjectParent(mat,a->A);
9418965ea79SLois Curfman McInnes   *newmat = mat;
9428965ea79SLois Curfman McInnes   return 0;
9438965ea79SLois Curfman McInnes }
9448965ea79SLois Curfman McInnes 
9458965ea79SLois Curfman McInnes #include "sysio.h"
9468965ea79SLois Curfman McInnes 
94739ddd567SLois Curfman McInnes int MatLoad_MPIDense(Viewer bview,MatType type,Mat *newmat)
9488965ea79SLois Curfman McInnes {
9498965ea79SLois Curfman McInnes   Mat          A;
9508965ea79SLois Curfman McInnes   int          i, nz, ierr, j,rstart, rend, fd;
9518965ea79SLois Curfman McInnes   Scalar       *vals,*svals;
9528965ea79SLois Curfman McInnes   PetscObject  vobj = (PetscObject) bview;
9538965ea79SLois Curfman McInnes   MPI_Comm     comm = vobj->comm;
9548965ea79SLois Curfman McInnes   MPI_Status   status;
9558965ea79SLois Curfman McInnes   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
9568965ea79SLois Curfman McInnes   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
9578965ea79SLois Curfman McInnes   int          tag = ((PetscObject)bview)->tag;
9588965ea79SLois Curfman McInnes 
9598965ea79SLois Curfman McInnes   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
9608965ea79SLois Curfman McInnes   if (!rank) {
9618965ea79SLois Curfman McInnes     ierr = ViewerFileGetDescriptor_Private(bview,&fd); CHKERRQ(ierr);
9628965ea79SLois Curfman McInnes     ierr = SYRead(fd,(char *)header,4,SYINT); CHKERRQ(ierr);
96339ddd567SLois Curfman McInnes     if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIDenseorMPIRow:not matrix object");
9648965ea79SLois Curfman McInnes   }
9658965ea79SLois Curfman McInnes 
9668965ea79SLois Curfman McInnes   MPI_Bcast(header+1,3,MPI_INT,0,comm);
9678965ea79SLois Curfman McInnes   M = header[1]; N = header[2];
9688965ea79SLois Curfman McInnes   /* determine ownership of all rows */
9698965ea79SLois Curfman McInnes   m = M/size + ((M % size) > rank);
9700452661fSBarry Smith   rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
9718965ea79SLois Curfman McInnes   MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
9728965ea79SLois Curfman McInnes   rowners[0] = 0;
9738965ea79SLois Curfman McInnes   for ( i=2; i<=size; i++ ) {
9748965ea79SLois Curfman McInnes     rowners[i] += rowners[i-1];
9758965ea79SLois Curfman McInnes   }
9768965ea79SLois Curfman McInnes   rstart = rowners[rank];
9778965ea79SLois Curfman McInnes   rend   = rowners[rank+1];
9788965ea79SLois Curfman McInnes 
9798965ea79SLois Curfman McInnes   /* distribute row lengths to all processors */
9800452661fSBarry Smith   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
9818965ea79SLois Curfman McInnes   offlens = ourlens + (rend-rstart);
9828965ea79SLois Curfman McInnes   if (!rank) {
9830452661fSBarry Smith     rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
9848965ea79SLois Curfman McInnes     ierr = SYRead(fd,rowlengths,M,SYINT); CHKERRQ(ierr);
9850452661fSBarry Smith     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
9868965ea79SLois Curfman McInnes     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
9878965ea79SLois Curfman McInnes     MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
9880452661fSBarry Smith     PetscFree(sndcounts);
9898965ea79SLois Curfman McInnes   }
9908965ea79SLois Curfman McInnes   else {
9918965ea79SLois Curfman McInnes     MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
9928965ea79SLois Curfman McInnes   }
9938965ea79SLois Curfman McInnes 
9948965ea79SLois Curfman McInnes   if (!rank) {
9958965ea79SLois Curfman McInnes     /* calculate the number of nonzeros on each processor */
9960452661fSBarry Smith     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
997cddf8d76SBarry Smith     PetscMemzero(procsnz,size*sizeof(int));
9988965ea79SLois Curfman McInnes     for ( i=0; i<size; i++ ) {
9998965ea79SLois Curfman McInnes       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
10008965ea79SLois Curfman McInnes         procsnz[i] += rowlengths[j];
10018965ea79SLois Curfman McInnes       }
10028965ea79SLois Curfman McInnes     }
10030452661fSBarry Smith     PetscFree(rowlengths);
10048965ea79SLois Curfman McInnes 
10058965ea79SLois Curfman McInnes     /* determine max buffer needed and allocate it */
10068965ea79SLois Curfman McInnes     maxnz = 0;
10078965ea79SLois Curfman McInnes     for ( i=0; i<size; i++ ) {
10080452661fSBarry Smith       maxnz = PetscMax(maxnz,procsnz[i]);
10098965ea79SLois Curfman McInnes     }
10100452661fSBarry Smith     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
10118965ea79SLois Curfman McInnes 
10128965ea79SLois Curfman McInnes     /* read in my part of the matrix column indices  */
10138965ea79SLois Curfman McInnes     nz = procsnz[0];
10140452661fSBarry Smith     mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
10158965ea79SLois Curfman McInnes     ierr = SYRead(fd,mycols,nz,SYINT); CHKERRQ(ierr);
10168965ea79SLois Curfman McInnes 
10178965ea79SLois Curfman McInnes     /* read in every one elses and ship off */
10188965ea79SLois Curfman McInnes     for ( i=1; i<size; i++ ) {
10198965ea79SLois Curfman McInnes       nz = procsnz[i];
10208965ea79SLois Curfman McInnes       ierr = SYRead(fd,cols,nz,SYINT); CHKERRQ(ierr);
10218965ea79SLois Curfman McInnes       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
10228965ea79SLois Curfman McInnes     }
10230452661fSBarry Smith     PetscFree(cols);
10248965ea79SLois Curfman McInnes   }
10258965ea79SLois Curfman McInnes   else {
10268965ea79SLois Curfman McInnes     /* determine buffer space needed for message */
10278965ea79SLois Curfman McInnes     nz = 0;
10288965ea79SLois Curfman McInnes     for ( i=0; i<m; i++ ) {
10298965ea79SLois Curfman McInnes       nz += ourlens[i];
10308965ea79SLois Curfman McInnes     }
10310452661fSBarry Smith     mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
10328965ea79SLois Curfman McInnes 
10338965ea79SLois Curfman McInnes     /* receive message of column indices*/
10348965ea79SLois Curfman McInnes     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
10358965ea79SLois Curfman McInnes     MPI_Get_count(&status,MPI_INT,&maxnz);
103639ddd567SLois Curfman McInnes     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIDenseorMPIRow:something is wrong with file");
10378965ea79SLois Curfman McInnes   }
10388965ea79SLois Curfman McInnes 
10398965ea79SLois Curfman McInnes   /* loop over local rows, determining number of off diagonal entries */
1040cddf8d76SBarry Smith   PetscMemzero(offlens,m*sizeof(int));
10418965ea79SLois Curfman McInnes   jj = 0;
10428965ea79SLois Curfman McInnes   for ( i=0; i<m; i++ ) {
10438965ea79SLois Curfman McInnes     for ( j=0; j<ourlens[i]; j++ ) {
10448965ea79SLois Curfman McInnes       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
10458965ea79SLois Curfman McInnes       jj++;
10468965ea79SLois Curfman McInnes     }
10478965ea79SLois Curfman McInnes   }
10488965ea79SLois Curfman McInnes 
10498965ea79SLois Curfman McInnes   /* create our matrix */
10508965ea79SLois Curfman McInnes   for ( i=0; i<m; i++ ) {
10518965ea79SLois Curfman McInnes     ourlens[i] -= offlens[i];
10528965ea79SLois Curfman McInnes   }
105339ddd567SLois Curfman McInnes   if (type == MATMPIDENSE) {
1054b4fd4287SBarry Smith     ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat); CHKERRQ(ierr);
10558965ea79SLois Curfman McInnes   }
10568965ea79SLois Curfman McInnes   A = *newmat;
10578965ea79SLois Curfman McInnes   for ( i=0; i<m; i++ ) {
10588965ea79SLois Curfman McInnes     ourlens[i] += offlens[i];
10598965ea79SLois Curfman McInnes   }
10608965ea79SLois Curfman McInnes 
10618965ea79SLois Curfman McInnes   if (!rank) {
10620452661fSBarry Smith     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);
10638965ea79SLois Curfman McInnes 
10648965ea79SLois Curfman McInnes     /* read in my part of the matrix numerical values  */
10658965ea79SLois Curfman McInnes     nz = procsnz[0];
10668965ea79SLois Curfman McInnes     ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr);
10678965ea79SLois Curfman McInnes 
10688965ea79SLois Curfman McInnes     /* insert into matrix */
10698965ea79SLois Curfman McInnes     jj      = rstart;
10708965ea79SLois Curfman McInnes     smycols = mycols;
10718965ea79SLois Curfman McInnes     svals   = vals;
10728965ea79SLois Curfman McInnes     for ( i=0; i<m; i++ ) {
10738965ea79SLois Curfman McInnes       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
10748965ea79SLois Curfman McInnes       smycols += ourlens[i];
10758965ea79SLois Curfman McInnes       svals   += ourlens[i];
10768965ea79SLois Curfman McInnes       jj++;
10778965ea79SLois Curfman McInnes     }
10788965ea79SLois Curfman McInnes 
10798965ea79SLois Curfman McInnes     /* read in other processors and ship out */
10808965ea79SLois Curfman McInnes     for ( i=1; i<size; i++ ) {
10818965ea79SLois Curfman McInnes       nz = procsnz[i];
10828965ea79SLois Curfman McInnes       ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr);
10838965ea79SLois Curfman McInnes       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
10848965ea79SLois Curfman McInnes     }
10850452661fSBarry Smith     PetscFree(procsnz);
10868965ea79SLois Curfman McInnes   }
10878965ea79SLois Curfman McInnes   else {
10888965ea79SLois Curfman McInnes     /* receive numeric values */
10890452661fSBarry Smith     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);
10908965ea79SLois Curfman McInnes 
10918965ea79SLois Curfman McInnes     /* receive message of values*/
10928965ea79SLois Curfman McInnes     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
10938965ea79SLois Curfman McInnes     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
109439ddd567SLois Curfman McInnes     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIDenseorMPIRow:something is wrong with file");
10958965ea79SLois Curfman McInnes 
10968965ea79SLois Curfman McInnes     /* insert into matrix */
10978965ea79SLois Curfman McInnes     jj      = rstart;
10988965ea79SLois Curfman McInnes     smycols = mycols;
10998965ea79SLois Curfman McInnes     svals   = vals;
11008965ea79SLois Curfman McInnes     for ( i=0; i<m; i++ ) {
11018965ea79SLois Curfman McInnes       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
11028965ea79SLois Curfman McInnes       smycols += ourlens[i];
11038965ea79SLois Curfman McInnes       svals   += ourlens[i];
11048965ea79SLois Curfman McInnes       jj++;
11058965ea79SLois Curfman McInnes     }
11068965ea79SLois Curfman McInnes   }
11070452661fSBarry Smith   PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);
11088965ea79SLois Curfman McInnes 
11098965ea79SLois Curfman McInnes   ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
11108965ea79SLois Curfman McInnes   ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
11118965ea79SLois Curfman McInnes   return 0;
11128965ea79SLois Curfman McInnes }
1113