xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 227d817a37c50a73d88aa8c031007e5b1fb2e4ec)
18965ea79SLois Curfman McInnes #ifndef lint
2*227d817aSBarry Smith static char vcid[] = "$Id: mpidense.c,v 1.26 1996/01/24 05:45:49 bsmith Exp bsmith $";
38965ea79SLois Curfman McInnes #endif
48965ea79SLois Curfman McInnes 
5ed3cc1f0SBarry Smith /*
6ed3cc1f0SBarry Smith    Basic functions for basic parallel dense matrices.
7ed3cc1f0SBarry Smith */
8ed3cc1f0SBarry Smith 
98965ea79SLois Curfman McInnes #include "mpidense.h"
108965ea79SLois Curfman McInnes #include "vec/vecimpl.h"
118965ea79SLois Curfman McInnes 
1239ddd567SLois Curfman McInnes static int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,
138965ea79SLois Curfman McInnes                                int *idxn,Scalar *v,InsertMode addv)
148965ea79SLois Curfman McInnes {
1539b7565bSBarry Smith   Mat_MPIDense *A = (Mat_MPIDense *) mat->data;
1639b7565bSBarry Smith   int          ierr, i, j, rstart = A->rstart, rend = A->rend, row;
1739b7565bSBarry Smith   int          roworiented = A->roworiented;
188965ea79SLois Curfman McInnes 
1939b7565bSBarry Smith   if (A->insertmode != NOT_SET_VALUES && A->insertmode != addv) {
2039ddd567SLois Curfman McInnes     SETERRQ(1,"MatSetValues_MPIDense:Cannot mix inserts and adds");
218965ea79SLois Curfman McInnes   }
2239b7565bSBarry Smith   A->insertmode = addv;
238965ea79SLois Curfman McInnes   for ( i=0; i<m; i++ ) {
2439ddd567SLois Curfman McInnes     if (idxm[i] < 0) SETERRQ(1,"MatSetValues_MPIDense:Negative row");
2539b7565bSBarry Smith     if (idxm[i] >= A->M) SETERRQ(1,"MatSetValues_MPIDense:Row too large");
268965ea79SLois Curfman McInnes     if (idxm[i] >= rstart && idxm[i] < rend) {
278965ea79SLois Curfman McInnes       row = idxm[i] - rstart;
2839b7565bSBarry Smith       if (roworiented) {
2939b7565bSBarry Smith         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv); CHKERRQ(ierr);
3039b7565bSBarry Smith       }
3139b7565bSBarry Smith       else {
328965ea79SLois Curfman McInnes         for ( j=0; j<n; j++ ) {
3339ddd567SLois Curfman McInnes           if (idxn[j] < 0) SETERRQ(1,"MatSetValues_MPIDense:Negative column");
3439b7565bSBarry Smith           if (idxn[j] >= A->N) SETERRQ(1,"MatSetValues_MPIDense:Column too large");
3539b7565bSBarry Smith           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv); CHKERRQ(ierr);
3639b7565bSBarry Smith         }
378965ea79SLois Curfman McInnes       }
388965ea79SLois Curfman McInnes     }
398965ea79SLois Curfman McInnes     else {
4039b7565bSBarry Smith       if (roworiented) {
4139b7565bSBarry Smith         ierr = StashValues_Private(&A->stash,idxm[i],n,idxn,v+i*n,addv); CHKERRQ(ierr);
4239b7565bSBarry Smith       }
4339b7565bSBarry Smith       else { /* must stash each seperately */
4439b7565bSBarry Smith         row = idxm[i];
4539b7565bSBarry Smith         for ( j=0; j<n; j++ ) {
4639b7565bSBarry Smith           ierr = StashValues_Private(&A->stash,row,1,&idxn[j],v+i+j*m,addv);
4739b7565bSBarry Smith                  CHKERRQ(ierr);
4839b7565bSBarry Smith         }
4939b7565bSBarry Smith       }
50b49de8d1SLois Curfman McInnes     }
51b49de8d1SLois Curfman McInnes   }
52b49de8d1SLois Curfman McInnes   return 0;
53b49de8d1SLois Curfman McInnes }
54b49de8d1SLois Curfman McInnes 
55b49de8d1SLois Curfman McInnes static int MatGetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
56b49de8d1SLois Curfman McInnes {
57b49de8d1SLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
58b49de8d1SLois Curfman McInnes   int          ierr, i, j, rstart = mdn->rstart, rend = mdn->rend, row;
59b49de8d1SLois Curfman McInnes 
60b49de8d1SLois Curfman McInnes   for ( i=0; i<m; i++ ) {
61b49de8d1SLois Curfman McInnes     if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIDense:Negative row");
62b49de8d1SLois Curfman McInnes     if (idxm[i] >= mdn->M) SETERRQ(1,"MatGetValues_MPIDense:Row too large");
63b49de8d1SLois Curfman McInnes     if (idxm[i] >= rstart && idxm[i] < rend) {
64b49de8d1SLois Curfman McInnes       row = idxm[i] - rstart;
65b49de8d1SLois Curfman McInnes       for ( j=0; j<n; j++ ) {
66b49de8d1SLois Curfman McInnes         if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIDense:Negative column");
67b49de8d1SLois Curfman McInnes         if (idxn[j] >= mdn->N)
68b49de8d1SLois Curfman McInnes           SETERRQ(1,"MatGetValues_MPIDense:Column too large");
69b49de8d1SLois Curfman McInnes         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j); CHKERRQ(ierr);
70b49de8d1SLois Curfman McInnes       }
71b49de8d1SLois Curfman McInnes     }
72b49de8d1SLois Curfman McInnes     else {
73b49de8d1SLois Curfman McInnes       SETERRQ(1,"MatGetValues_MPIDense:Only local values currently supported");
748965ea79SLois Curfman McInnes     }
758965ea79SLois Curfman McInnes   }
768965ea79SLois Curfman McInnes   return 0;
778965ea79SLois Curfman McInnes }
788965ea79SLois Curfman McInnes 
7939ddd567SLois Curfman McInnes static int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
808965ea79SLois Curfman McInnes {
8139ddd567SLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
828965ea79SLois Curfman McInnes   MPI_Comm     comm = mat->comm;
8339ddd567SLois Curfman McInnes   int          size = mdn->size, *owners = mdn->rowners, rank = mdn->rank;
848965ea79SLois Curfman McInnes   int          *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
8539ddd567SLois Curfman McInnes   int          tag = mat->tag, *owner,*starts,count,ierr;
868965ea79SLois Curfman McInnes   InsertMode   addv;
8739ddd567SLois Curfman McInnes   MPI_Request  *send_waits,*recv_waits;
888965ea79SLois Curfman McInnes   Scalar       *rvalues,*svalues;
898965ea79SLois Curfman McInnes 
908965ea79SLois Curfman McInnes   /* make sure all processors are either in INSERTMODE or ADDMODE */
9139ddd567SLois Curfman McInnes   MPI_Allreduce((void *) &mdn->insertmode,(void *) &addv,1,MPI_INT,
9239ddd567SLois Curfman McInnes                 MPI_BOR,comm);
9339ddd567SLois Curfman McInnes   if (addv == (ADD_VALUES|INSERT_VALUES)) { SETERRQ(1,
9439ddd567SLois Curfman McInnes     "MatAssemblyBegin_MPIDense:Cannot mix adds/inserts on different procs");
958965ea79SLois Curfman McInnes     }
9639ddd567SLois Curfman McInnes   mdn->insertmode = addv; /* in case this processor had no cache */
978965ea79SLois Curfman McInnes 
988965ea79SLois Curfman McInnes   /*  first count number of contributors to each processor */
990452661fSBarry Smith   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
100cddf8d76SBarry Smith   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
1010452661fSBarry Smith   owner = (int *) PetscMalloc( (mdn->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
10239ddd567SLois Curfman McInnes   for ( i=0; i<mdn->stash.n; i++ ) {
10339ddd567SLois Curfman McInnes     idx = mdn->stash.idx[i];
1048965ea79SLois Curfman McInnes     for ( j=0; j<size; j++ ) {
1058965ea79SLois Curfman McInnes       if (idx >= owners[j] && idx < owners[j+1]) {
1068965ea79SLois Curfman McInnes         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
1078965ea79SLois Curfman McInnes       }
1088965ea79SLois Curfman McInnes     }
1098965ea79SLois Curfman McInnes   }
1108965ea79SLois Curfman McInnes   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
1118965ea79SLois Curfman McInnes 
1128965ea79SLois Curfman McInnes   /* inform other processors of number of messages and max length*/
1130452661fSBarry Smith   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
11439ddd567SLois Curfman McInnes   MPI_Allreduce((void *) procs,(void *) work,size,MPI_INT,MPI_SUM,comm);
1158965ea79SLois Curfman McInnes   nreceives = work[rank];
11639ddd567SLois Curfman McInnes   MPI_Allreduce((void *) nprocs,(void *) work,size,MPI_INT,MPI_MAX,comm);
1178965ea79SLois Curfman McInnes   nmax = work[rank];
1180452661fSBarry Smith   PetscFree(work);
1198965ea79SLois Curfman McInnes 
1208965ea79SLois Curfman McInnes   /* post receives:
1218965ea79SLois Curfman McInnes        1) each message will consist of ordered pairs
1228965ea79SLois Curfman McInnes      (global index,value) we store the global index as a double
1238965ea79SLois Curfman McInnes      to simplify the message passing.
1248965ea79SLois Curfman McInnes        2) since we don't know how long each individual message is we
1258965ea79SLois Curfman McInnes      allocate the largest needed buffer for each receive. Potentially
1268965ea79SLois Curfman McInnes      this is a lot of wasted space.
1278965ea79SLois Curfman McInnes 
1288965ea79SLois Curfman McInnes        This could be done better.
1298965ea79SLois Curfman McInnes   */
1300452661fSBarry Smith   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
1318965ea79SLois Curfman McInnes   CHKPTRQ(rvalues);
1320452661fSBarry Smith   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
1338965ea79SLois Curfman McInnes   CHKPTRQ(recv_waits);
1348965ea79SLois Curfman McInnes   for ( i=0; i<nreceives; i++ ) {
13539ddd567SLois Curfman McInnes     MPI_Irecv((void *)(rvalues+3*nmax*i),3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
1368965ea79SLois Curfman McInnes               comm,recv_waits+i);
1378965ea79SLois Curfman McInnes   }
1388965ea79SLois Curfman McInnes 
1398965ea79SLois Curfman McInnes   /* do sends:
1408965ea79SLois Curfman McInnes       1) starts[i] gives the starting index in svalues for stuff going to
1418965ea79SLois Curfman McInnes          the ith processor
1428965ea79SLois Curfman McInnes   */
1430452661fSBarry Smith   svalues = (Scalar *) PetscMalloc( 3*(mdn->stash.n+1)*sizeof(Scalar) );
14439ddd567SLois Curfman McInnes   CHKPTRQ(svalues);
1450452661fSBarry Smith   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
1468965ea79SLois Curfman McInnes   CHKPTRQ(send_waits);
1470452661fSBarry Smith   starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
1488965ea79SLois Curfman McInnes   starts[0] = 0;
1498965ea79SLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
15039ddd567SLois Curfman McInnes   for ( i=0; i<mdn->stash.n; i++ ) {
15139ddd567SLois Curfman McInnes     svalues[3*starts[owner[i]]]       = (Scalar)  mdn->stash.idx[i];
15239ddd567SLois Curfman McInnes     svalues[3*starts[owner[i]]+1]     = (Scalar)  mdn->stash.idy[i];
15339ddd567SLois Curfman McInnes     svalues[3*(starts[owner[i]]++)+2] =  mdn->stash.array[i];
1548965ea79SLois Curfman McInnes   }
1550452661fSBarry Smith   PetscFree(owner);
1568965ea79SLois Curfman McInnes   starts[0] = 0;
1578965ea79SLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
1588965ea79SLois Curfman McInnes   count = 0;
1598965ea79SLois Curfman McInnes   for ( i=0; i<size; i++ ) {
1608965ea79SLois Curfman McInnes     if (procs[i]) {
16139ddd567SLois Curfman McInnes       MPI_Isend((void*)(svalues+3*starts[i]),3*nprocs[i],MPIU_SCALAR,i,tag,
1628965ea79SLois Curfman McInnes                 comm,send_waits+count++);
1638965ea79SLois Curfman McInnes     }
1648965ea79SLois Curfman McInnes   }
1650452661fSBarry Smith   PetscFree(starts); PetscFree(nprocs);
1668965ea79SLois Curfman McInnes 
1678965ea79SLois Curfman McInnes   /* Free cache space */
16839ddd567SLois Curfman McInnes   ierr = StashDestroy_Private(&mdn->stash); CHKERRQ(ierr);
1698965ea79SLois Curfman McInnes 
17039ddd567SLois Curfman McInnes   mdn->svalues    = svalues;    mdn->rvalues = rvalues;
17139ddd567SLois Curfman McInnes   mdn->nsends     = nsends;     mdn->nrecvs = nreceives;
17239ddd567SLois Curfman McInnes   mdn->send_waits = send_waits; mdn->recv_waits = recv_waits;
17339ddd567SLois Curfman McInnes   mdn->rmax       = nmax;
1748965ea79SLois Curfman McInnes 
1758965ea79SLois Curfman McInnes   return 0;
1768965ea79SLois Curfman McInnes }
17739ddd567SLois Curfman McInnes extern int MatSetUpMultiply_MPIDense(Mat);
1788965ea79SLois Curfman McInnes 
17939ddd567SLois Curfman McInnes static int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
1808965ea79SLois Curfman McInnes {
18139ddd567SLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
1828965ea79SLois Curfman McInnes   MPI_Status   *send_status,recv_status;
18339ddd567SLois Curfman McInnes   int          imdex, nrecvs=mdn->nrecvs, count=nrecvs, i, n, ierr, row, col;
1848965ea79SLois Curfman McInnes   Scalar       *values,val;
18539ddd567SLois Curfman McInnes   InsertMode   addv = mdn->insertmode;
1868965ea79SLois Curfman McInnes 
1878965ea79SLois Curfman McInnes   /*  wait on receives */
1888965ea79SLois Curfman McInnes   while (count) {
18939ddd567SLois Curfman McInnes     MPI_Waitany(nrecvs,mdn->recv_waits,&imdex,&recv_status);
1908965ea79SLois Curfman McInnes     /* unpack receives into our local space */
19139ddd567SLois Curfman McInnes     values = mdn->rvalues + 3*imdex*mdn->rmax;
1928965ea79SLois Curfman McInnes     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
1938965ea79SLois Curfman McInnes     n = n/3;
1948965ea79SLois Curfman McInnes     for ( i=0; i<n; i++ ) {
195*227d817aSBarry Smith       row = (int) PetscReal(values[3*i]) - mdn->rstart;
196*227d817aSBarry Smith       col = (int) PetscReal(values[3*i+1]);
1978965ea79SLois Curfman McInnes       val = values[3*i+2];
19839ddd567SLois Curfman McInnes       if (col >= 0 && col < mdn->N) {
19939ddd567SLois Curfman McInnes         MatSetValues(mdn->A,1,&row,1,&col,&val,addv);
2008965ea79SLois Curfman McInnes       }
20139ddd567SLois Curfman McInnes       else {SETERRQ(1,"MatAssemblyEnd_MPIDense:Invalid column");}
2028965ea79SLois Curfman McInnes     }
2038965ea79SLois Curfman McInnes     count--;
2048965ea79SLois Curfman McInnes   }
2050452661fSBarry Smith   PetscFree(mdn->recv_waits); PetscFree(mdn->rvalues);
2068965ea79SLois Curfman McInnes 
2078965ea79SLois Curfman McInnes   /* wait on sends */
20839ddd567SLois Curfman McInnes   if (mdn->nsends) {
2090452661fSBarry Smith     send_status = (MPI_Status *) PetscMalloc( mdn->nsends*sizeof(MPI_Status) );
2108965ea79SLois Curfman McInnes     CHKPTRQ(send_status);
21139ddd567SLois Curfman McInnes     MPI_Waitall(mdn->nsends,mdn->send_waits,send_status);
2120452661fSBarry Smith     PetscFree(send_status);
2138965ea79SLois Curfman McInnes   }
2140452661fSBarry Smith   PetscFree(mdn->send_waits); PetscFree(mdn->svalues);
2158965ea79SLois Curfman McInnes 
21639ddd567SLois Curfman McInnes   mdn->insertmode = NOT_SET_VALUES;
21739ddd567SLois Curfman McInnes   ierr = MatAssemblyBegin(mdn->A,mode); CHKERRQ(ierr);
21839ddd567SLois Curfman McInnes   ierr = MatAssemblyEnd(mdn->A,mode); CHKERRQ(ierr);
2198965ea79SLois Curfman McInnes 
220*227d817aSBarry Smith   if (!mat->was_assembled && mode == FINAL_ASSEMBLY) {
22139ddd567SLois Curfman McInnes     ierr = MatSetUpMultiply_MPIDense(mat); CHKERRQ(ierr);
2228965ea79SLois Curfman McInnes   }
2238965ea79SLois Curfman McInnes   return 0;
2248965ea79SLois Curfman McInnes }
2258965ea79SLois Curfman McInnes 
22639ddd567SLois Curfman McInnes static int MatZeroEntries_MPIDense(Mat A)
2278965ea79SLois Curfman McInnes {
22839ddd567SLois Curfman McInnes   Mat_MPIDense *l = (Mat_MPIDense *) A->data;
22939ddd567SLois Curfman McInnes   return MatZeroEntries(l->A);
2308965ea79SLois Curfman McInnes }
2318965ea79SLois Curfman McInnes 
2328965ea79SLois Curfman McInnes /* the code does not do the diagonal entries correctly unless the
2338965ea79SLois Curfman McInnes    matrix is square and the column and row owerships are identical.
2348965ea79SLois Curfman McInnes    This is a BUG. The only way to fix it seems to be to access
2353501a2bdSLois Curfman McInnes    mdn->A and mdn->B directly and not through the MatZeroRows()
2368965ea79SLois Curfman McInnes    routine.
2378965ea79SLois Curfman McInnes */
23839ddd567SLois Curfman McInnes static int MatZeroRows_MPIDense(Mat A,IS is,Scalar *diag)
2398965ea79SLois Curfman McInnes {
24039ddd567SLois Curfman McInnes   Mat_MPIDense   *l = (Mat_MPIDense *) A->data;
2418965ea79SLois Curfman McInnes   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
2428965ea79SLois Curfman McInnes   int            *procs,*nprocs,j,found,idx,nsends,*work;
2438965ea79SLois Curfman McInnes   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
2448965ea79SLois Curfman McInnes   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
2458965ea79SLois Curfman McInnes   int            *lens,imdex,*lrows,*values;
2468965ea79SLois Curfman McInnes   MPI_Comm       comm = A->comm;
2478965ea79SLois Curfman McInnes   MPI_Request    *send_waits,*recv_waits;
2488965ea79SLois Curfman McInnes   MPI_Status     recv_status,*send_status;
2498965ea79SLois Curfman McInnes   IS             istmp;
2508965ea79SLois Curfman McInnes 
2518965ea79SLois Curfman McInnes   ierr = ISGetLocalSize(is,&N); CHKERRQ(ierr);
2528965ea79SLois Curfman McInnes   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
2538965ea79SLois Curfman McInnes 
2548965ea79SLois Curfman McInnes   /*  first count number of contributors to each processor */
2550452661fSBarry Smith   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
256cddf8d76SBarry Smith   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
2570452661fSBarry Smith   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
2588965ea79SLois Curfman McInnes   for ( i=0; i<N; i++ ) {
2598965ea79SLois Curfman McInnes     idx = rows[i];
2608965ea79SLois Curfman McInnes     found = 0;
2618965ea79SLois Curfman McInnes     for ( j=0; j<size; j++ ) {
2628965ea79SLois Curfman McInnes       if (idx >= owners[j] && idx < owners[j+1]) {
2638965ea79SLois Curfman McInnes         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
2648965ea79SLois Curfman McInnes       }
2658965ea79SLois Curfman McInnes     }
26639ddd567SLois Curfman McInnes     if (!found) SETERRQ(1,"MatZeroRows_MPIDense:Index out of range");
2678965ea79SLois Curfman McInnes   }
2688965ea79SLois Curfman McInnes   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
2698965ea79SLois Curfman McInnes 
2708965ea79SLois Curfman McInnes   /* inform other processors of number of messages and max length*/
2710452661fSBarry Smith   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
2728965ea79SLois Curfman McInnes   MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
2738965ea79SLois Curfman McInnes   nrecvs = work[rank];
2748965ea79SLois Curfman McInnes   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
2758965ea79SLois Curfman McInnes   nmax = work[rank];
2760452661fSBarry Smith   PetscFree(work);
2778965ea79SLois Curfman McInnes 
2788965ea79SLois Curfman McInnes   /* post receives:   */
2790452661fSBarry Smith   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
2808965ea79SLois Curfman McInnes   CHKPTRQ(rvalues);
2810452661fSBarry Smith   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
2828965ea79SLois Curfman McInnes   CHKPTRQ(recv_waits);
2838965ea79SLois Curfman McInnes   for ( i=0; i<nrecvs; i++ ) {
2848965ea79SLois Curfman McInnes     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
2858965ea79SLois Curfman McInnes   }
2868965ea79SLois Curfman McInnes 
2878965ea79SLois Curfman McInnes   /* do sends:
2888965ea79SLois Curfman McInnes       1) starts[i] gives the starting index in svalues for stuff going to
2898965ea79SLois Curfman McInnes          the ith processor
2908965ea79SLois Curfman McInnes   */
2910452661fSBarry Smith   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
2920452661fSBarry Smith   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
2938965ea79SLois Curfman McInnes   CHKPTRQ(send_waits);
2940452661fSBarry Smith   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
2958965ea79SLois Curfman McInnes   starts[0] = 0;
2968965ea79SLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
2978965ea79SLois Curfman McInnes   for ( i=0; i<N; i++ ) {
2988965ea79SLois Curfman McInnes     svalues[starts[owner[i]]++] = rows[i];
2998965ea79SLois Curfman McInnes   }
3008965ea79SLois Curfman McInnes   ISRestoreIndices(is,&rows);
3018965ea79SLois Curfman McInnes 
3028965ea79SLois Curfman McInnes   starts[0] = 0;
3038965ea79SLois Curfman McInnes   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
3048965ea79SLois Curfman McInnes   count = 0;
3058965ea79SLois Curfman McInnes   for ( i=0; i<size; i++ ) {
3068965ea79SLois Curfman McInnes     if (procs[i]) {
3078965ea79SLois Curfman McInnes       MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
3088965ea79SLois Curfman McInnes     }
3098965ea79SLois Curfman McInnes   }
3100452661fSBarry Smith   PetscFree(starts);
3118965ea79SLois Curfman McInnes 
3128965ea79SLois Curfman McInnes   base = owners[rank];
3138965ea79SLois Curfman McInnes 
3148965ea79SLois Curfman McInnes   /*  wait on receives */
3150452661fSBarry Smith   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
3168965ea79SLois Curfman McInnes   source = lens + nrecvs;
3178965ea79SLois Curfman McInnes   count  = nrecvs; slen = 0;
3188965ea79SLois Curfman McInnes   while (count) {
3198965ea79SLois Curfman McInnes     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
3208965ea79SLois Curfman McInnes     /* unpack receives into our local space */
3218965ea79SLois Curfman McInnes     MPI_Get_count(&recv_status,MPI_INT,&n);
3228965ea79SLois Curfman McInnes     source[imdex]  = recv_status.MPI_SOURCE;
3238965ea79SLois Curfman McInnes     lens[imdex]  = n;
3248965ea79SLois Curfman McInnes     slen += n;
3258965ea79SLois Curfman McInnes     count--;
3268965ea79SLois Curfman McInnes   }
3270452661fSBarry Smith   PetscFree(recv_waits);
3288965ea79SLois Curfman McInnes 
3298965ea79SLois Curfman McInnes   /* move the data into the send scatter */
3300452661fSBarry Smith   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
3318965ea79SLois Curfman McInnes   count = 0;
3328965ea79SLois Curfman McInnes   for ( i=0; i<nrecvs; i++ ) {
3338965ea79SLois Curfman McInnes     values = rvalues + i*nmax;
3348965ea79SLois Curfman McInnes     for ( j=0; j<lens[i]; j++ ) {
3358965ea79SLois Curfman McInnes       lrows[count++] = values[j] - base;
3368965ea79SLois Curfman McInnes     }
3378965ea79SLois Curfman McInnes   }
3380452661fSBarry Smith   PetscFree(rvalues); PetscFree(lens);
3390452661fSBarry Smith   PetscFree(owner); PetscFree(nprocs);
3408965ea79SLois Curfman McInnes 
3418965ea79SLois Curfman McInnes   /* actually zap the local rows */
3428965ea79SLois Curfman McInnes   ierr = ISCreateSeq(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
3438965ea79SLois Curfman McInnes   PLogObjectParent(A,istmp);
3440452661fSBarry Smith   PetscFree(lrows);
3458965ea79SLois Curfman McInnes   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
3468965ea79SLois Curfman McInnes   ierr = ISDestroy(istmp); CHKERRQ(ierr);
3478965ea79SLois Curfman McInnes 
3488965ea79SLois Curfman McInnes   /* wait on sends */
3498965ea79SLois Curfman McInnes   if (nsends) {
3500452661fSBarry Smith     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
3518965ea79SLois Curfman McInnes     CHKPTRQ(send_status);
3528965ea79SLois Curfman McInnes     MPI_Waitall(nsends,send_waits,send_status);
3530452661fSBarry Smith     PetscFree(send_status);
3548965ea79SLois Curfman McInnes   }
3550452661fSBarry Smith   PetscFree(send_waits); PetscFree(svalues);
3568965ea79SLois Curfman McInnes 
3578965ea79SLois Curfman McInnes   return 0;
3588965ea79SLois Curfman McInnes }
3598965ea79SLois Curfman McInnes 
36039ddd567SLois Curfman McInnes static int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
3618965ea79SLois Curfman McInnes {
36239ddd567SLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
3638965ea79SLois Curfman McInnes   int          ierr;
364c456f294SBarry Smith 
36539ddd567SLois Curfman McInnes   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx);
36639ddd567SLois Curfman McInnes   CHKERRQ(ierr);
36739ddd567SLois Curfman McInnes   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx);
36839ddd567SLois Curfman McInnes   CHKERRQ(ierr);
36939ddd567SLois Curfman McInnes   ierr = MatMult(mdn->A,mdn->lvec,yy); CHKERRQ(ierr);
3708965ea79SLois Curfman McInnes   return 0;
3718965ea79SLois Curfman McInnes }
3728965ea79SLois Curfman McInnes 
37339ddd567SLois Curfman McInnes static int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
3748965ea79SLois Curfman McInnes {
37539ddd567SLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
3768965ea79SLois Curfman McInnes   int          ierr;
377c456f294SBarry Smith 
378c456f294SBarry Smith   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx);CHKERRQ(ierr);
379c456f294SBarry Smith   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx);CHKERRQ(ierr);
38039ddd567SLois Curfman McInnes   ierr = MatMultAdd(mdn->A,mdn->lvec,yy,zz); CHKERRQ(ierr);
3818965ea79SLois Curfman McInnes   return 0;
3828965ea79SLois Curfman McInnes }
3838965ea79SLois Curfman McInnes 
384096963f5SLois Curfman McInnes static int MatMultTrans_MPIDense(Mat A,Vec xx,Vec yy)
385096963f5SLois Curfman McInnes {
386096963f5SLois Curfman McInnes   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
387096963f5SLois Curfman McInnes   int          ierr;
3883501a2bdSLois Curfman McInnes   Scalar       zero = 0.0;
389096963f5SLois Curfman McInnes 
3903501a2bdSLois Curfman McInnes   ierr = VecSet(&zero,yy); CHKERRQ(ierr);
391096963f5SLois Curfman McInnes   ierr = MatMultTrans(a->A,xx,a->lvec); CHKERRQ(ierr);
392096963f5SLois Curfman McInnes   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,
393096963f5SLois Curfman McInnes          (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
394096963f5SLois Curfman McInnes   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,
395096963f5SLois Curfman McInnes          (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
396096963f5SLois Curfman McInnes   return 0;
397096963f5SLois Curfman McInnes }
398096963f5SLois Curfman McInnes 
399096963f5SLois Curfman McInnes static int MatMultTransAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
400096963f5SLois Curfman McInnes {
401096963f5SLois Curfman McInnes   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
402096963f5SLois Curfman McInnes   int          ierr;
403096963f5SLois Curfman McInnes 
4043501a2bdSLois Curfman McInnes   ierr = VecCopy(yy,zz); CHKERRQ(ierr);
4053501a2bdSLois Curfman McInnes   ierr = MatMultTrans(a->A,xx,a->lvec); CHKERRQ(ierr);
406096963f5SLois Curfman McInnes   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,
407096963f5SLois Curfman McInnes          (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
408096963f5SLois Curfman McInnes   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,
409096963f5SLois Curfman McInnes          (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
410096963f5SLois Curfman McInnes   return 0;
411096963f5SLois Curfman McInnes }
412096963f5SLois Curfman McInnes 
41339ddd567SLois Curfman McInnes static int MatGetDiagonal_MPIDense(Mat A,Vec v)
4148965ea79SLois Curfman McInnes {
41539ddd567SLois Curfman McInnes   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
416096963f5SLois Curfman McInnes   Mat_SeqDense *aloc = (Mat_SeqDense *) a->A->data;
417096963f5SLois Curfman McInnes   int          ierr, i, n, m = a->m, radd;
418096963f5SLois Curfman McInnes   Scalar       *x;
419ed3cc1f0SBarry Smith 
420096963f5SLois Curfman McInnes   ierr = VecGetArray(v,&x); CHKERRQ(ierr);
421096963f5SLois Curfman McInnes   ierr = VecGetSize(v,&n); CHKERRQ(ierr);
422096963f5SLois Curfman McInnes   if (n != a->M) SETERRQ(1,"MatGetDiagonal_SeqDense:Nonconforming mat and vec");
423096963f5SLois Curfman McInnes   radd = a->rstart*m*m;
424096963f5SLois Curfman McInnes   for ( i=0; i<m; i++ ) {
425096963f5SLois Curfman McInnes     x[i] = aloc->v[radd + i*m + i];
426096963f5SLois Curfman McInnes   }
427096963f5SLois Curfman McInnes   return 0;
4288965ea79SLois Curfman McInnes }
4298965ea79SLois Curfman McInnes 
43039ddd567SLois Curfman McInnes static int MatDestroy_MPIDense(PetscObject obj)
4318965ea79SLois Curfman McInnes {
4328965ea79SLois Curfman McInnes   Mat          mat = (Mat) obj;
4333501a2bdSLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
4348965ea79SLois Curfman McInnes   int          ierr;
435ed3cc1f0SBarry Smith 
4368965ea79SLois Curfman McInnes #if defined(PETSC_LOG)
4373501a2bdSLois Curfman McInnes   PLogObjectState(obj,"Rows=%d, Cols=%d",mdn->M,mdn->N);
4388965ea79SLois Curfman McInnes #endif
4390452661fSBarry Smith   PetscFree(mdn->rowners);
4403501a2bdSLois Curfman McInnes   ierr = MatDestroy(mdn->A); CHKERRQ(ierr);
4413501a2bdSLois Curfman McInnes   if (mdn->lvec)   VecDestroy(mdn->lvec);
4423501a2bdSLois Curfman McInnes   if (mdn->Mvctx)  VecScatterDestroy(mdn->Mvctx);
443622d7880SLois Curfman McInnes   if (mdn->factor) {
444622d7880SLois Curfman McInnes     if (mdn->factor->temp)   PetscFree(mdn->factor->temp);
445622d7880SLois Curfman McInnes     if (mdn->factor->tag)    PetscFree(mdn->factor->tag);
446622d7880SLois Curfman McInnes     if (mdn->factor->pivots) PetscFree(mdn->factor->pivots);
447622d7880SLois Curfman McInnes     PetscFree(mdn->factor);
448622d7880SLois Curfman McInnes   }
4490452661fSBarry Smith   PetscFree(mdn);
4508965ea79SLois Curfman McInnes   PLogObjectDestroy(mat);
4510452661fSBarry Smith   PetscHeaderDestroy(mat);
4528965ea79SLois Curfman McInnes   return 0;
4538965ea79SLois Curfman McInnes }
45439ddd567SLois Curfman McInnes 
4558965ea79SLois Curfman McInnes #include "pinclude/pviewer.h"
4568965ea79SLois Curfman McInnes 
45739ddd567SLois Curfman McInnes static int MatView_MPIDense_Binary(Mat mat,Viewer viewer)
4588965ea79SLois Curfman McInnes {
45939ddd567SLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
4608965ea79SLois Curfman McInnes   int          ierr;
46139ddd567SLois Curfman McInnes   if (mdn->size == 1) {
46239ddd567SLois Curfman McInnes     ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
4638965ea79SLois Curfman McInnes   }
46439ddd567SLois Curfman McInnes   else SETERRQ(1,"MatView_MPIDense_Binary:Only uniprocessor output supported");
4658965ea79SLois Curfman McInnes   return 0;
4668965ea79SLois Curfman McInnes }
4678965ea79SLois Curfman McInnes 
46839ddd567SLois Curfman McInnes static int MatView_MPIDense_ASCII(Mat mat,Viewer viewer)
4698965ea79SLois Curfman McInnes {
47039ddd567SLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
47139ddd567SLois Curfman McInnes   int          ierr, format;
4728965ea79SLois Curfman McInnes   PetscObject  vobj = (PetscObject) viewer;
4738965ea79SLois Curfman McInnes   FILE         *fd;
4748965ea79SLois Curfman McInnes 
475*227d817aSBarry Smith   ierr = ViewerFileGetPointer(viewer,&fd); CHKERRQ(ierr);
4768965ea79SLois Curfman McInnes   if (vobj->type == ASCII_FILE_VIEWER || vobj->type == ASCII_FILES_VIEWER) {
4778965ea79SLois Curfman McInnes     ierr = ViewerFileGetFormat_Private(viewer,&format);
4783501a2bdSLois Curfman McInnes     if (format == FILE_FORMAT_INFO_DETAILED) {
479096963f5SLois Curfman McInnes       int nz, nzalloc, mem, rank;
480096963f5SLois Curfman McInnes       MPI_Comm_rank(mat->comm,&rank);
481096963f5SLois Curfman McInnes       ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem);
482096963f5SLois Curfman McInnes       MPIU_Seq_begin(mat->comm,1);
4833501a2bdSLois Curfman McInnes         fprintf(fd,"  [%d] local rows %d nz %d nz alloced %d mem %d \n",
484096963f5SLois Curfman McInnes             rank,mdn->m,nz,nzalloc,mem);
485096963f5SLois Curfman McInnes       fflush(fd);
486096963f5SLois Curfman McInnes       MPIU_Seq_end(mat->comm,1);
4873501a2bdSLois Curfman McInnes       ierr = VecScatterView(mdn->Mvctx,viewer); CHKERRQ(ierr);
4883501a2bdSLois Curfman McInnes       return 0;
4893501a2bdSLois Curfman McInnes     }
4903501a2bdSLois Curfman McInnes     else if (format == FILE_FORMAT_INFO) {
491096963f5SLois Curfman McInnes       return 0;
4928965ea79SLois Curfman McInnes     }
4938965ea79SLois Curfman McInnes   }
4948965ea79SLois Curfman McInnes   if (vobj->type == ASCII_FILE_VIEWER) {
4958965ea79SLois Curfman McInnes     MPIU_Seq_begin(mat->comm,1);
49639ddd567SLois Curfman McInnes     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d\n",
49739ddd567SLois Curfman McInnes              mdn->rank,mdn->m,mdn->rstart,mdn->rend,mdn->n);
49839ddd567SLois Curfman McInnes     ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
4998965ea79SLois Curfman McInnes     fflush(fd);
5008965ea79SLois Curfman McInnes     MPIU_Seq_end(mat->comm,1);
5018965ea79SLois Curfman McInnes   }
5028965ea79SLois Curfman McInnes   else {
50339ddd567SLois Curfman McInnes     int size = mdn->size, rank = mdn->rank;
5048965ea79SLois Curfman McInnes     if (size == 1) {
50539ddd567SLois Curfman McInnes       ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
5068965ea79SLois Curfman McInnes     }
5078965ea79SLois Curfman McInnes     else {
5088965ea79SLois Curfman McInnes       /* assemble the entire matrix onto first processor. */
5098965ea79SLois Curfman McInnes       Mat          A;
51039ddd567SLois Curfman McInnes       int          M = mdn->M, N = mdn->N,m,row,i, nz, *cols;
51139ddd567SLois Curfman McInnes       Scalar       *vals;
51239ddd567SLois Curfman McInnes       Mat_SeqDense *Amdn = (Mat_SeqDense*) mdn->A->data;
5138965ea79SLois Curfman McInnes 
5148965ea79SLois Curfman McInnes       if (!rank) {
515b4fd4287SBarry Smith         ierr = MatCreateMPIDense(mat->comm,M,M,N,N,PETSC_NULL,&A); CHKERRQ(ierr);
5168965ea79SLois Curfman McInnes       }
5178965ea79SLois Curfman McInnes       else {
518b4fd4287SBarry Smith         ierr = MatCreateMPIDense(mat->comm,0,M,N,N,PETSC_NULL,&A); CHKERRQ(ierr);
5198965ea79SLois Curfman McInnes       }
5208965ea79SLois Curfman McInnes       PLogObjectParent(mat,A);
5218965ea79SLois Curfman McInnes 
52239ddd567SLois Curfman McInnes       /* Copy the matrix ... This isn't the most efficient means,
52339ddd567SLois Curfman McInnes          but it's quick for now */
52439ddd567SLois Curfman McInnes       row = mdn->rstart; m = Amdn->m;
5258965ea79SLois Curfman McInnes       for ( i=0; i<m; i++ ) {
52639ddd567SLois Curfman McInnes         ierr = MatGetRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
52739ddd567SLois Curfman McInnes         ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES); CHKERRQ(ierr);
52839ddd567SLois Curfman McInnes         ierr = MatRestoreRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
52939ddd567SLois Curfman McInnes         row++;
5308965ea79SLois Curfman McInnes       }
5318965ea79SLois Curfman McInnes 
5328965ea79SLois Curfman McInnes       ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
5338965ea79SLois Curfman McInnes       ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
5348965ea79SLois Curfman McInnes       if (!rank) {
53539ddd567SLois Curfman McInnes         ierr = MatView(((Mat_MPIDense*)(A->data))->A,viewer); CHKERRQ(ierr);
5368965ea79SLois Curfman McInnes       }
5378965ea79SLois Curfman McInnes       ierr = MatDestroy(A); CHKERRQ(ierr);
5388965ea79SLois Curfman McInnes     }
5398965ea79SLois Curfman McInnes   }
5408965ea79SLois Curfman McInnes   return 0;
5418965ea79SLois Curfman McInnes }
5428965ea79SLois Curfman McInnes 
54339ddd567SLois Curfman McInnes static int MatView_MPIDense(PetscObject obj,Viewer viewer)
5448965ea79SLois Curfman McInnes {
5458965ea79SLois Curfman McInnes   Mat          mat = (Mat) obj;
5468965ea79SLois Curfman McInnes   PetscObject  vobj = (PetscObject) viewer;
54739ddd567SLois Curfman McInnes   int          ierr;
5488965ea79SLois Curfman McInnes 
5498965ea79SLois Curfman McInnes   if (!viewer) {
5508965ea79SLois Curfman McInnes     viewer = STDOUT_VIEWER_SELF; vobj = (PetscObject) viewer;
5518965ea79SLois Curfman McInnes   }
55239ddd567SLois Curfman McInnes   if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILE_VIEWER) {
55339ddd567SLois Curfman McInnes     ierr = MatView_MPIDense_ASCII(mat,viewer); CHKERRQ(ierr);
5548965ea79SLois Curfman McInnes   }
5558965ea79SLois Curfman McInnes   else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILES_VIEWER) {
55639ddd567SLois Curfman McInnes     ierr = MatView_MPIDense_ASCII(mat,viewer); CHKERRQ(ierr);
5578965ea79SLois Curfman McInnes   }
5588965ea79SLois Curfman McInnes   else if (vobj->type == BINARY_FILE_VIEWER) {
55939ddd567SLois Curfman McInnes     return MatView_MPIDense_Binary(mat,viewer);
5608965ea79SLois Curfman McInnes   }
5618965ea79SLois Curfman McInnes   return 0;
5628965ea79SLois Curfman McInnes }
5638965ea79SLois Curfman McInnes 
5643501a2bdSLois Curfman McInnes static int MatGetInfo_MPIDense(Mat A,MatInfoType flag,int *nz,
5658965ea79SLois Curfman McInnes                              int *nzalloc,int *mem)
5668965ea79SLois Curfman McInnes {
5673501a2bdSLois Curfman McInnes   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
5683501a2bdSLois Curfman McInnes   Mat          mdn = mat->A;
56939ddd567SLois Curfman McInnes   int          ierr, isend[3], irecv[3];
5708965ea79SLois Curfman McInnes 
5713501a2bdSLois Curfman McInnes   ierr = MatGetInfo(mdn,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr);
5728965ea79SLois Curfman McInnes   if (flag == MAT_LOCAL) {
5738965ea79SLois Curfman McInnes     *nz = isend[0]; *nzalloc = isend[1]; *mem = isend[2];
5748965ea79SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_MAX) {
5753501a2bdSLois Curfman McInnes     MPI_Allreduce(isend,irecv,3,MPI_INT,MPI_MAX,A->comm);
5768965ea79SLois Curfman McInnes     *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
5778965ea79SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_SUM) {
5783501a2bdSLois Curfman McInnes     MPI_Allreduce(isend,irecv,3,MPI_INT,MPI_SUM,A->comm);
5798965ea79SLois Curfman McInnes     *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
5808965ea79SLois Curfman McInnes   }
5818965ea79SLois Curfman McInnes   return 0;
5828965ea79SLois Curfman McInnes }
5838965ea79SLois Curfman McInnes 
5848c469469SLois Curfman McInnes /* extern int MatLUFactorSymbolic_MPIDense(Mat,IS,IS,double,Mat*);
5858aaee692SLois Curfman McInnes    extern int MatLUFactorNumeric_MPIDense(Mat,Mat*);
5868aaee692SLois Curfman McInnes    extern int MatLUFactor_MPIDense(Mat,IS,IS,double);
5878aaee692SLois Curfman McInnes    extern int MatSolve_MPIDense(Mat,Vec,Vec);
5888c469469SLois Curfman McInnes    extern int MatSolveAdd_MPIDense(Mat,Vec,Vec,Vec);
5898aaee692SLois Curfman McInnes    extern int MatSolveTrans_MPIDense(Mat,Vec,Vec);
5908aaee692SLois Curfman McInnes    extern int MatSolveTransAdd_MPIDense(Mat,Vec,Vec,Vec); */
5918aaee692SLois Curfman McInnes 
59239ddd567SLois Curfman McInnes static int MatSetOption_MPIDense(Mat A,MatOption op)
5938965ea79SLois Curfman McInnes {
59439ddd567SLois Curfman McInnes   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
5958965ea79SLois Curfman McInnes 
5968965ea79SLois Curfman McInnes   if (op == NO_NEW_NONZERO_LOCATIONS ||
5978965ea79SLois Curfman McInnes       op == YES_NEW_NONZERO_LOCATIONS ||
5988965ea79SLois Curfman McInnes       op == COLUMNS_SORTED ||
5998965ea79SLois Curfman McInnes       op == ROW_ORIENTED) {
6008965ea79SLois Curfman McInnes         MatSetOption(a->A,op);
6018965ea79SLois Curfman McInnes   }
6028965ea79SLois Curfman McInnes   else if (op == ROWS_SORTED ||
6038965ea79SLois Curfman McInnes            op == SYMMETRIC_MATRIX ||
6048965ea79SLois Curfman McInnes            op == STRUCTURALLY_SYMMETRIC_MATRIX ||
6058965ea79SLois Curfman McInnes            op == YES_NEW_DIAGONALS)
60639ddd567SLois Curfman McInnes     PLogInfo((PetscObject)A,"Info:MatSetOption_MPIDense:Option ignored\n");
6078965ea79SLois Curfman McInnes   else if (op == COLUMN_ORIENTED)
60839b7565bSBarry Smith     { a->roworiented = 0; MatSetOption(a->A,op);}
6098965ea79SLois Curfman McInnes   else if (op == NO_NEW_DIAGONALS)
61039ddd567SLois Curfman McInnes     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIDense:NO_NEW_DIAGONALS");}
6118965ea79SLois Curfman McInnes   else
61239ddd567SLois Curfman McInnes     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIDense:unknown option");}
6138965ea79SLois Curfman McInnes   return 0;
6148965ea79SLois Curfman McInnes }
6158965ea79SLois Curfman McInnes 
6163501a2bdSLois Curfman McInnes static int MatGetSize_MPIDense(Mat A,int *m,int *n)
6178965ea79SLois Curfman McInnes {
6183501a2bdSLois Curfman McInnes   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
6198965ea79SLois Curfman McInnes   *m = mat->M; *n = mat->N;
6208965ea79SLois Curfman McInnes   return 0;
6218965ea79SLois Curfman McInnes }
6228965ea79SLois Curfman McInnes 
6233501a2bdSLois Curfman McInnes static int MatGetLocalSize_MPIDense(Mat A,int *m,int *n)
6248965ea79SLois Curfman McInnes {
6253501a2bdSLois Curfman McInnes   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
6268965ea79SLois Curfman McInnes   *m = mat->m; *n = mat->N;
6278965ea79SLois Curfman McInnes   return 0;
6288965ea79SLois Curfman McInnes }
6298965ea79SLois Curfman McInnes 
6303501a2bdSLois Curfman McInnes static int MatGetOwnershipRange_MPIDense(Mat A,int *m,int *n)
6318965ea79SLois Curfman McInnes {
6323501a2bdSLois Curfman McInnes   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
6338965ea79SLois Curfman McInnes   *m = mat->rstart; *n = mat->rend;
6348965ea79SLois Curfman McInnes   return 0;
6358965ea79SLois Curfman McInnes }
6368965ea79SLois Curfman McInnes 
6373501a2bdSLois Curfman McInnes static int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,Scalar **v)
6388965ea79SLois Curfman McInnes {
6393501a2bdSLois Curfman McInnes   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
64039ddd567SLois Curfman McInnes   int          lrow, rstart = mat->rstart, rend = mat->rend;
6418965ea79SLois Curfman McInnes 
64239ddd567SLois Curfman McInnes   if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIDense:only local rows")
6438965ea79SLois Curfman McInnes   lrow = row - rstart;
64439ddd567SLois Curfman McInnes   return MatGetRow(mat->A,lrow,nz,idx,v);
6458965ea79SLois Curfman McInnes }
6468965ea79SLois Curfman McInnes 
64739ddd567SLois Curfman McInnes static int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,Scalar **v)
6488965ea79SLois Curfman McInnes {
6490452661fSBarry Smith   if (idx) PetscFree(*idx);
6500452661fSBarry Smith   if (v) PetscFree(*v);
6518965ea79SLois Curfman McInnes   return 0;
6528965ea79SLois Curfman McInnes }
6538965ea79SLois Curfman McInnes 
654cddf8d76SBarry Smith static int MatNorm_MPIDense(Mat A,NormType type,double *norm)
655096963f5SLois Curfman McInnes {
6563501a2bdSLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *) A->data;
6573501a2bdSLois Curfman McInnes   Mat_SeqDense *mat = (Mat_SeqDense*) mdn->A->data;
6583501a2bdSLois Curfman McInnes   int          ierr, i, j;
6593501a2bdSLois Curfman McInnes   double       sum = 0.0;
6603501a2bdSLois Curfman McInnes   Scalar       *v = mat->v;
6613501a2bdSLois Curfman McInnes 
6623501a2bdSLois Curfman McInnes   if (mdn->size == 1) {
6633501a2bdSLois Curfman McInnes     ierr =  MatNorm(mdn->A,type,norm); CHKERRQ(ierr);
6643501a2bdSLois Curfman McInnes   } else {
6653501a2bdSLois Curfman McInnes     if (type == NORM_FROBENIUS) {
6663501a2bdSLois Curfman McInnes       for (i=0; i<mat->n*mat->m; i++ ) {
6673501a2bdSLois Curfman McInnes #if defined(PETSC_COMPLEX)
6683501a2bdSLois Curfman McInnes         sum += real(conj(*v)*(*v)); v++;
6693501a2bdSLois Curfman McInnes #else
6703501a2bdSLois Curfman McInnes         sum += (*v)*(*v); v++;
6713501a2bdSLois Curfman McInnes #endif
6723501a2bdSLois Curfman McInnes       }
6733501a2bdSLois Curfman McInnes       MPI_Allreduce((void*)&sum,(void*)norm,1,MPI_DOUBLE,MPI_SUM,A->comm);
6743501a2bdSLois Curfman McInnes       *norm = sqrt(*norm);
6753501a2bdSLois Curfman McInnes       PLogFlops(2*mat->n*mat->m);
6763501a2bdSLois Curfman McInnes     }
6773501a2bdSLois Curfman McInnes     else if (type == NORM_1) {
6783501a2bdSLois Curfman McInnes       double *tmp, *tmp2;
6790452661fSBarry Smith       tmp  = (double *) PetscMalloc( 2*mdn->N*sizeof(double) ); CHKPTRQ(tmp);
6803501a2bdSLois Curfman McInnes       tmp2 = tmp + mdn->N;
681cddf8d76SBarry Smith       PetscMemzero(tmp,2*mdn->N*sizeof(double));
682096963f5SLois Curfman McInnes       *norm = 0.0;
6833501a2bdSLois Curfman McInnes       v = mat->v;
6843501a2bdSLois Curfman McInnes       for ( j=0; j<mat->n; j++ ) {
6853501a2bdSLois Curfman McInnes         for ( i=0; i<mat->m; i++ ) {
68667e560aaSBarry Smith           tmp[j] += PetscAbsScalar(*v);  v++;
6873501a2bdSLois Curfman McInnes         }
6883501a2bdSLois Curfman McInnes       }
6893501a2bdSLois Curfman McInnes       MPI_Allreduce((void*)tmp,(void*)tmp2,mdn->N,MPI_DOUBLE,MPI_SUM,A->comm);
6903501a2bdSLois Curfman McInnes       for ( j=0; j<mdn->N; j++ ) {
6913501a2bdSLois Curfman McInnes         if (tmp2[j] > *norm) *norm = tmp2[j];
6923501a2bdSLois Curfman McInnes       }
6930452661fSBarry Smith       PetscFree(tmp);
6943501a2bdSLois Curfman McInnes       PLogFlops(mat->n*mat->m);
6953501a2bdSLois Curfman McInnes     }
6963501a2bdSLois Curfman McInnes     else if (type == NORM_INFINITY) { /* max row norm */
6973501a2bdSLois Curfman McInnes       double ntemp;
6983501a2bdSLois Curfman McInnes       ierr = MatNorm(mdn->A,type,&ntemp); CHKERRQ(ierr);
6993501a2bdSLois Curfman McInnes       MPI_Allreduce((void*)&ntemp,(void*)norm,1,MPI_DOUBLE,MPI_MAX,A->comm);
7003501a2bdSLois Curfman McInnes     }
7013501a2bdSLois Curfman McInnes     else {
7023501a2bdSLois Curfman McInnes       SETERRQ(1,"MatNorm_MPIDense:No support for two norm");
7033501a2bdSLois Curfman McInnes     }
7043501a2bdSLois Curfman McInnes   }
7053501a2bdSLois Curfman McInnes   return 0;
7063501a2bdSLois Curfman McInnes }
7073501a2bdSLois Curfman McInnes 
7083501a2bdSLois Curfman McInnes static int MatTranspose_MPIDense(Mat A,Mat *matout)
7093501a2bdSLois Curfman McInnes {
7103501a2bdSLois Curfman McInnes   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
7113501a2bdSLois Curfman McInnes   Mat_SeqDense *Aloc = (Mat_SeqDense *) a->A->data;
7123501a2bdSLois Curfman McInnes   Mat          B;
7133501a2bdSLois Curfman McInnes   int          M = a->M, N = a->N, m, n, *rwork, rstart = a->rstart;
7143501a2bdSLois Curfman McInnes   int          j, i, ierr;
7153501a2bdSLois Curfman McInnes   Scalar       *v;
7163501a2bdSLois Curfman McInnes 
7173501a2bdSLois Curfman McInnes   if (!matout && M != N)
7183501a2bdSLois Curfman McInnes     SETERRQ(1,"MatTranspose_MPIDense:Supports square matrix only in-place");
719b4fd4287SBarry Smith   ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);
720ed2daf61SLois Curfman McInnes          CHKERRQ(ierr);
7213501a2bdSLois Curfman McInnes 
7223501a2bdSLois Curfman McInnes   m = Aloc->m; n = Aloc->n; v = Aloc->v;
7230452661fSBarry Smith   rwork = (int *) PetscMalloc(n*sizeof(int)); CHKPTRQ(rwork);
7243501a2bdSLois Curfman McInnes   for ( j=0; j<n; j++ ) {
7253501a2bdSLois Curfman McInnes     for (i=0; i<m; i++) rwork[i] = rstart + i;
7263501a2bdSLois Curfman McInnes     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES); CHKERRQ(ierr);
7273501a2bdSLois Curfman McInnes     v += m;
7283501a2bdSLois Curfman McInnes   }
7290452661fSBarry Smith   PetscFree(rwork);
7303501a2bdSLois Curfman McInnes   ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
7313501a2bdSLois Curfman McInnes   ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
7323501a2bdSLois Curfman McInnes   if (matout) {
7333501a2bdSLois Curfman McInnes     *matout = B;
7343501a2bdSLois Curfman McInnes   } else {
7353501a2bdSLois Curfman McInnes     /* This isn't really an in-place transpose, but free data struct from a */
7360452661fSBarry Smith     PetscFree(a->rowners);
7373501a2bdSLois Curfman McInnes     ierr = MatDestroy(a->A); CHKERRQ(ierr);
7383501a2bdSLois Curfman McInnes     if (a->lvec) VecDestroy(a->lvec);
7393501a2bdSLois Curfman McInnes     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
7400452661fSBarry Smith     PetscFree(a);
7413501a2bdSLois Curfman McInnes     PetscMemcpy(A,B,sizeof(struct _Mat));
7420452661fSBarry Smith     PetscHeaderDestroy(B);
7433501a2bdSLois Curfman McInnes   }
744096963f5SLois Curfman McInnes   return 0;
745096963f5SLois Curfman McInnes }
746096963f5SLois Curfman McInnes 
7473d1612f7SBarry Smith static int MatConvertSameType_MPIDense(Mat,Mat *,int);
7488965ea79SLois Curfman McInnes 
7498965ea79SLois Curfman McInnes /* -------------------------------------------------------------------*/
75039ddd567SLois Curfman McInnes static struct _MatOps MatOps = {MatSetValues_MPIDense,
75139ddd567SLois Curfman McInnes        MatGetRow_MPIDense,MatRestoreRow_MPIDense,
75239ddd567SLois Curfman McInnes        MatMult_MPIDense,MatMultAdd_MPIDense,
753096963f5SLois Curfman McInnes        MatMultTrans_MPIDense,MatMultTransAdd_MPIDense,
754e7ca0642SLois Curfman McInnes /*       MatSolve_MPIDense,0, */
755e7ca0642SLois Curfman McInnes        0,0,
75639ddd567SLois Curfman McInnes        0,0,
7578c469469SLois Curfman McInnes        0,0,
7588c469469SLois Curfman McInnes /*       MatLUFactor_MPIDense,0, */
7598aaee692SLois Curfman McInnes        0,MatTranspose_MPIDense,
76039ddd567SLois Curfman McInnes        MatGetInfo_MPIDense,0,
761096963f5SLois Curfman McInnes        MatGetDiagonal_MPIDense,0,MatNorm_MPIDense,
76239ddd567SLois Curfman McInnes        MatAssemblyBegin_MPIDense,MatAssemblyEnd_MPIDense,
7638965ea79SLois Curfman McInnes        0,
76439ddd567SLois Curfman McInnes        MatSetOption_MPIDense,MatZeroEntries_MPIDense,MatZeroRows_MPIDense,
7658c469469SLois Curfman McInnes        0,0,0,
7668c469469SLois Curfman McInnes /*       0,MatLUFactorSymbolic_MPIDense,MatLUFactorNumeric_MPIDense, */
7678aaee692SLois Curfman McInnes        0,0,
76839ddd567SLois Curfman McInnes        MatGetSize_MPIDense,MatGetLocalSize_MPIDense,
76939ddd567SLois Curfman McInnes        MatGetOwnershipRange_MPIDense,
77039ddd567SLois Curfman McInnes        0,0,
7713d1612f7SBarry Smith        0,0,0,0,0,MatConvertSameType_MPIDense,
772b49de8d1SLois Curfman McInnes        0,0,0,0,0,
773b49de8d1SLois Curfman McInnes        0,0,MatGetValues_MPIDense};
7748965ea79SLois Curfman McInnes 
7758965ea79SLois Curfman McInnes /*@C
77639ddd567SLois Curfman McInnes    MatCreateMPIDense - Creates a sparse parallel matrix in dense format.
7778965ea79SLois Curfman McInnes 
7788965ea79SLois Curfman McInnes    Input Parameters:
7798965ea79SLois Curfman McInnes .  comm - MPI communicator
7808965ea79SLois Curfman McInnes .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
7818965ea79SLois Curfman McInnes .  n - number of local columns (or PETSC_DECIDE to have calculated
7828965ea79SLois Curfman McInnes            if N is given)
7838965ea79SLois Curfman McInnes .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
7848965ea79SLois Curfman McInnes .  N - number of global columns (or PETSC_DECIDE to have calculated
7858965ea79SLois Curfman McInnes            if n is given)
786b4fd4287SBarry Smith .  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
787dfc5480cSLois Curfman McInnes    to control all matrix memory allocation.
7888965ea79SLois Curfman McInnes 
7898965ea79SLois Curfman McInnes    Output Parameter:
7908965ea79SLois Curfman McInnes .  newmat - the matrix
7918965ea79SLois Curfman McInnes 
7928965ea79SLois Curfman McInnes    Notes:
79339ddd567SLois Curfman McInnes    The dense format is fully compatible with standard Fortran 77
79439ddd567SLois Curfman McInnes    storage by columns.
7958965ea79SLois Curfman McInnes 
79618f449edSLois Curfman McInnes    The data input variable is intended primarily for Fortran programmers
79718f449edSLois Curfman McInnes    who wish to allocate their own matrix memory space.  Most users should
798b4fd4287SBarry Smith    set data=PETSC_NULL.
79918f449edSLois Curfman McInnes 
8008965ea79SLois Curfman McInnes    The user MUST specify either the local or global matrix dimensions
8018965ea79SLois Curfman McInnes    (possibly both).
8028965ea79SLois Curfman McInnes 
8033501a2bdSLois Curfman McInnes    Currently, the only parallel dense matrix decomposition is by rows,
8043501a2bdSLois Curfman McInnes    so that n=N and each submatrix owns all of the global columns.
8053501a2bdSLois Curfman McInnes 
80639ddd567SLois Curfman McInnes .keywords: matrix, dense, parallel
8078965ea79SLois Curfman McInnes 
80839ddd567SLois Curfman McInnes .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
8098965ea79SLois Curfman McInnes @*/
81018f449edSLois Curfman McInnes int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,Scalar *data,Mat *newmat)
8118965ea79SLois Curfman McInnes {
8128965ea79SLois Curfman McInnes   Mat          mat;
81339ddd567SLois Curfman McInnes   Mat_MPIDense *a;
81425cdf11fSBarry Smith   int          ierr, i,flg;
8158965ea79SLois Curfman McInnes 
816ed2daf61SLois Curfman McInnes /* Note:  For now, when data is specified above, this assumes the user correctly
817ed2daf61SLois Curfman McInnes    allocates the local dense storage space.  We should add error checking. */
81818f449edSLois Curfman McInnes 
8198965ea79SLois Curfman McInnes   *newmat         = 0;
8200452661fSBarry Smith   PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIDENSE,comm);
8218965ea79SLois Curfman McInnes   PLogObjectCreate(mat);
8220452661fSBarry Smith   mat->data       = (void *) (a = PetscNew(Mat_MPIDense)); CHKPTRQ(a);
8238965ea79SLois Curfman McInnes   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
82439ddd567SLois Curfman McInnes   mat->destroy    = MatDestroy_MPIDense;
82539ddd567SLois Curfman McInnes   mat->view       = MatView_MPIDense;
8268965ea79SLois Curfman McInnes   mat->factor     = 0;
8278965ea79SLois Curfman McInnes 
828622d7880SLois Curfman McInnes   a->factor = 0;
8298965ea79SLois Curfman McInnes   a->insertmode = NOT_SET_VALUES;
8308965ea79SLois Curfman McInnes   MPI_Comm_rank(comm,&a->rank);
8318965ea79SLois Curfman McInnes   MPI_Comm_size(comm,&a->size);
8328965ea79SLois Curfman McInnes 
83339ddd567SLois Curfman McInnes   if (M == PETSC_DECIDE) MPI_Allreduce(&m,&M,1,MPI_INT,MPI_SUM,comm);
8348965ea79SLois Curfman McInnes   if (m == PETSC_DECIDE) {m = M/a->size + ((M % a->size) > a->rank);}
83539ddd567SLois Curfman McInnes 
83639ddd567SLois Curfman McInnes   /* each row stores all columns */
83739ddd567SLois Curfman McInnes   if (N == PETSC_DECIDE) N = n;
838d7e8b826SBarry Smith   if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);}
839d7e8b826SBarry Smith   /*  if (n != N) SETERRQ(1,"MatCreateMPIDense:For now, only n=N is supported"); */
8408965ea79SLois Curfman McInnes   a->N = N;
8418965ea79SLois Curfman McInnes   a->M = M;
84239ddd567SLois Curfman McInnes   a->m = m;
84339ddd567SLois Curfman McInnes   a->n = n;
8448965ea79SLois Curfman McInnes 
8458965ea79SLois Curfman McInnes   /* build local table of row and column ownerships */
846d7e8b826SBarry Smith   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
847d7e8b826SBarry Smith   a->cowners = a->rowners + a->size + 1;
848d7e8b826SBarry Smith   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+
84939ddd567SLois Curfman McInnes                        sizeof(Mat_MPIDense));
8508965ea79SLois Curfman McInnes   MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);
8518965ea79SLois Curfman McInnes   a->rowners[0] = 0;
8528965ea79SLois Curfman McInnes   for ( i=2; i<=a->size; i++ ) {
8538965ea79SLois Curfman McInnes     a->rowners[i] += a->rowners[i-1];
8548965ea79SLois Curfman McInnes   }
8558965ea79SLois Curfman McInnes   a->rstart = a->rowners[a->rank];
8568965ea79SLois Curfman McInnes   a->rend   = a->rowners[a->rank+1];
857d7e8b826SBarry Smith   MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);
858d7e8b826SBarry Smith   a->cowners[0] = 0;
859d7e8b826SBarry Smith   for ( i=2; i<=a->size; i++ ) {
860d7e8b826SBarry Smith     a->cowners[i] += a->cowners[i-1];
861d7e8b826SBarry Smith   }
8628965ea79SLois Curfman McInnes 
86318f449edSLois Curfman McInnes   ierr = MatCreateSeqDense(MPI_COMM_SELF,m,N,data,&a->A); CHKERRQ(ierr);
8648965ea79SLois Curfman McInnes   PLogObjectParent(mat,a->A);
8658965ea79SLois Curfman McInnes 
8668965ea79SLois Curfman McInnes   /* build cache for off array entries formed */
8678965ea79SLois Curfman McInnes   ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr);
8688965ea79SLois Curfman McInnes 
8698965ea79SLois Curfman McInnes   /* stuff used for matrix vector multiply */
8708965ea79SLois Curfman McInnes   a->lvec        = 0;
8718965ea79SLois Curfman McInnes   a->Mvctx       = 0;
87239b7565bSBarry Smith   a->roworiented = 1;
8738965ea79SLois Curfman McInnes 
8748965ea79SLois Curfman McInnes   *newmat = mat;
87525cdf11fSBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
87625cdf11fSBarry Smith   if (flg) {
8778c469469SLois Curfman McInnes     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
8788c469469SLois Curfman McInnes   }
8798965ea79SLois Curfman McInnes   return 0;
8808965ea79SLois Curfman McInnes }
8818965ea79SLois Curfman McInnes 
8823d1612f7SBarry Smith static int MatConvertSameType_MPIDense(Mat A,Mat *newmat,int cpvalues)
8838965ea79SLois Curfman McInnes {
8848965ea79SLois Curfman McInnes   Mat          mat;
8853501a2bdSLois Curfman McInnes   Mat_MPIDense *a,*oldmat = (Mat_MPIDense *) A->data;
88639ddd567SLois Curfman McInnes   int          ierr;
8872ba99913SLois Curfman McInnes   FactorCtx    *factor;
8888965ea79SLois Curfman McInnes 
8898965ea79SLois Curfman McInnes   *newmat       = 0;
8900452661fSBarry Smith   PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIDENSE,A->comm);
8918965ea79SLois Curfman McInnes   PLogObjectCreate(mat);
8920452661fSBarry Smith   mat->data     = (void *) (a = PetscNew(Mat_MPIDense)); CHKPTRQ(a);
8938965ea79SLois Curfman McInnes   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
89439ddd567SLois Curfman McInnes   mat->destroy  = MatDestroy_MPIDense;
89539ddd567SLois Curfman McInnes   mat->view     = MatView_MPIDense;
8963501a2bdSLois Curfman McInnes   mat->factor   = A->factor;
897c456f294SBarry Smith   mat->assembled  = PETSC_TRUE;
8988965ea79SLois Curfman McInnes 
8998965ea79SLois Curfman McInnes   a->m          = oldmat->m;
9008965ea79SLois Curfman McInnes   a->n          = oldmat->n;
9018965ea79SLois Curfman McInnes   a->M          = oldmat->M;
9028965ea79SLois Curfman McInnes   a->N          = oldmat->N;
9032ba99913SLois Curfman McInnes   if (oldmat->factor) {
9042ba99913SLois Curfman McInnes     a->factor = (FactorCtx *) (factor = PetscNew(FactorCtx)); CHKPTRQ(factor);
9052ba99913SLois Curfman McInnes     /* copy factor contents ... add this code! */
9062ba99913SLois Curfman McInnes   } else a->factor = 0;
9078965ea79SLois Curfman McInnes 
9088965ea79SLois Curfman McInnes   a->rstart     = oldmat->rstart;
9098965ea79SLois Curfman McInnes   a->rend       = oldmat->rend;
9108965ea79SLois Curfman McInnes   a->size       = oldmat->size;
9118965ea79SLois Curfman McInnes   a->rank       = oldmat->rank;
9128965ea79SLois Curfman McInnes   a->insertmode = NOT_SET_VALUES;
9138965ea79SLois Curfman McInnes 
9140452661fSBarry Smith   a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners);
91539ddd567SLois Curfman McInnes   PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIDense));
9168965ea79SLois Curfman McInnes   PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));
9178965ea79SLois Curfman McInnes   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
9188965ea79SLois Curfman McInnes 
9198965ea79SLois Curfman McInnes   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
9208965ea79SLois Curfman McInnes   PLogObjectParent(mat,a->lvec);
92155659b69SBarry Smith   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
9228965ea79SLois Curfman McInnes   PLogObjectParent(mat,a->Mvctx);
9238965ea79SLois Curfman McInnes   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
9248965ea79SLois Curfman McInnes   PLogObjectParent(mat,a->A);
9258965ea79SLois Curfman McInnes   *newmat = mat;
9268965ea79SLois Curfman McInnes   return 0;
9278965ea79SLois Curfman McInnes }
9288965ea79SLois Curfman McInnes 
9298965ea79SLois Curfman McInnes #include "sysio.h"
9308965ea79SLois Curfman McInnes 
93139ddd567SLois Curfman McInnes int MatLoad_MPIDense(Viewer bview,MatType type,Mat *newmat)
9328965ea79SLois Curfman McInnes {
9338965ea79SLois Curfman McInnes   Mat          A;
9348965ea79SLois Curfman McInnes   int          i, nz, ierr, j,rstart, rend, fd;
9358965ea79SLois Curfman McInnes   Scalar       *vals,*svals;
9368965ea79SLois Curfman McInnes   PetscObject  vobj = (PetscObject) bview;
9378965ea79SLois Curfman McInnes   MPI_Comm     comm = vobj->comm;
9388965ea79SLois Curfman McInnes   MPI_Status   status;
9398965ea79SLois Curfman McInnes   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
9408965ea79SLois Curfman McInnes   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
9418965ea79SLois Curfman McInnes   int          tag = ((PetscObject)bview)->tag;
9428965ea79SLois Curfman McInnes 
9438965ea79SLois Curfman McInnes   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
9448965ea79SLois Curfman McInnes   if (!rank) {
9458965ea79SLois Curfman McInnes     ierr = ViewerFileGetDescriptor_Private(bview,&fd); CHKERRQ(ierr);
9468965ea79SLois Curfman McInnes     ierr = SYRead(fd,(char *)header,4,SYINT); CHKERRQ(ierr);
94739ddd567SLois Curfman McInnes     if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIDenseorMPIRow:not matrix object");
9488965ea79SLois Curfman McInnes   }
9498965ea79SLois Curfman McInnes 
9508965ea79SLois Curfman McInnes   MPI_Bcast(header+1,3,MPI_INT,0,comm);
9518965ea79SLois Curfman McInnes   M = header[1]; N = header[2];
9528965ea79SLois Curfman McInnes   /* determine ownership of all rows */
9538965ea79SLois Curfman McInnes   m = M/size + ((M % size) > rank);
9540452661fSBarry Smith   rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
9558965ea79SLois Curfman McInnes   MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
9568965ea79SLois Curfman McInnes   rowners[0] = 0;
9578965ea79SLois Curfman McInnes   for ( i=2; i<=size; i++ ) {
9588965ea79SLois Curfman McInnes     rowners[i] += rowners[i-1];
9598965ea79SLois Curfman McInnes   }
9608965ea79SLois Curfman McInnes   rstart = rowners[rank];
9618965ea79SLois Curfman McInnes   rend   = rowners[rank+1];
9628965ea79SLois Curfman McInnes 
9638965ea79SLois Curfman McInnes   /* distribute row lengths to all processors */
9640452661fSBarry Smith   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
9658965ea79SLois Curfman McInnes   offlens = ourlens + (rend-rstart);
9668965ea79SLois Curfman McInnes   if (!rank) {
9670452661fSBarry Smith     rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
9688965ea79SLois Curfman McInnes     ierr = SYRead(fd,rowlengths,M,SYINT); CHKERRQ(ierr);
9690452661fSBarry Smith     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
9708965ea79SLois Curfman McInnes     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
9718965ea79SLois Curfman McInnes     MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
9720452661fSBarry Smith     PetscFree(sndcounts);
9738965ea79SLois Curfman McInnes   }
9748965ea79SLois Curfman McInnes   else {
9758965ea79SLois Curfman McInnes     MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
9768965ea79SLois Curfman McInnes   }
9778965ea79SLois Curfman McInnes 
9788965ea79SLois Curfman McInnes   if (!rank) {
9798965ea79SLois Curfman McInnes     /* calculate the number of nonzeros on each processor */
9800452661fSBarry Smith     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
981cddf8d76SBarry Smith     PetscMemzero(procsnz,size*sizeof(int));
9828965ea79SLois Curfman McInnes     for ( i=0; i<size; i++ ) {
9838965ea79SLois Curfman McInnes       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
9848965ea79SLois Curfman McInnes         procsnz[i] += rowlengths[j];
9858965ea79SLois Curfman McInnes       }
9868965ea79SLois Curfman McInnes     }
9870452661fSBarry Smith     PetscFree(rowlengths);
9888965ea79SLois Curfman McInnes 
9898965ea79SLois Curfman McInnes     /* determine max buffer needed and allocate it */
9908965ea79SLois Curfman McInnes     maxnz = 0;
9918965ea79SLois Curfman McInnes     for ( i=0; i<size; i++ ) {
9920452661fSBarry Smith       maxnz = PetscMax(maxnz,procsnz[i]);
9938965ea79SLois Curfman McInnes     }
9940452661fSBarry Smith     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
9958965ea79SLois Curfman McInnes 
9968965ea79SLois Curfman McInnes     /* read in my part of the matrix column indices  */
9978965ea79SLois Curfman McInnes     nz = procsnz[0];
9980452661fSBarry Smith     mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
9998965ea79SLois Curfman McInnes     ierr = SYRead(fd,mycols,nz,SYINT); CHKERRQ(ierr);
10008965ea79SLois Curfman McInnes 
10018965ea79SLois Curfman McInnes     /* read in every one elses and ship off */
10028965ea79SLois Curfman McInnes     for ( i=1; i<size; i++ ) {
10038965ea79SLois Curfman McInnes       nz = procsnz[i];
10048965ea79SLois Curfman McInnes       ierr = SYRead(fd,cols,nz,SYINT); CHKERRQ(ierr);
10058965ea79SLois Curfman McInnes       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
10068965ea79SLois Curfman McInnes     }
10070452661fSBarry Smith     PetscFree(cols);
10088965ea79SLois Curfman McInnes   }
10098965ea79SLois Curfman McInnes   else {
10108965ea79SLois Curfman McInnes     /* determine buffer space needed for message */
10118965ea79SLois Curfman McInnes     nz = 0;
10128965ea79SLois Curfman McInnes     for ( i=0; i<m; i++ ) {
10138965ea79SLois Curfman McInnes       nz += ourlens[i];
10148965ea79SLois Curfman McInnes     }
10150452661fSBarry Smith     mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
10168965ea79SLois Curfman McInnes 
10178965ea79SLois Curfman McInnes     /* receive message of column indices*/
10188965ea79SLois Curfman McInnes     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
10198965ea79SLois Curfman McInnes     MPI_Get_count(&status,MPI_INT,&maxnz);
102039ddd567SLois Curfman McInnes     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIDenseorMPIRow:something is wrong with file");
10218965ea79SLois Curfman McInnes   }
10228965ea79SLois Curfman McInnes 
10238965ea79SLois Curfman McInnes   /* loop over local rows, determining number of off diagonal entries */
1024cddf8d76SBarry Smith   PetscMemzero(offlens,m*sizeof(int));
10258965ea79SLois Curfman McInnes   jj = 0;
10268965ea79SLois Curfman McInnes   for ( i=0; i<m; i++ ) {
10278965ea79SLois Curfman McInnes     for ( j=0; j<ourlens[i]; j++ ) {
10288965ea79SLois Curfman McInnes       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
10298965ea79SLois Curfman McInnes       jj++;
10308965ea79SLois Curfman McInnes     }
10318965ea79SLois Curfman McInnes   }
10328965ea79SLois Curfman McInnes 
10338965ea79SLois Curfman McInnes   /* create our matrix */
10348965ea79SLois Curfman McInnes   for ( i=0; i<m; i++ ) {
10358965ea79SLois Curfman McInnes     ourlens[i] -= offlens[i];
10368965ea79SLois Curfman McInnes   }
103739ddd567SLois Curfman McInnes   if (type == MATMPIDENSE) {
1038b4fd4287SBarry Smith     ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
10398965ea79SLois Curfman McInnes   }
10408965ea79SLois Curfman McInnes   A = *newmat;
10418965ea79SLois Curfman McInnes   for ( i=0; i<m; i++ ) {
10428965ea79SLois Curfman McInnes     ourlens[i] += offlens[i];
10438965ea79SLois Curfman McInnes   }
10448965ea79SLois Curfman McInnes 
10458965ea79SLois Curfman McInnes   if (!rank) {
10460452661fSBarry Smith     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);
10478965ea79SLois Curfman McInnes 
10488965ea79SLois Curfman McInnes     /* read in my part of the matrix numerical values  */
10498965ea79SLois Curfman McInnes     nz = procsnz[0];
10508965ea79SLois Curfman McInnes     ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr);
10518965ea79SLois Curfman McInnes 
10528965ea79SLois Curfman McInnes     /* insert into matrix */
10538965ea79SLois Curfman McInnes     jj      = rstart;
10548965ea79SLois Curfman McInnes     smycols = mycols;
10558965ea79SLois Curfman McInnes     svals   = vals;
10568965ea79SLois Curfman McInnes     for ( i=0; i<m; i++ ) {
10578965ea79SLois Curfman McInnes       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
10588965ea79SLois Curfman McInnes       smycols += ourlens[i];
10598965ea79SLois Curfman McInnes       svals   += ourlens[i];
10608965ea79SLois Curfman McInnes       jj++;
10618965ea79SLois Curfman McInnes     }
10628965ea79SLois Curfman McInnes 
10638965ea79SLois Curfman McInnes     /* read in other processors and ship out */
10648965ea79SLois Curfman McInnes     for ( i=1; i<size; i++ ) {
10658965ea79SLois Curfman McInnes       nz = procsnz[i];
10668965ea79SLois Curfman McInnes       ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr);
10678965ea79SLois Curfman McInnes       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
10688965ea79SLois Curfman McInnes     }
10690452661fSBarry Smith     PetscFree(procsnz);
10708965ea79SLois Curfman McInnes   }
10718965ea79SLois Curfman McInnes   else {
10728965ea79SLois Curfman McInnes     /* receive numeric values */
10730452661fSBarry Smith     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);
10748965ea79SLois Curfman McInnes 
10758965ea79SLois Curfman McInnes     /* receive message of values*/
10768965ea79SLois Curfman McInnes     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
10778965ea79SLois Curfman McInnes     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
107839ddd567SLois Curfman McInnes     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIDenseorMPIRow:something is wrong with file");
10798965ea79SLois Curfman McInnes 
10808965ea79SLois Curfman McInnes     /* insert into matrix */
10818965ea79SLois Curfman McInnes     jj      = rstart;
10828965ea79SLois Curfman McInnes     smycols = mycols;
10838965ea79SLois Curfman McInnes     svals   = vals;
10848965ea79SLois Curfman McInnes     for ( i=0; i<m; i++ ) {
10858965ea79SLois Curfman McInnes       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
10868965ea79SLois Curfman McInnes       smycols += ourlens[i];
10878965ea79SLois Curfman McInnes       svals   += ourlens[i];
10888965ea79SLois Curfman McInnes       jj++;
10898965ea79SLois Curfman McInnes     }
10908965ea79SLois Curfman McInnes   }
10910452661fSBarry Smith   PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);
10928965ea79SLois Curfman McInnes 
10938965ea79SLois Curfman McInnes   ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
10948965ea79SLois Curfman McInnes   ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
10958965ea79SLois Curfman McInnes   return 0;
10968965ea79SLois Curfman McInnes }
1097