xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision c456f2949236df7c211b0dd2704e71733d0ab932)
18965ea79SLois Curfman McInnes #ifndef lint
2*c456f294SBarry Smith static char vcid[] = "$Id: mpidense.c,v 1.25 1996/01/12 22:07:01 bsmith Exp bsmith $";
38965ea79SLois Curfman McInnes #endif
48965ea79SLois Curfman McInnes 
5ed3cc1f0SBarry Smith /*
6ed3cc1f0SBarry Smith    Basic functions for basic parallel dense matrices.
7ed3cc1f0SBarry Smith */
8ed3cc1f0SBarry Smith 
98965ea79SLois Curfman McInnes #include "mpidense.h"
108965ea79SLois Curfman McInnes #include "vec/vecimpl.h"
118965ea79SLois Curfman McInnes #include "inline/spops.h"
128965ea79SLois Curfman McInnes 
1339ddd567SLois Curfman McInnes static int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,
148965ea79SLois Curfman McInnes                                int *idxn,Scalar *v,InsertMode addv)
158965ea79SLois Curfman McInnes {
1639b7565bSBarry Smith   Mat_MPIDense *A = (Mat_MPIDense *) mat->data;
1739b7565bSBarry Smith   int          ierr, i, j, rstart = A->rstart, rend = A->rend, row;
1839b7565bSBarry Smith   int          roworiented = A->roworiented;
198965ea79SLois Curfman McInnes 
2039b7565bSBarry Smith   if (A->insertmode != NOT_SET_VALUES && A->insertmode != addv) {
2139ddd567SLois Curfman McInnes     SETERRQ(1,"MatSetValues_MPIDense:Cannot mix inserts and adds");
228965ea79SLois Curfman McInnes   }
2339b7565bSBarry Smith   A->insertmode = addv;
248965ea79SLois Curfman McInnes   for ( i=0; i<m; i++ ) {
2539ddd567SLois Curfman McInnes     if (idxm[i] < 0) SETERRQ(1,"MatSetValues_MPIDense:Negative row");
2639b7565bSBarry Smith     if (idxm[i] >= A->M) SETERRQ(1,"MatSetValues_MPIDense:Row too large");
278965ea79SLois Curfman McInnes     if (idxm[i] >= rstart && idxm[i] < rend) {
288965ea79SLois Curfman McInnes       row = idxm[i] - rstart;
2939b7565bSBarry Smith       if (roworiented) {
3039b7565bSBarry Smith         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv); CHKERRQ(ierr);
3139b7565bSBarry Smith       }
3239b7565bSBarry Smith       else {
338965ea79SLois Curfman McInnes         for ( j=0; j<n; j++ ) {
3439ddd567SLois Curfman McInnes           if (idxn[j] < 0) SETERRQ(1,"MatSetValues_MPIDense:Negative column");
3539b7565bSBarry Smith           if (idxn[j] >= A->N) SETERRQ(1,"MatSetValues_MPIDense:Column too large");
3639b7565bSBarry Smith           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv); CHKERRQ(ierr);
3739b7565bSBarry Smith         }
388965ea79SLois Curfman McInnes       }
398965ea79SLois Curfman McInnes     }
408965ea79SLois Curfman McInnes     else {
4139b7565bSBarry Smith       if (roworiented) {
4239b7565bSBarry Smith         ierr = StashValues_Private(&A->stash,idxm[i],n,idxn,v+i*n,addv); CHKERRQ(ierr);
4339b7565bSBarry Smith       }
4439b7565bSBarry Smith       else { /* must stash each seperately */
4539b7565bSBarry Smith         row = idxm[i];
4639b7565bSBarry Smith         for ( j=0; j<n; j++ ) {
4739b7565bSBarry Smith           ierr = StashValues_Private(&A->stash,row,1,&idxn[j],v+i+j*m,addv);
4839b7565bSBarry Smith                  CHKERRQ(ierr);
4939b7565bSBarry Smith         }
5039b7565bSBarry Smith       }
51b49de8d1SLois Curfman McInnes     }
52b49de8d1SLois Curfman McInnes   }
53b49de8d1SLois Curfman McInnes   return 0;
54b49de8d1SLois Curfman McInnes }
55b49de8d1SLois Curfman McInnes 
56b49de8d1SLois Curfman McInnes static int MatGetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
57b49de8d1SLois Curfman McInnes {
58b49de8d1SLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
59b49de8d1SLois Curfman McInnes   int          ierr, i, j, rstart = mdn->rstart, rend = mdn->rend, row;
60b49de8d1SLois Curfman McInnes 
61b49de8d1SLois Curfman McInnes   for ( i=0; i<m; i++ ) {
62b49de8d1SLois Curfman McInnes     if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIDense:Negative row");
63b49de8d1SLois Curfman McInnes     if (idxm[i] >= mdn->M) SETERRQ(1,"MatGetValues_MPIDense:Row too large");
64b49de8d1SLois Curfman McInnes     if (idxm[i] >= rstart && idxm[i] < rend) {
65b49de8d1SLois Curfman McInnes       row = idxm[i] - rstart;
66b49de8d1SLois Curfman McInnes       for ( j=0; j<n; j++ ) {
67b49de8d1SLois Curfman McInnes         if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIDense:Negative column");
68b49de8d1SLois Curfman McInnes         if (idxn[j] >= mdn->N)
69b49de8d1SLois Curfman McInnes           SETERRQ(1,"MatGetValues_MPIDense:Column too large");
70b49de8d1SLois Curfman McInnes         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j); CHKERRQ(ierr);
71b49de8d1SLois Curfman McInnes       }
72b49de8d1SLois Curfman McInnes     }
73b49de8d1SLois Curfman McInnes     else {
74b49de8d1SLois Curfman McInnes       SETERRQ(1,"MatGetValues_MPIDense:Only local values currently supported");
758965ea79SLois Curfman McInnes     }
768965ea79SLois Curfman McInnes   }
778965ea79SLois Curfman McInnes   return 0;
788965ea79SLois Curfman McInnes }
798965ea79SLois Curfman McInnes 
8039ddd567SLois Curfman McInnes static int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
818965ea79SLois Curfman McInnes {
8239ddd567SLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
838965ea79SLois Curfman McInnes   MPI_Comm     comm = mat->comm;
8439ddd567SLois Curfman McInnes   int          size = mdn->size, *owners = mdn->rowners, rank = mdn->rank;
858965ea79SLois Curfman McInnes   int          *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
8639ddd567SLois Curfman McInnes   int          tag = mat->tag, *owner,*starts,count,ierr;
878965ea79SLois Curfman McInnes   InsertMode   addv;
8839ddd567SLois Curfman McInnes   MPI_Request  *send_waits,*recv_waits;
898965ea79SLois Curfman McInnes   Scalar       *rvalues,*svalues;
908965ea79SLois Curfman McInnes 
918965ea79SLois Curfman McInnes   /* make sure all processors are either in INSERTMODE or ADDMODE */
9239ddd567SLois Curfman McInnes   MPI_Allreduce((void *) &mdn->insertmode,(void *) &addv,1,MPI_INT,
9339ddd567SLois Curfman McInnes                 MPI_BOR,comm);
9439ddd567SLois Curfman McInnes   if (addv == (ADD_VALUES|INSERT_VALUES)) { SETERRQ(1,
9539ddd567SLois Curfman McInnes     "MatAssemblyBegin_MPIDense:Cannot mix adds/inserts on different procs");
968965ea79SLois Curfman McInnes     }
9739ddd567SLois Curfman McInnes   mdn->insertmode = addv; /* in case this processor had no cache */
988965ea79SLois Curfman McInnes 
998965ea79SLois Curfman McInnes   /*  first count number of contributors to each processor */
1000452661fSBarry Smith   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
101cddf8d76SBarry Smith   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
1020452661fSBarry Smith   owner = (int *) PetscMalloc( (mdn->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
10339ddd567SLois Curfman McInnes   for ( i=0; i<mdn->stash.n; i++ ) {
10439ddd567SLois Curfman McInnes     idx = mdn->stash.idx[i];
1058965ea79SLois Curfman McInnes     for ( j=0; j<size; j++ ) {
1068965ea79SLois Curfman McInnes       if (idx >= owners[j] && idx < owners[j+1]) {
1078965ea79SLois Curfman McInnes         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
1088965ea79SLois Curfman McInnes       }
1098965ea79SLois Curfman McInnes     }
1108965ea79SLois Curfman McInnes   }
1118965ea79SLois Curfman McInnes   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
1128965ea79SLois Curfman McInnes 
1138965ea79SLois Curfman McInnes   /* inform other processors of number of messages and max length*/
1140452661fSBarry Smith   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
11539ddd567SLois Curfman McInnes   MPI_Allreduce((void *) procs,(void *) work,size,MPI_INT,MPI_SUM,comm);
1168965ea79SLois Curfman McInnes   nreceives = work[rank];
11739ddd567SLois Curfman McInnes   MPI_Allreduce((void *) nprocs,(void *) work,size,MPI_INT,MPI_MAX,comm);
1188965ea79SLois Curfman McInnes   nmax = work[rank];
1190452661fSBarry Smith   PetscFree(work);
1208965ea79SLois Curfman McInnes 
1218965ea79SLois Curfman McInnes   /* post receives:
1228965ea79SLois Curfman McInnes        1) each message will consist of ordered pairs
1238965ea79SLois Curfman McInnes      (global index,value) we store the global index as a double
1248965ea79SLois Curfman McInnes      to simplify the message passing.
1258965ea79SLois Curfman McInnes        2) since we don't know how long each individual message is we
1268965ea79SLois Curfman McInnes      allocate the largest needed buffer for each receive. Potentially
1278965ea79SLois Curfman McInnes      this is a lot of wasted space.
1288965ea79SLois Curfman McInnes 
1298965ea79SLois Curfman McInnes        This could be done better.
1308965ea79SLois Curfman McInnes   */
1310452661fSBarry Smith   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
1328965ea79SLois Curfman McInnes   CHKPTRQ(rvalues);
1330452661fSBarry Smith   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
1348965ea79SLois Curfman McInnes   CHKPTRQ(recv_waits);
1358965ea79SLois Curfman McInnes   for ( i=0; i<nreceives; i++ ) {
13639ddd567SLois Curfman McInnes     MPI_Irecv((void *)(rvalues+3*nmax*i),3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
1378965ea79SLois Curfman McInnes               comm,recv_waits+i);
1388965ea79SLois Curfman McInnes   }
1398965ea79SLois Curfman McInnes 
1408965ea79SLois Curfman McInnes   /* do sends:
1418965ea79SLois Curfman McInnes       1) starts[i] gives the starting index in svalues for stuff going to
1428965ea79SLois Curfman McInnes          the ith processor
1438965ea79SLois Curfman McInnes   */
1440452661fSBarry Smith   svalues = (Scalar *) PetscMalloc( 3*(mdn->stash.n+1)*sizeof(Scalar) );
14539ddd567SLois Curfman McInnes   CHKPTRQ(svalues);
1460452661fSBarry Smith   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
1478965ea79SLois Curfman McInnes   CHKPTRQ(send_waits);
1480452661fSBarry Smith   starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
1498965ea79SLois Curfman McInnes   starts[0] = 0;
1508965ea79SLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
15139ddd567SLois Curfman McInnes   for ( i=0; i<mdn->stash.n; i++ ) {
15239ddd567SLois Curfman McInnes     svalues[3*starts[owner[i]]]       = (Scalar)  mdn->stash.idx[i];
15339ddd567SLois Curfman McInnes     svalues[3*starts[owner[i]]+1]     = (Scalar)  mdn->stash.idy[i];
15439ddd567SLois Curfman McInnes     svalues[3*(starts[owner[i]]++)+2] =  mdn->stash.array[i];
1558965ea79SLois Curfman McInnes   }
1560452661fSBarry Smith   PetscFree(owner);
1578965ea79SLois Curfman McInnes   starts[0] = 0;
1588965ea79SLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
1598965ea79SLois Curfman McInnes   count = 0;
1608965ea79SLois Curfman McInnes   for ( i=0; i<size; i++ ) {
1618965ea79SLois Curfman McInnes     if (procs[i]) {
16239ddd567SLois Curfman McInnes       MPI_Isend((void*)(svalues+3*starts[i]),3*nprocs[i],MPIU_SCALAR,i,tag,
1638965ea79SLois Curfman McInnes                 comm,send_waits+count++);
1648965ea79SLois Curfman McInnes     }
1658965ea79SLois Curfman McInnes   }
1660452661fSBarry Smith   PetscFree(starts); PetscFree(nprocs);
1678965ea79SLois Curfman McInnes 
1688965ea79SLois Curfman McInnes   /* Free cache space */
16939ddd567SLois Curfman McInnes   ierr = StashDestroy_Private(&mdn->stash); CHKERRQ(ierr);
1708965ea79SLois Curfman McInnes 
17139ddd567SLois Curfman McInnes   mdn->svalues    = svalues;    mdn->rvalues = rvalues;
17239ddd567SLois Curfman McInnes   mdn->nsends     = nsends;     mdn->nrecvs = nreceives;
17339ddd567SLois Curfman McInnes   mdn->send_waits = send_waits; mdn->recv_waits = recv_waits;
17439ddd567SLois Curfman McInnes   mdn->rmax       = nmax;
1758965ea79SLois Curfman McInnes 
1768965ea79SLois Curfman McInnes   return 0;
1778965ea79SLois Curfman McInnes }
17839ddd567SLois Curfman McInnes extern int MatSetUpMultiply_MPIDense(Mat);
1798965ea79SLois Curfman McInnes 
18039ddd567SLois Curfman McInnes static int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
1818965ea79SLois Curfman McInnes {
18239ddd567SLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
1838965ea79SLois Curfman McInnes   MPI_Status   *send_status,recv_status;
18439ddd567SLois Curfman McInnes   int          imdex, nrecvs=mdn->nrecvs, count=nrecvs, i, n, ierr, row, col;
1858965ea79SLois Curfman McInnes   Scalar       *values,val;
18639ddd567SLois Curfman McInnes   InsertMode   addv = mdn->insertmode;
1878965ea79SLois Curfman McInnes 
1888965ea79SLois Curfman McInnes   /*  wait on receives */
1898965ea79SLois Curfman McInnes   while (count) {
19039ddd567SLois Curfman McInnes     MPI_Waitany(nrecvs,mdn->recv_waits,&imdex,&recv_status);
1918965ea79SLois Curfman McInnes     /* unpack receives into our local space */
19239ddd567SLois Curfman McInnes     values = mdn->rvalues + 3*imdex*mdn->rmax;
1938965ea79SLois Curfman McInnes     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
1948965ea79SLois Curfman McInnes     n = n/3;
1958965ea79SLois Curfman McInnes     for ( i=0; i<n; i++ ) {
19639ddd567SLois Curfman McInnes       row = (int) PETSCREAL(values[3*i]) - mdn->rstart;
1978965ea79SLois Curfman McInnes       col = (int) PETSCREAL(values[3*i+1]);
1988965ea79SLois Curfman McInnes       val = values[3*i+2];
19939ddd567SLois Curfman McInnes       if (col >= 0 && col < mdn->N) {
20039ddd567SLois Curfman McInnes         MatSetValues(mdn->A,1,&row,1,&col,&val,addv);
2018965ea79SLois Curfman McInnes       }
20239ddd567SLois Curfman McInnes       else {SETERRQ(1,"MatAssemblyEnd_MPIDense:Invalid column");}
2038965ea79SLois Curfman McInnes     }
2048965ea79SLois Curfman McInnes     count--;
2058965ea79SLois Curfman McInnes   }
2060452661fSBarry Smith   PetscFree(mdn->recv_waits); PetscFree(mdn->rvalues);
2078965ea79SLois Curfman McInnes 
2088965ea79SLois Curfman McInnes   /* wait on sends */
20939ddd567SLois Curfman McInnes   if (mdn->nsends) {
2100452661fSBarry Smith     send_status = (MPI_Status *) PetscMalloc( mdn->nsends*sizeof(MPI_Status) );
2118965ea79SLois Curfman McInnes     CHKPTRQ(send_status);
21239ddd567SLois Curfman McInnes     MPI_Waitall(mdn->nsends,mdn->send_waits,send_status);
2130452661fSBarry Smith     PetscFree(send_status);
2148965ea79SLois Curfman McInnes   }
2150452661fSBarry Smith   PetscFree(mdn->send_waits); PetscFree(mdn->svalues);
2168965ea79SLois Curfman McInnes 
21739ddd567SLois Curfman McInnes   mdn->insertmode = NOT_SET_VALUES;
21839ddd567SLois Curfman McInnes   ierr = MatAssemblyBegin(mdn->A,mode); CHKERRQ(ierr);
21939ddd567SLois Curfman McInnes   ierr = MatAssemblyEnd(mdn->A,mode); CHKERRQ(ierr);
2208965ea79SLois Curfman McInnes 
221*c456f294SBarry Smith   if (!mat->assembled && mode == FINAL_ASSEMBLY) {
22239ddd567SLois Curfman McInnes     ierr = MatSetUpMultiply_MPIDense(mat); CHKERRQ(ierr);
2238965ea79SLois Curfman McInnes   }
2248965ea79SLois Curfman McInnes   return 0;
2258965ea79SLois Curfman McInnes }
2268965ea79SLois Curfman McInnes 
22739ddd567SLois Curfman McInnes static int MatZeroEntries_MPIDense(Mat A)
2288965ea79SLois Curfman McInnes {
22939ddd567SLois Curfman McInnes   Mat_MPIDense *l = (Mat_MPIDense *) A->data;
23039ddd567SLois Curfman McInnes   return MatZeroEntries(l->A);
2318965ea79SLois Curfman McInnes }
2328965ea79SLois Curfman McInnes 
2338965ea79SLois Curfman McInnes /* the code does not do the diagonal entries correctly unless the
2348965ea79SLois Curfman McInnes    matrix is square and the column and row owerships are identical.
2358965ea79SLois Curfman McInnes    This is a BUG. The only way to fix it seems to be to access
2363501a2bdSLois Curfman McInnes    mdn->A and mdn->B directly and not through the MatZeroRows()
2378965ea79SLois Curfman McInnes    routine.
2388965ea79SLois Curfman McInnes */
23939ddd567SLois Curfman McInnes static int MatZeroRows_MPIDense(Mat A,IS is,Scalar *diag)
2408965ea79SLois Curfman McInnes {
24139ddd567SLois Curfman McInnes   Mat_MPIDense   *l = (Mat_MPIDense *) A->data;
2428965ea79SLois Curfman McInnes   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
2438965ea79SLois Curfman McInnes   int            *procs,*nprocs,j,found,idx,nsends,*work;
2448965ea79SLois Curfman McInnes   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
2458965ea79SLois Curfman McInnes   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
2468965ea79SLois Curfman McInnes   int            *lens,imdex,*lrows,*values;
2478965ea79SLois Curfman McInnes   MPI_Comm       comm = A->comm;
2488965ea79SLois Curfman McInnes   MPI_Request    *send_waits,*recv_waits;
2498965ea79SLois Curfman McInnes   MPI_Status     recv_status,*send_status;
2508965ea79SLois Curfman McInnes   IS             istmp;
2518965ea79SLois Curfman McInnes 
2528965ea79SLois Curfman McInnes   ierr = ISGetLocalSize(is,&N); CHKERRQ(ierr);
2538965ea79SLois Curfman McInnes   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
2548965ea79SLois Curfman McInnes 
2558965ea79SLois Curfman McInnes   /*  first count number of contributors to each processor */
2560452661fSBarry Smith   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
257cddf8d76SBarry Smith   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
2580452661fSBarry Smith   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
2598965ea79SLois Curfman McInnes   for ( i=0; i<N; i++ ) {
2608965ea79SLois Curfman McInnes     idx = rows[i];
2618965ea79SLois Curfman McInnes     found = 0;
2628965ea79SLois Curfman McInnes     for ( j=0; j<size; j++ ) {
2638965ea79SLois Curfman McInnes       if (idx >= owners[j] && idx < owners[j+1]) {
2648965ea79SLois Curfman McInnes         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
2658965ea79SLois Curfman McInnes       }
2668965ea79SLois Curfman McInnes     }
26739ddd567SLois Curfman McInnes     if (!found) SETERRQ(1,"MatZeroRows_MPIDense:Index out of range");
2688965ea79SLois Curfman McInnes   }
2698965ea79SLois Curfman McInnes   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
2708965ea79SLois Curfman McInnes 
2718965ea79SLois Curfman McInnes   /* inform other processors of number of messages and max length*/
2720452661fSBarry Smith   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
2738965ea79SLois Curfman McInnes   MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
2748965ea79SLois Curfman McInnes   nrecvs = work[rank];
2758965ea79SLois Curfman McInnes   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
2768965ea79SLois Curfman McInnes   nmax = work[rank];
2770452661fSBarry Smith   PetscFree(work);
2788965ea79SLois Curfman McInnes 
2798965ea79SLois Curfman McInnes   /* post receives:   */
2800452661fSBarry Smith   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
2818965ea79SLois Curfman McInnes   CHKPTRQ(rvalues);
2820452661fSBarry Smith   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
2838965ea79SLois Curfman McInnes   CHKPTRQ(recv_waits);
2848965ea79SLois Curfman McInnes   for ( i=0; i<nrecvs; i++ ) {
2858965ea79SLois Curfman McInnes     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
2868965ea79SLois Curfman McInnes   }
2878965ea79SLois Curfman McInnes 
2888965ea79SLois Curfman McInnes   /* do sends:
2898965ea79SLois Curfman McInnes       1) starts[i] gives the starting index in svalues for stuff going to
2908965ea79SLois Curfman McInnes          the ith processor
2918965ea79SLois Curfman McInnes   */
2920452661fSBarry Smith   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
2930452661fSBarry Smith   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
2948965ea79SLois Curfman McInnes   CHKPTRQ(send_waits);
2950452661fSBarry Smith   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
2968965ea79SLois Curfman McInnes   starts[0] = 0;
2978965ea79SLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
2988965ea79SLois Curfman McInnes   for ( i=0; i<N; i++ ) {
2998965ea79SLois Curfman McInnes     svalues[starts[owner[i]]++] = rows[i];
3008965ea79SLois Curfman McInnes   }
3018965ea79SLois Curfman McInnes   ISRestoreIndices(is,&rows);
3028965ea79SLois Curfman McInnes 
3038965ea79SLois Curfman McInnes   starts[0] = 0;
3048965ea79SLois Curfman McInnes   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
3058965ea79SLois Curfman McInnes   count = 0;
3068965ea79SLois Curfman McInnes   for ( i=0; i<size; i++ ) {
3078965ea79SLois Curfman McInnes     if (procs[i]) {
3088965ea79SLois Curfman McInnes       MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
3098965ea79SLois Curfman McInnes     }
3108965ea79SLois Curfman McInnes   }
3110452661fSBarry Smith   PetscFree(starts);
3128965ea79SLois Curfman McInnes 
3138965ea79SLois Curfman McInnes   base = owners[rank];
3148965ea79SLois Curfman McInnes 
3158965ea79SLois Curfman McInnes   /*  wait on receives */
3160452661fSBarry Smith   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
3178965ea79SLois Curfman McInnes   source = lens + nrecvs;
3188965ea79SLois Curfman McInnes   count  = nrecvs; slen = 0;
3198965ea79SLois Curfman McInnes   while (count) {
3208965ea79SLois Curfman McInnes     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
3218965ea79SLois Curfman McInnes     /* unpack receives into our local space */
3228965ea79SLois Curfman McInnes     MPI_Get_count(&recv_status,MPI_INT,&n);
3238965ea79SLois Curfman McInnes     source[imdex]  = recv_status.MPI_SOURCE;
3248965ea79SLois Curfman McInnes     lens[imdex]  = n;
3258965ea79SLois Curfman McInnes     slen += n;
3268965ea79SLois Curfman McInnes     count--;
3278965ea79SLois Curfman McInnes   }
3280452661fSBarry Smith   PetscFree(recv_waits);
3298965ea79SLois Curfman McInnes 
3308965ea79SLois Curfman McInnes   /* move the data into the send scatter */
3310452661fSBarry Smith   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
3328965ea79SLois Curfman McInnes   count = 0;
3338965ea79SLois Curfman McInnes   for ( i=0; i<nrecvs; i++ ) {
3348965ea79SLois Curfman McInnes     values = rvalues + i*nmax;
3358965ea79SLois Curfman McInnes     for ( j=0; j<lens[i]; j++ ) {
3368965ea79SLois Curfman McInnes       lrows[count++] = values[j] - base;
3378965ea79SLois Curfman McInnes     }
3388965ea79SLois Curfman McInnes   }
3390452661fSBarry Smith   PetscFree(rvalues); PetscFree(lens);
3400452661fSBarry Smith   PetscFree(owner); PetscFree(nprocs);
3418965ea79SLois Curfman McInnes 
3428965ea79SLois Curfman McInnes   /* actually zap the local rows */
3438965ea79SLois Curfman McInnes   ierr = ISCreateSeq(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
3448965ea79SLois Curfman McInnes   PLogObjectParent(A,istmp);
3450452661fSBarry Smith   PetscFree(lrows);
3468965ea79SLois Curfman McInnes   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
3478965ea79SLois Curfman McInnes   ierr = ISDestroy(istmp); CHKERRQ(ierr);
3488965ea79SLois Curfman McInnes 
3498965ea79SLois Curfman McInnes   /* wait on sends */
3508965ea79SLois Curfman McInnes   if (nsends) {
3510452661fSBarry Smith     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
3528965ea79SLois Curfman McInnes     CHKPTRQ(send_status);
3538965ea79SLois Curfman McInnes     MPI_Waitall(nsends,send_waits,send_status);
3540452661fSBarry Smith     PetscFree(send_status);
3558965ea79SLois Curfman McInnes   }
3560452661fSBarry Smith   PetscFree(send_waits); PetscFree(svalues);
3578965ea79SLois Curfman McInnes 
3588965ea79SLois Curfman McInnes   return 0;
3598965ea79SLois Curfman McInnes }
3608965ea79SLois Curfman McInnes 
36139ddd567SLois Curfman McInnes static int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
3628965ea79SLois Curfman McInnes {
36339ddd567SLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
3648965ea79SLois Curfman McInnes   int          ierr;
365*c456f294SBarry Smith 
36639ddd567SLois Curfman McInnes   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx);
36739ddd567SLois Curfman McInnes   CHKERRQ(ierr);
36839ddd567SLois Curfman McInnes   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx);
36939ddd567SLois Curfman McInnes   CHKERRQ(ierr);
37039ddd567SLois Curfman McInnes   ierr = MatMult(mdn->A,mdn->lvec,yy); CHKERRQ(ierr);
3718965ea79SLois Curfman McInnes   return 0;
3728965ea79SLois Curfman McInnes }
3738965ea79SLois Curfman McInnes 
37439ddd567SLois Curfman McInnes static int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
3758965ea79SLois Curfman McInnes {
37639ddd567SLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
3778965ea79SLois Curfman McInnes   int          ierr;
378*c456f294SBarry Smith 
379*c456f294SBarry Smith   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx);CHKERRQ(ierr);
380*c456f294SBarry Smith   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx);CHKERRQ(ierr);
38139ddd567SLois Curfman McInnes   ierr = MatMultAdd(mdn->A,mdn->lvec,yy,zz); CHKERRQ(ierr);
3828965ea79SLois Curfman McInnes   return 0;
3838965ea79SLois Curfman McInnes }
3848965ea79SLois Curfman McInnes 
385096963f5SLois Curfman McInnes static int MatMultTrans_MPIDense(Mat A,Vec xx,Vec yy)
386096963f5SLois Curfman McInnes {
387096963f5SLois Curfman McInnes   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
388096963f5SLois Curfman McInnes   int          ierr;
3893501a2bdSLois Curfman McInnes   Scalar       zero = 0.0;
390096963f5SLois Curfman McInnes 
3913501a2bdSLois Curfman McInnes   ierr = VecSet(&zero,yy); CHKERRQ(ierr);
392096963f5SLois Curfman McInnes   ierr = MatMultTrans(a->A,xx,a->lvec); CHKERRQ(ierr);
393096963f5SLois Curfman McInnes   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,
394096963f5SLois Curfman McInnes          (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
395096963f5SLois Curfman McInnes   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,
396096963f5SLois Curfman McInnes          (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
397096963f5SLois Curfman McInnes   return 0;
398096963f5SLois Curfman McInnes }
399096963f5SLois Curfman McInnes 
400096963f5SLois Curfman McInnes static int MatMultTransAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
401096963f5SLois Curfman McInnes {
402096963f5SLois Curfman McInnes   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
403096963f5SLois Curfman McInnes   int          ierr;
404096963f5SLois Curfman McInnes 
4053501a2bdSLois Curfman McInnes   ierr = VecCopy(yy,zz); CHKERRQ(ierr);
4063501a2bdSLois Curfman McInnes   ierr = MatMultTrans(a->A,xx,a->lvec); CHKERRQ(ierr);
407096963f5SLois Curfman McInnes   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,
408096963f5SLois Curfman McInnes          (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
409096963f5SLois Curfman McInnes   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,
410096963f5SLois Curfman McInnes          (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
411096963f5SLois Curfman McInnes   return 0;
412096963f5SLois Curfman McInnes }
413096963f5SLois Curfman McInnes 
41439ddd567SLois Curfman McInnes static int MatGetDiagonal_MPIDense(Mat A,Vec v)
4158965ea79SLois Curfman McInnes {
41639ddd567SLois Curfman McInnes   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
417096963f5SLois Curfman McInnes   Mat_SeqDense *aloc = (Mat_SeqDense *) a->A->data;
418096963f5SLois Curfman McInnes   int          ierr, i, n, m = a->m, radd;
419096963f5SLois Curfman McInnes   Scalar       *x;
420ed3cc1f0SBarry Smith 
421096963f5SLois Curfman McInnes   ierr = VecGetArray(v,&x); CHKERRQ(ierr);
422096963f5SLois Curfman McInnes   ierr = VecGetSize(v,&n); CHKERRQ(ierr);
423096963f5SLois Curfman McInnes   if (n != a->M) SETERRQ(1,"MatGetDiagonal_SeqDense:Nonconforming mat and vec");
424096963f5SLois Curfman McInnes   radd = a->rstart*m*m;
425096963f5SLois Curfman McInnes   for ( i=0; i<m; i++ ) {
426096963f5SLois Curfman McInnes     x[i] = aloc->v[radd + i*m + i];
427096963f5SLois Curfman McInnes   }
428096963f5SLois Curfman McInnes   return 0;
4298965ea79SLois Curfman McInnes }
4308965ea79SLois Curfman McInnes 
43139ddd567SLois Curfman McInnes static int MatDestroy_MPIDense(PetscObject obj)
4328965ea79SLois Curfman McInnes {
4338965ea79SLois Curfman McInnes   Mat          mat = (Mat) obj;
4343501a2bdSLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
4358965ea79SLois Curfman McInnes   int          ierr;
436ed3cc1f0SBarry Smith 
4378965ea79SLois Curfman McInnes #if defined(PETSC_LOG)
4383501a2bdSLois Curfman McInnes   PLogObjectState(obj,"Rows=%d, Cols=%d",mdn->M,mdn->N);
4398965ea79SLois Curfman McInnes #endif
4400452661fSBarry Smith   PetscFree(mdn->rowners);
4413501a2bdSLois Curfman McInnes   ierr = MatDestroy(mdn->A); CHKERRQ(ierr);
4423501a2bdSLois Curfman McInnes   if (mdn->lvec)   VecDestroy(mdn->lvec);
4433501a2bdSLois Curfman McInnes   if (mdn->Mvctx)  VecScatterDestroy(mdn->Mvctx);
444622d7880SLois Curfman McInnes   if (mdn->factor) {
445622d7880SLois Curfman McInnes     if (mdn->factor->temp)   PetscFree(mdn->factor->temp);
446622d7880SLois Curfman McInnes     if (mdn->factor->tag)    PetscFree(mdn->factor->tag);
447622d7880SLois Curfman McInnes     if (mdn->factor->pivots) PetscFree(mdn->factor->pivots);
448622d7880SLois Curfman McInnes     PetscFree(mdn->factor);
449622d7880SLois Curfman McInnes   }
4500452661fSBarry Smith   PetscFree(mdn);
4518965ea79SLois Curfman McInnes   PLogObjectDestroy(mat);
4520452661fSBarry Smith   PetscHeaderDestroy(mat);
4538965ea79SLois Curfman McInnes   return 0;
4548965ea79SLois Curfman McInnes }
45539ddd567SLois Curfman McInnes 
4568965ea79SLois Curfman McInnes #include "pinclude/pviewer.h"
4578965ea79SLois Curfman McInnes 
45839ddd567SLois Curfman McInnes static int MatView_MPIDense_Binary(Mat mat,Viewer viewer)
4598965ea79SLois Curfman McInnes {
46039ddd567SLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
4618965ea79SLois Curfman McInnes   int          ierr;
46239ddd567SLois Curfman McInnes   if (mdn->size == 1) {
46339ddd567SLois Curfman McInnes     ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
4648965ea79SLois Curfman McInnes   }
46539ddd567SLois Curfman McInnes   else SETERRQ(1,"MatView_MPIDense_Binary:Only uniprocessor output supported");
4668965ea79SLois Curfman McInnes   return 0;
4678965ea79SLois Curfman McInnes }
4688965ea79SLois Curfman McInnes 
46939ddd567SLois Curfman McInnes static int MatView_MPIDense_ASCII(Mat mat,Viewer viewer)
4708965ea79SLois Curfman McInnes {
47139ddd567SLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
47239ddd567SLois Curfman McInnes   int          ierr, format;
4738965ea79SLois Curfman McInnes   PetscObject  vobj = (PetscObject) viewer;
4748965ea79SLois Curfman McInnes   FILE         *fd;
4758965ea79SLois Curfman McInnes 
4763501a2bdSLois Curfman McInnes   ierr = ViewerFileGetPointer_Private(viewer,&fd); CHKERRQ(ierr);
4778965ea79SLois Curfman McInnes   if (vobj->type == ASCII_FILE_VIEWER || vobj->type == ASCII_FILES_VIEWER) {
4788965ea79SLois Curfman McInnes     ierr = ViewerFileGetFormat_Private(viewer,&format);
4793501a2bdSLois Curfman McInnes     if (format == FILE_FORMAT_INFO_DETAILED) {
480096963f5SLois Curfman McInnes       int nz, nzalloc, mem, rank;
481096963f5SLois Curfman McInnes       MPI_Comm_rank(mat->comm,&rank);
482096963f5SLois Curfman McInnes       ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem);
483096963f5SLois Curfman McInnes       MPIU_Seq_begin(mat->comm,1);
4843501a2bdSLois Curfman McInnes         fprintf(fd,"  [%d] local rows %d nz %d nz alloced %d mem %d \n",
485096963f5SLois Curfman McInnes             rank,mdn->m,nz,nzalloc,mem);
486096963f5SLois Curfman McInnes       fflush(fd);
487096963f5SLois Curfman McInnes       MPIU_Seq_end(mat->comm,1);
4883501a2bdSLois Curfman McInnes       ierr = VecScatterView(mdn->Mvctx,viewer); CHKERRQ(ierr);
4893501a2bdSLois Curfman McInnes       return 0;
4903501a2bdSLois Curfman McInnes     }
4913501a2bdSLois Curfman McInnes     else if (format == FILE_FORMAT_INFO) {
492096963f5SLois Curfman McInnes       return 0;
4938965ea79SLois Curfman McInnes     }
4948965ea79SLois Curfman McInnes   }
4958965ea79SLois Curfman McInnes   if (vobj->type == ASCII_FILE_VIEWER) {
4968965ea79SLois Curfman McInnes     MPIU_Seq_begin(mat->comm,1);
49739ddd567SLois Curfman McInnes     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d\n",
49839ddd567SLois Curfman McInnes              mdn->rank,mdn->m,mdn->rstart,mdn->rend,mdn->n);
49939ddd567SLois Curfman McInnes     ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
5008965ea79SLois Curfman McInnes     fflush(fd);
5018965ea79SLois Curfman McInnes     MPIU_Seq_end(mat->comm,1);
5028965ea79SLois Curfman McInnes   }
5038965ea79SLois Curfman McInnes   else {
50439ddd567SLois Curfman McInnes     int size = mdn->size, rank = mdn->rank;
5058965ea79SLois Curfman McInnes     if (size == 1) {
50639ddd567SLois Curfman McInnes       ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
5078965ea79SLois Curfman McInnes     }
5088965ea79SLois Curfman McInnes     else {
5098965ea79SLois Curfman McInnes       /* assemble the entire matrix onto first processor. */
5108965ea79SLois Curfman McInnes       Mat          A;
51139ddd567SLois Curfman McInnes       int          M = mdn->M, N = mdn->N,m,row,i, nz, *cols;
51239ddd567SLois Curfman McInnes       Scalar       *vals;
51339ddd567SLois Curfman McInnes       Mat_SeqDense *Amdn = (Mat_SeqDense*) mdn->A->data;
5148965ea79SLois Curfman McInnes 
5158965ea79SLois Curfman McInnes       if (!rank) {
516b4fd4287SBarry Smith         ierr = MatCreateMPIDense(mat->comm,M,M,N,N,PETSC_NULL,&A); CHKERRQ(ierr);
5178965ea79SLois Curfman McInnes       }
5188965ea79SLois Curfman McInnes       else {
519b4fd4287SBarry Smith         ierr = MatCreateMPIDense(mat->comm,0,M,N,N,PETSC_NULL,&A); CHKERRQ(ierr);
5208965ea79SLois Curfman McInnes       }
5218965ea79SLois Curfman McInnes       PLogObjectParent(mat,A);
5228965ea79SLois Curfman McInnes 
52339ddd567SLois Curfman McInnes       /* Copy the matrix ... This isn't the most efficient means,
52439ddd567SLois Curfman McInnes          but it's quick for now */
52539ddd567SLois Curfman McInnes       row = mdn->rstart; m = Amdn->m;
5268965ea79SLois Curfman McInnes       for ( i=0; i<m; i++ ) {
52739ddd567SLois Curfman McInnes         ierr = MatGetRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
52839ddd567SLois Curfman McInnes         ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES); CHKERRQ(ierr);
52939ddd567SLois Curfman McInnes         ierr = MatRestoreRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
53039ddd567SLois Curfman McInnes         row++;
5318965ea79SLois Curfman McInnes       }
5328965ea79SLois Curfman McInnes 
5338965ea79SLois Curfman McInnes       ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
5348965ea79SLois Curfman McInnes       ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
5358965ea79SLois Curfman McInnes       if (!rank) {
53639ddd567SLois Curfman McInnes         ierr = MatView(((Mat_MPIDense*)(A->data))->A,viewer); CHKERRQ(ierr);
5378965ea79SLois Curfman McInnes       }
5388965ea79SLois Curfman McInnes       ierr = MatDestroy(A); CHKERRQ(ierr);
5398965ea79SLois Curfman McInnes     }
5408965ea79SLois Curfman McInnes   }
5418965ea79SLois Curfman McInnes   return 0;
5428965ea79SLois Curfman McInnes }
5438965ea79SLois Curfman McInnes 
54439ddd567SLois Curfman McInnes static int MatView_MPIDense(PetscObject obj,Viewer viewer)
5458965ea79SLois Curfman McInnes {
5468965ea79SLois Curfman McInnes   Mat          mat = (Mat) obj;
5478965ea79SLois Curfman McInnes   PetscObject  vobj = (PetscObject) viewer;
54839ddd567SLois Curfman McInnes   int          ierr;
5498965ea79SLois Curfman McInnes 
5508965ea79SLois Curfman McInnes   if (!viewer) {
5518965ea79SLois Curfman McInnes     viewer = STDOUT_VIEWER_SELF; vobj = (PetscObject) viewer;
5528965ea79SLois Curfman McInnes   }
55339ddd567SLois Curfman McInnes   if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILE_VIEWER) {
55439ddd567SLois Curfman McInnes     ierr = MatView_MPIDense_ASCII(mat,viewer); CHKERRQ(ierr);
5558965ea79SLois Curfman McInnes   }
5568965ea79SLois Curfman McInnes   else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILES_VIEWER) {
55739ddd567SLois Curfman McInnes     ierr = MatView_MPIDense_ASCII(mat,viewer); CHKERRQ(ierr);
5588965ea79SLois Curfman McInnes   }
5598965ea79SLois Curfman McInnes   else if (vobj->type == BINARY_FILE_VIEWER) {
56039ddd567SLois Curfman McInnes     return MatView_MPIDense_Binary(mat,viewer);
5618965ea79SLois Curfman McInnes   }
5628965ea79SLois Curfman McInnes   return 0;
5638965ea79SLois Curfman McInnes }
5648965ea79SLois Curfman McInnes 
5653501a2bdSLois Curfman McInnes static int MatGetInfo_MPIDense(Mat A,MatInfoType flag,int *nz,
5668965ea79SLois Curfman McInnes                              int *nzalloc,int *mem)
5678965ea79SLois Curfman McInnes {
5683501a2bdSLois Curfman McInnes   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
5693501a2bdSLois Curfman McInnes   Mat          mdn = mat->A;
57039ddd567SLois Curfman McInnes   int          ierr, isend[3], irecv[3];
5718965ea79SLois Curfman McInnes 
5723501a2bdSLois Curfman McInnes   ierr = MatGetInfo(mdn,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr);
5738965ea79SLois Curfman McInnes   if (flag == MAT_LOCAL) {
5748965ea79SLois Curfman McInnes     *nz = isend[0]; *nzalloc = isend[1]; *mem = isend[2];
5758965ea79SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_MAX) {
5763501a2bdSLois Curfman McInnes     MPI_Allreduce(isend,irecv,3,MPI_INT,MPI_MAX,A->comm);
5778965ea79SLois Curfman McInnes     *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
5788965ea79SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_SUM) {
5793501a2bdSLois Curfman McInnes     MPI_Allreduce(isend,irecv,3,MPI_INT,MPI_SUM,A->comm);
5808965ea79SLois Curfman McInnes     *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
5818965ea79SLois Curfman McInnes   }
5828965ea79SLois Curfman McInnes   return 0;
5838965ea79SLois Curfman McInnes }
5848965ea79SLois Curfman McInnes 
5858c469469SLois Curfman McInnes /* extern int MatLUFactorSymbolic_MPIDense(Mat,IS,IS,double,Mat*);
5868aaee692SLois Curfman McInnes    extern int MatLUFactorNumeric_MPIDense(Mat,Mat*);
5878aaee692SLois Curfman McInnes    extern int MatLUFactor_MPIDense(Mat,IS,IS,double);
5888aaee692SLois Curfman McInnes    extern int MatSolve_MPIDense(Mat,Vec,Vec);
5898c469469SLois Curfman McInnes    extern int MatSolveAdd_MPIDense(Mat,Vec,Vec,Vec);
5908aaee692SLois Curfman McInnes    extern int MatSolveTrans_MPIDense(Mat,Vec,Vec);
5918aaee692SLois Curfman McInnes    extern int MatSolveTransAdd_MPIDense(Mat,Vec,Vec,Vec); */
5928aaee692SLois Curfman McInnes 
59339ddd567SLois Curfman McInnes static int MatSetOption_MPIDense(Mat A,MatOption op)
5948965ea79SLois Curfman McInnes {
59539ddd567SLois Curfman McInnes   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
5968965ea79SLois Curfman McInnes 
5978965ea79SLois Curfman McInnes   if (op == NO_NEW_NONZERO_LOCATIONS ||
5988965ea79SLois Curfman McInnes       op == YES_NEW_NONZERO_LOCATIONS ||
5998965ea79SLois Curfman McInnes       op == COLUMNS_SORTED ||
6008965ea79SLois Curfman McInnes       op == ROW_ORIENTED) {
6018965ea79SLois Curfman McInnes         MatSetOption(a->A,op);
6028965ea79SLois Curfman McInnes   }
6038965ea79SLois Curfman McInnes   else if (op == ROWS_SORTED ||
6048965ea79SLois Curfman McInnes            op == SYMMETRIC_MATRIX ||
6058965ea79SLois Curfman McInnes            op == STRUCTURALLY_SYMMETRIC_MATRIX ||
6068965ea79SLois Curfman McInnes            op == YES_NEW_DIAGONALS)
60739ddd567SLois Curfman McInnes     PLogInfo((PetscObject)A,"Info:MatSetOption_MPIDense:Option ignored\n");
6088965ea79SLois Curfman McInnes   else if (op == COLUMN_ORIENTED)
60939b7565bSBarry Smith     { a->roworiented = 0; MatSetOption(a->A,op);}
6108965ea79SLois Curfman McInnes   else if (op == NO_NEW_DIAGONALS)
61139ddd567SLois Curfman McInnes     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIDense:NO_NEW_DIAGONALS");}
6128965ea79SLois Curfman McInnes   else
61339ddd567SLois Curfman McInnes     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIDense:unknown option");}
6148965ea79SLois Curfman McInnes   return 0;
6158965ea79SLois Curfman McInnes }
6168965ea79SLois Curfman McInnes 
6173501a2bdSLois Curfman McInnes static int MatGetSize_MPIDense(Mat A,int *m,int *n)
6188965ea79SLois Curfman McInnes {
6193501a2bdSLois Curfman McInnes   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
6208965ea79SLois Curfman McInnes   *m = mat->M; *n = mat->N;
6218965ea79SLois Curfman McInnes   return 0;
6228965ea79SLois Curfman McInnes }
6238965ea79SLois Curfman McInnes 
6243501a2bdSLois Curfman McInnes static int MatGetLocalSize_MPIDense(Mat A,int *m,int *n)
6258965ea79SLois Curfman McInnes {
6263501a2bdSLois Curfman McInnes   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
6278965ea79SLois Curfman McInnes   *m = mat->m; *n = mat->N;
6288965ea79SLois Curfman McInnes   return 0;
6298965ea79SLois Curfman McInnes }
6308965ea79SLois Curfman McInnes 
6313501a2bdSLois Curfman McInnes static int MatGetOwnershipRange_MPIDense(Mat A,int *m,int *n)
6328965ea79SLois Curfman McInnes {
6333501a2bdSLois Curfman McInnes   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
6348965ea79SLois Curfman McInnes   *m = mat->rstart; *n = mat->rend;
6358965ea79SLois Curfman McInnes   return 0;
6368965ea79SLois Curfman McInnes }
6378965ea79SLois Curfman McInnes 
6383501a2bdSLois Curfman McInnes static int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,Scalar **v)
6398965ea79SLois Curfman McInnes {
6403501a2bdSLois Curfman McInnes   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
64139ddd567SLois Curfman McInnes   int          lrow, rstart = mat->rstart, rend = mat->rend;
6428965ea79SLois Curfman McInnes 
64339ddd567SLois Curfman McInnes   if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIDense:only local rows")
6448965ea79SLois Curfman McInnes   lrow = row - rstart;
64539ddd567SLois Curfman McInnes   return MatGetRow(mat->A,lrow,nz,idx,v);
6468965ea79SLois Curfman McInnes }
6478965ea79SLois Curfman McInnes 
64839ddd567SLois Curfman McInnes static int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,Scalar **v)
6498965ea79SLois Curfman McInnes {
6500452661fSBarry Smith   if (idx) PetscFree(*idx);
6510452661fSBarry Smith   if (v) PetscFree(*v);
6528965ea79SLois Curfman McInnes   return 0;
6538965ea79SLois Curfman McInnes }
6548965ea79SLois Curfman McInnes 
655cddf8d76SBarry Smith static int MatNorm_MPIDense(Mat A,NormType type,double *norm)
656096963f5SLois Curfman McInnes {
6573501a2bdSLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *) A->data;
6583501a2bdSLois Curfman McInnes   Mat_SeqDense *mat = (Mat_SeqDense*) mdn->A->data;
6593501a2bdSLois Curfman McInnes   int          ierr, i, j;
6603501a2bdSLois Curfman McInnes   double       sum = 0.0;
6613501a2bdSLois Curfman McInnes   Scalar       *v = mat->v;
6623501a2bdSLois Curfman McInnes 
6633501a2bdSLois Curfman McInnes   if (mdn->size == 1) {
6643501a2bdSLois Curfman McInnes     ierr =  MatNorm(mdn->A,type,norm); CHKERRQ(ierr);
6653501a2bdSLois Curfman McInnes   } else {
6663501a2bdSLois Curfman McInnes     if (type == NORM_FROBENIUS) {
6673501a2bdSLois Curfman McInnes       for (i=0; i<mat->n*mat->m; i++ ) {
6683501a2bdSLois Curfman McInnes #if defined(PETSC_COMPLEX)
6693501a2bdSLois Curfman McInnes         sum += real(conj(*v)*(*v)); v++;
6703501a2bdSLois Curfman McInnes #else
6713501a2bdSLois Curfman McInnes         sum += (*v)*(*v); v++;
6723501a2bdSLois Curfman McInnes #endif
6733501a2bdSLois Curfman McInnes       }
6743501a2bdSLois Curfman McInnes       MPI_Allreduce((void*)&sum,(void*)norm,1,MPI_DOUBLE,MPI_SUM,A->comm);
6753501a2bdSLois Curfman McInnes       *norm = sqrt(*norm);
6763501a2bdSLois Curfman McInnes       PLogFlops(2*mat->n*mat->m);
6773501a2bdSLois Curfman McInnes     }
6783501a2bdSLois Curfman McInnes     else if (type == NORM_1) {
6793501a2bdSLois Curfman McInnes       double *tmp, *tmp2;
6800452661fSBarry Smith       tmp  = (double *) PetscMalloc( 2*mdn->N*sizeof(double) ); CHKPTRQ(tmp);
6813501a2bdSLois Curfman McInnes       tmp2 = tmp + mdn->N;
682cddf8d76SBarry Smith       PetscMemzero(tmp,2*mdn->N*sizeof(double));
683096963f5SLois Curfman McInnes       *norm = 0.0;
6843501a2bdSLois Curfman McInnes       v = mat->v;
6853501a2bdSLois Curfman McInnes       for ( j=0; j<mat->n; j++ ) {
6863501a2bdSLois Curfman McInnes         for ( i=0; i<mat->m; i++ ) {
68767e560aaSBarry Smith           tmp[j] += PetscAbsScalar(*v);  v++;
6883501a2bdSLois Curfman McInnes         }
6893501a2bdSLois Curfman McInnes       }
6903501a2bdSLois Curfman McInnes       MPI_Allreduce((void*)tmp,(void*)tmp2,mdn->N,MPI_DOUBLE,MPI_SUM,A->comm);
6913501a2bdSLois Curfman McInnes       for ( j=0; j<mdn->N; j++ ) {
6923501a2bdSLois Curfman McInnes         if (tmp2[j] > *norm) *norm = tmp2[j];
6933501a2bdSLois Curfman McInnes       }
6940452661fSBarry Smith       PetscFree(tmp);
6953501a2bdSLois Curfman McInnes       PLogFlops(mat->n*mat->m);
6963501a2bdSLois Curfman McInnes     }
6973501a2bdSLois Curfman McInnes     else if (type == NORM_INFINITY) { /* max row norm */
6983501a2bdSLois Curfman McInnes       double ntemp;
6993501a2bdSLois Curfman McInnes       ierr = MatNorm(mdn->A,type,&ntemp); CHKERRQ(ierr);
7003501a2bdSLois Curfman McInnes       MPI_Allreduce((void*)&ntemp,(void*)norm,1,MPI_DOUBLE,MPI_MAX,A->comm);
7013501a2bdSLois Curfman McInnes     }
7023501a2bdSLois Curfman McInnes     else {
7033501a2bdSLois Curfman McInnes       SETERRQ(1,"MatNorm_MPIDense:No support for two norm");
7043501a2bdSLois Curfman McInnes     }
7053501a2bdSLois Curfman McInnes   }
7063501a2bdSLois Curfman McInnes   return 0;
7073501a2bdSLois Curfman McInnes }
7083501a2bdSLois Curfman McInnes 
7093501a2bdSLois Curfman McInnes static int MatTranspose_MPIDense(Mat A,Mat *matout)
7103501a2bdSLois Curfman McInnes {
7113501a2bdSLois Curfman McInnes   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
7123501a2bdSLois Curfman McInnes   Mat_SeqDense *Aloc = (Mat_SeqDense *) a->A->data;
7133501a2bdSLois Curfman McInnes   Mat          B;
7143501a2bdSLois Curfman McInnes   int          M = a->M, N = a->N, m, n, *rwork, rstart = a->rstart;
7153501a2bdSLois Curfman McInnes   int          j, i, ierr;
7163501a2bdSLois Curfman McInnes   Scalar       *v;
7173501a2bdSLois Curfman McInnes 
7183501a2bdSLois Curfman McInnes   if (!matout && M != N)
7193501a2bdSLois Curfman McInnes     SETERRQ(1,"MatTranspose_MPIDense:Supports square matrix only in-place");
720b4fd4287SBarry Smith   ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);
721ed2daf61SLois Curfman McInnes          CHKERRQ(ierr);
7223501a2bdSLois Curfman McInnes 
7233501a2bdSLois Curfman McInnes   m = Aloc->m; n = Aloc->n; v = Aloc->v;
7240452661fSBarry Smith   rwork = (int *) PetscMalloc(n*sizeof(int)); CHKPTRQ(rwork);
7253501a2bdSLois Curfman McInnes   for ( j=0; j<n; j++ ) {
7263501a2bdSLois Curfman McInnes     for (i=0; i<m; i++) rwork[i] = rstart + i;
7273501a2bdSLois Curfman McInnes     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES); CHKERRQ(ierr);
7283501a2bdSLois Curfman McInnes     v += m;
7293501a2bdSLois Curfman McInnes   }
7300452661fSBarry Smith   PetscFree(rwork);
7313501a2bdSLois Curfman McInnes   ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
7323501a2bdSLois Curfman McInnes   ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
7333501a2bdSLois Curfman McInnes   if (matout) {
7343501a2bdSLois Curfman McInnes     *matout = B;
7353501a2bdSLois Curfman McInnes   } else {
7363501a2bdSLois Curfman McInnes     /* This isn't really an in-place transpose, but free data struct from a */
7370452661fSBarry Smith     PetscFree(a->rowners);
7383501a2bdSLois Curfman McInnes     ierr = MatDestroy(a->A); CHKERRQ(ierr);
7393501a2bdSLois Curfman McInnes     if (a->lvec) VecDestroy(a->lvec);
7403501a2bdSLois Curfman McInnes     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
7410452661fSBarry Smith     PetscFree(a);
7423501a2bdSLois Curfman McInnes     PetscMemcpy(A,B,sizeof(struct _Mat));
7430452661fSBarry Smith     PetscHeaderDestroy(B);
7443501a2bdSLois Curfman McInnes   }
745096963f5SLois Curfman McInnes   return 0;
746096963f5SLois Curfman McInnes }
747096963f5SLois Curfman McInnes 
7483d1612f7SBarry Smith static int MatConvertSameType_MPIDense(Mat,Mat *,int);
7498965ea79SLois Curfman McInnes 
7508965ea79SLois Curfman McInnes /* -------------------------------------------------------------------*/
75139ddd567SLois Curfman McInnes static struct _MatOps MatOps = {MatSetValues_MPIDense,
75239ddd567SLois Curfman McInnes        MatGetRow_MPIDense,MatRestoreRow_MPIDense,
75339ddd567SLois Curfman McInnes        MatMult_MPIDense,MatMultAdd_MPIDense,
754096963f5SLois Curfman McInnes        MatMultTrans_MPIDense,MatMultTransAdd_MPIDense,
755e7ca0642SLois Curfman McInnes /*       MatSolve_MPIDense,0, */
756e7ca0642SLois Curfman McInnes        0,0,
75739ddd567SLois Curfman McInnes        0,0,
7588c469469SLois Curfman McInnes        0,0,
7598c469469SLois Curfman McInnes /*       MatLUFactor_MPIDense,0, */
7608aaee692SLois Curfman McInnes        0,MatTranspose_MPIDense,
76139ddd567SLois Curfman McInnes        MatGetInfo_MPIDense,0,
762096963f5SLois Curfman McInnes        MatGetDiagonal_MPIDense,0,MatNorm_MPIDense,
76339ddd567SLois Curfman McInnes        MatAssemblyBegin_MPIDense,MatAssemblyEnd_MPIDense,
7648965ea79SLois Curfman McInnes        0,
76539ddd567SLois Curfman McInnes        MatSetOption_MPIDense,MatZeroEntries_MPIDense,MatZeroRows_MPIDense,
7668c469469SLois Curfman McInnes        0,0,0,
7678c469469SLois Curfman McInnes /*       0,MatLUFactorSymbolic_MPIDense,MatLUFactorNumeric_MPIDense, */
7688aaee692SLois Curfman McInnes        0,0,
76939ddd567SLois Curfman McInnes        MatGetSize_MPIDense,MatGetLocalSize_MPIDense,
77039ddd567SLois Curfman McInnes        MatGetOwnershipRange_MPIDense,
77139ddd567SLois Curfman McInnes        0,0,
7723d1612f7SBarry Smith        0,0,0,0,0,MatConvertSameType_MPIDense,
773b49de8d1SLois Curfman McInnes        0,0,0,0,0,
774b49de8d1SLois Curfman McInnes        0,0,MatGetValues_MPIDense};
7758965ea79SLois Curfman McInnes 
7768965ea79SLois Curfman McInnes /*@C
77739ddd567SLois Curfman McInnes    MatCreateMPIDense - Creates a sparse parallel matrix in dense format.
7788965ea79SLois Curfman McInnes 
7798965ea79SLois Curfman McInnes    Input Parameters:
7808965ea79SLois Curfman McInnes .  comm - MPI communicator
7818965ea79SLois Curfman McInnes .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
7828965ea79SLois Curfman McInnes .  n - number of local columns (or PETSC_DECIDE to have calculated
7838965ea79SLois Curfman McInnes            if N is given)
7848965ea79SLois Curfman McInnes .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
7858965ea79SLois Curfman McInnes .  N - number of global columns (or PETSC_DECIDE to have calculated
7868965ea79SLois Curfman McInnes            if n is given)
787b4fd4287SBarry Smith .  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
788dfc5480cSLois Curfman McInnes    to control all matrix memory allocation.
7898965ea79SLois Curfman McInnes 
7908965ea79SLois Curfman McInnes    Output Parameter:
7918965ea79SLois Curfman McInnes .  newmat - the matrix
7928965ea79SLois Curfman McInnes 
7938965ea79SLois Curfman McInnes    Notes:
79439ddd567SLois Curfman McInnes    The dense format is fully compatible with standard Fortran 77
79539ddd567SLois Curfman McInnes    storage by columns.
7968965ea79SLois Curfman McInnes 
79718f449edSLois Curfman McInnes    The data input variable is intended primarily for Fortran programmers
79818f449edSLois Curfman McInnes    who wish to allocate their own matrix memory space.  Most users should
799b4fd4287SBarry Smith    set data=PETSC_NULL.
80018f449edSLois Curfman McInnes 
8018965ea79SLois Curfman McInnes    The user MUST specify either the local or global matrix dimensions
8028965ea79SLois Curfman McInnes    (possibly both).
8038965ea79SLois Curfman McInnes 
8043501a2bdSLois Curfman McInnes    Currently, the only parallel dense matrix decomposition is by rows,
8053501a2bdSLois Curfman McInnes    so that n=N and each submatrix owns all of the global columns.
8063501a2bdSLois Curfman McInnes 
80739ddd567SLois Curfman McInnes .keywords: matrix, dense, parallel
8088965ea79SLois Curfman McInnes 
80939ddd567SLois Curfman McInnes .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
8108965ea79SLois Curfman McInnes @*/
81118f449edSLois Curfman McInnes int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,Scalar *data,Mat *newmat)
8128965ea79SLois Curfman McInnes {
8138965ea79SLois Curfman McInnes   Mat          mat;
81439ddd567SLois Curfman McInnes   Mat_MPIDense *a;
81525cdf11fSBarry Smith   int          ierr, i,flg;
8168965ea79SLois Curfman McInnes 
817ed2daf61SLois Curfman McInnes /* Note:  For now, when data is specified above, this assumes the user correctly
818ed2daf61SLois Curfman McInnes    allocates the local dense storage space.  We should add error checking. */
81918f449edSLois Curfman McInnes 
8208965ea79SLois Curfman McInnes   *newmat         = 0;
8210452661fSBarry Smith   PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIDENSE,comm);
8228965ea79SLois Curfman McInnes   PLogObjectCreate(mat);
8230452661fSBarry Smith   mat->data       = (void *) (a = PetscNew(Mat_MPIDense)); CHKPTRQ(a);
8248965ea79SLois Curfman McInnes   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
82539ddd567SLois Curfman McInnes   mat->destroy    = MatDestroy_MPIDense;
82639ddd567SLois Curfman McInnes   mat->view       = MatView_MPIDense;
8278965ea79SLois Curfman McInnes   mat->factor     = 0;
8288965ea79SLois Curfman McInnes 
829622d7880SLois Curfman McInnes   a->factor = 0;
8308965ea79SLois Curfman McInnes   a->insertmode = NOT_SET_VALUES;
8318965ea79SLois Curfman McInnes   MPI_Comm_rank(comm,&a->rank);
8328965ea79SLois Curfman McInnes   MPI_Comm_size(comm,&a->size);
8338965ea79SLois Curfman McInnes 
83439ddd567SLois Curfman McInnes   if (M == PETSC_DECIDE) MPI_Allreduce(&m,&M,1,MPI_INT,MPI_SUM,comm);
8358965ea79SLois Curfman McInnes   if (m == PETSC_DECIDE) {m = M/a->size + ((M % a->size) > a->rank);}
83639ddd567SLois Curfman McInnes 
83739ddd567SLois Curfman McInnes   /* each row stores all columns */
83839ddd567SLois Curfman McInnes   if (N == PETSC_DECIDE) N = n;
839d7e8b826SBarry Smith   if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);}
840d7e8b826SBarry Smith   /*  if (n != N) SETERRQ(1,"MatCreateMPIDense:For now, only n=N is supported"); */
8418965ea79SLois Curfman McInnes   a->N = N;
8428965ea79SLois Curfman McInnes   a->M = M;
84339ddd567SLois Curfman McInnes   a->m = m;
84439ddd567SLois Curfman McInnes   a->n = n;
8458965ea79SLois Curfman McInnes 
8468965ea79SLois Curfman McInnes   /* build local table of row and column ownerships */
847d7e8b826SBarry Smith   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
848d7e8b826SBarry Smith   a->cowners = a->rowners + a->size + 1;
849d7e8b826SBarry Smith   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+
85039ddd567SLois Curfman McInnes                        sizeof(Mat_MPIDense));
8518965ea79SLois Curfman McInnes   MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);
8528965ea79SLois Curfman McInnes   a->rowners[0] = 0;
8538965ea79SLois Curfman McInnes   for ( i=2; i<=a->size; i++ ) {
8548965ea79SLois Curfman McInnes     a->rowners[i] += a->rowners[i-1];
8558965ea79SLois Curfman McInnes   }
8568965ea79SLois Curfman McInnes   a->rstart = a->rowners[a->rank];
8578965ea79SLois Curfman McInnes   a->rend   = a->rowners[a->rank+1];
858d7e8b826SBarry Smith   MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);
859d7e8b826SBarry Smith   a->cowners[0] = 0;
860d7e8b826SBarry Smith   for ( i=2; i<=a->size; i++ ) {
861d7e8b826SBarry Smith     a->cowners[i] += a->cowners[i-1];
862d7e8b826SBarry Smith   }
8638965ea79SLois Curfman McInnes 
86418f449edSLois Curfman McInnes   ierr = MatCreateSeqDense(MPI_COMM_SELF,m,N,data,&a->A); CHKERRQ(ierr);
8658965ea79SLois Curfman McInnes   PLogObjectParent(mat,a->A);
8668965ea79SLois Curfman McInnes 
8678965ea79SLois Curfman McInnes   /* build cache for off array entries formed */
8688965ea79SLois Curfman McInnes   ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr);
8698965ea79SLois Curfman McInnes 
8708965ea79SLois Curfman McInnes   /* stuff used for matrix vector multiply */
8718965ea79SLois Curfman McInnes   a->lvec        = 0;
8728965ea79SLois Curfman McInnes   a->Mvctx       = 0;
87339b7565bSBarry Smith   a->roworiented = 1;
8748965ea79SLois Curfman McInnes 
8758965ea79SLois Curfman McInnes   *newmat = mat;
87625cdf11fSBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
87725cdf11fSBarry Smith   if (flg) {
8788c469469SLois Curfman McInnes     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
8798c469469SLois Curfman McInnes   }
8808965ea79SLois Curfman McInnes   return 0;
8818965ea79SLois Curfman McInnes }
8828965ea79SLois Curfman McInnes 
8833d1612f7SBarry Smith static int MatConvertSameType_MPIDense(Mat A,Mat *newmat,int cpvalues)
8848965ea79SLois Curfman McInnes {
8858965ea79SLois Curfman McInnes   Mat          mat;
8863501a2bdSLois Curfman McInnes   Mat_MPIDense *a,*oldmat = (Mat_MPIDense *) A->data;
88739ddd567SLois Curfman McInnes   int          ierr;
8882ba99913SLois Curfman McInnes   FactorCtx    *factor;
8898965ea79SLois Curfman McInnes 
8908965ea79SLois Curfman McInnes   *newmat       = 0;
8910452661fSBarry Smith   PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIDENSE,A->comm);
8928965ea79SLois Curfman McInnes   PLogObjectCreate(mat);
8930452661fSBarry Smith   mat->data     = (void *) (a = PetscNew(Mat_MPIDense)); CHKPTRQ(a);
8948965ea79SLois Curfman McInnes   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
89539ddd567SLois Curfman McInnes   mat->destroy  = MatDestroy_MPIDense;
89639ddd567SLois Curfman McInnes   mat->view     = MatView_MPIDense;
8973501a2bdSLois Curfman McInnes   mat->factor   = A->factor;
898*c456f294SBarry Smith   mat->assembled  = PETSC_TRUE;
8998965ea79SLois Curfman McInnes 
9008965ea79SLois Curfman McInnes   a->m          = oldmat->m;
9018965ea79SLois Curfman McInnes   a->n          = oldmat->n;
9028965ea79SLois Curfman McInnes   a->M          = oldmat->M;
9038965ea79SLois Curfman McInnes   a->N          = oldmat->N;
9042ba99913SLois Curfman McInnes   if (oldmat->factor) {
9052ba99913SLois Curfman McInnes     a->factor = (FactorCtx *) (factor = PetscNew(FactorCtx)); CHKPTRQ(factor);
9062ba99913SLois Curfman McInnes     /* copy factor contents ... add this code! */
9072ba99913SLois Curfman McInnes   } else a->factor = 0;
9088965ea79SLois Curfman McInnes 
9098965ea79SLois Curfman McInnes   a->rstart     = oldmat->rstart;
9108965ea79SLois Curfman McInnes   a->rend       = oldmat->rend;
9118965ea79SLois Curfman McInnes   a->size       = oldmat->size;
9128965ea79SLois Curfman McInnes   a->rank       = oldmat->rank;
9138965ea79SLois Curfman McInnes   a->insertmode = NOT_SET_VALUES;
9148965ea79SLois Curfman McInnes 
9150452661fSBarry Smith   a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners);
91639ddd567SLois Curfman McInnes   PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIDense));
9178965ea79SLois Curfman McInnes   PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));
9188965ea79SLois Curfman McInnes   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
9198965ea79SLois Curfman McInnes 
9208965ea79SLois Curfman McInnes   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
9218965ea79SLois Curfman McInnes   PLogObjectParent(mat,a->lvec);
92255659b69SBarry Smith   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
9238965ea79SLois Curfman McInnes   PLogObjectParent(mat,a->Mvctx);
9248965ea79SLois Curfman McInnes   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
9258965ea79SLois Curfman McInnes   PLogObjectParent(mat,a->A);
9268965ea79SLois Curfman McInnes   *newmat = mat;
9278965ea79SLois Curfman McInnes   return 0;
9288965ea79SLois Curfman McInnes }
9298965ea79SLois Curfman McInnes 
9308965ea79SLois Curfman McInnes #include "sysio.h"
9318965ea79SLois Curfman McInnes 
93239ddd567SLois Curfman McInnes int MatLoad_MPIDense(Viewer bview,MatType type,Mat *newmat)
9338965ea79SLois Curfman McInnes {
9348965ea79SLois Curfman McInnes   Mat          A;
9358965ea79SLois Curfman McInnes   int          i, nz, ierr, j,rstart, rend, fd;
9368965ea79SLois Curfman McInnes   Scalar       *vals,*svals;
9378965ea79SLois Curfman McInnes   PetscObject  vobj = (PetscObject) bview;
9388965ea79SLois Curfman McInnes   MPI_Comm     comm = vobj->comm;
9398965ea79SLois Curfman McInnes   MPI_Status   status;
9408965ea79SLois Curfman McInnes   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
9418965ea79SLois Curfman McInnes   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
9428965ea79SLois Curfman McInnes   int          tag = ((PetscObject)bview)->tag;
9438965ea79SLois Curfman McInnes 
9448965ea79SLois Curfman McInnes   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
9458965ea79SLois Curfman McInnes   if (!rank) {
9468965ea79SLois Curfman McInnes     ierr = ViewerFileGetDescriptor_Private(bview,&fd); CHKERRQ(ierr);
9478965ea79SLois Curfman McInnes     ierr = SYRead(fd,(char *)header,4,SYINT); CHKERRQ(ierr);
94839ddd567SLois Curfman McInnes     if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIDenseorMPIRow:not matrix object");
9498965ea79SLois Curfman McInnes   }
9508965ea79SLois Curfman McInnes 
9518965ea79SLois Curfman McInnes   MPI_Bcast(header+1,3,MPI_INT,0,comm);
9528965ea79SLois Curfman McInnes   M = header[1]; N = header[2];
9538965ea79SLois Curfman McInnes   /* determine ownership of all rows */
9548965ea79SLois Curfman McInnes   m = M/size + ((M % size) > rank);
9550452661fSBarry Smith   rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
9568965ea79SLois Curfman McInnes   MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
9578965ea79SLois Curfman McInnes   rowners[0] = 0;
9588965ea79SLois Curfman McInnes   for ( i=2; i<=size; i++ ) {
9598965ea79SLois Curfman McInnes     rowners[i] += rowners[i-1];
9608965ea79SLois Curfman McInnes   }
9618965ea79SLois Curfman McInnes   rstart = rowners[rank];
9628965ea79SLois Curfman McInnes   rend   = rowners[rank+1];
9638965ea79SLois Curfman McInnes 
9648965ea79SLois Curfman McInnes   /* distribute row lengths to all processors */
9650452661fSBarry Smith   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
9668965ea79SLois Curfman McInnes   offlens = ourlens + (rend-rstart);
9678965ea79SLois Curfman McInnes   if (!rank) {
9680452661fSBarry Smith     rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
9698965ea79SLois Curfman McInnes     ierr = SYRead(fd,rowlengths,M,SYINT); CHKERRQ(ierr);
9700452661fSBarry Smith     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
9718965ea79SLois Curfman McInnes     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
9728965ea79SLois Curfman McInnes     MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
9730452661fSBarry Smith     PetscFree(sndcounts);
9748965ea79SLois Curfman McInnes   }
9758965ea79SLois Curfman McInnes   else {
9768965ea79SLois Curfman McInnes     MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
9778965ea79SLois Curfman McInnes   }
9788965ea79SLois Curfman McInnes 
9798965ea79SLois Curfman McInnes   if (!rank) {
9808965ea79SLois Curfman McInnes     /* calculate the number of nonzeros on each processor */
9810452661fSBarry Smith     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
982cddf8d76SBarry Smith     PetscMemzero(procsnz,size*sizeof(int));
9838965ea79SLois Curfman McInnes     for ( i=0; i<size; i++ ) {
9848965ea79SLois Curfman McInnes       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
9858965ea79SLois Curfman McInnes         procsnz[i] += rowlengths[j];
9868965ea79SLois Curfman McInnes       }
9878965ea79SLois Curfman McInnes     }
9880452661fSBarry Smith     PetscFree(rowlengths);
9898965ea79SLois Curfman McInnes 
9908965ea79SLois Curfman McInnes     /* determine max buffer needed and allocate it */
9918965ea79SLois Curfman McInnes     maxnz = 0;
9928965ea79SLois Curfman McInnes     for ( i=0; i<size; i++ ) {
9930452661fSBarry Smith       maxnz = PetscMax(maxnz,procsnz[i]);
9948965ea79SLois Curfman McInnes     }
9950452661fSBarry Smith     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
9968965ea79SLois Curfman McInnes 
9978965ea79SLois Curfman McInnes     /* read in my part of the matrix column indices  */
9988965ea79SLois Curfman McInnes     nz = procsnz[0];
9990452661fSBarry Smith     mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
10008965ea79SLois Curfman McInnes     ierr = SYRead(fd,mycols,nz,SYINT); CHKERRQ(ierr);
10018965ea79SLois Curfman McInnes 
10028965ea79SLois Curfman McInnes     /* read in every one elses and ship off */
10038965ea79SLois Curfman McInnes     for ( i=1; i<size; i++ ) {
10048965ea79SLois Curfman McInnes       nz = procsnz[i];
10058965ea79SLois Curfman McInnes       ierr = SYRead(fd,cols,nz,SYINT); CHKERRQ(ierr);
10068965ea79SLois Curfman McInnes       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
10078965ea79SLois Curfman McInnes     }
10080452661fSBarry Smith     PetscFree(cols);
10098965ea79SLois Curfman McInnes   }
10108965ea79SLois Curfman McInnes   else {
10118965ea79SLois Curfman McInnes     /* determine buffer space needed for message */
10128965ea79SLois Curfman McInnes     nz = 0;
10138965ea79SLois Curfman McInnes     for ( i=0; i<m; i++ ) {
10148965ea79SLois Curfman McInnes       nz += ourlens[i];
10158965ea79SLois Curfman McInnes     }
10160452661fSBarry Smith     mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
10178965ea79SLois Curfman McInnes 
10188965ea79SLois Curfman McInnes     /* receive message of column indices*/
10198965ea79SLois Curfman McInnes     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
10208965ea79SLois Curfman McInnes     MPI_Get_count(&status,MPI_INT,&maxnz);
102139ddd567SLois Curfman McInnes     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIDenseorMPIRow:something is wrong with file");
10228965ea79SLois Curfman McInnes   }
10238965ea79SLois Curfman McInnes 
10248965ea79SLois Curfman McInnes   /* loop over local rows, determining number of off diagonal entries */
1025cddf8d76SBarry Smith   PetscMemzero(offlens,m*sizeof(int));
10268965ea79SLois Curfman McInnes   jj = 0;
10278965ea79SLois Curfman McInnes   for ( i=0; i<m; i++ ) {
10288965ea79SLois Curfman McInnes     for ( j=0; j<ourlens[i]; j++ ) {
10298965ea79SLois Curfman McInnes       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
10308965ea79SLois Curfman McInnes       jj++;
10318965ea79SLois Curfman McInnes     }
10328965ea79SLois Curfman McInnes   }
10338965ea79SLois Curfman McInnes 
10348965ea79SLois Curfman McInnes   /* create our matrix */
10358965ea79SLois Curfman McInnes   for ( i=0; i<m; i++ ) {
10368965ea79SLois Curfman McInnes     ourlens[i] -= offlens[i];
10378965ea79SLois Curfman McInnes   }
103839ddd567SLois Curfman McInnes   if (type == MATMPIDENSE) {
1039b4fd4287SBarry Smith     ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat); CHKERRQ(ierr);
10408965ea79SLois Curfman McInnes   }
10418965ea79SLois Curfman McInnes   A = *newmat;
10428965ea79SLois Curfman McInnes   for ( i=0; i<m; i++ ) {
10438965ea79SLois Curfman McInnes     ourlens[i] += offlens[i];
10448965ea79SLois Curfman McInnes   }
10458965ea79SLois Curfman McInnes 
10468965ea79SLois Curfman McInnes   if (!rank) {
10470452661fSBarry Smith     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);
10488965ea79SLois Curfman McInnes 
10498965ea79SLois Curfman McInnes     /* read in my part of the matrix numerical values  */
10508965ea79SLois Curfman McInnes     nz = procsnz[0];
10518965ea79SLois Curfman McInnes     ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr);
10528965ea79SLois Curfman McInnes 
10538965ea79SLois Curfman McInnes     /* insert into matrix */
10548965ea79SLois Curfman McInnes     jj      = rstart;
10558965ea79SLois Curfman McInnes     smycols = mycols;
10568965ea79SLois Curfman McInnes     svals   = vals;
10578965ea79SLois Curfman McInnes     for ( i=0; i<m; i++ ) {
10588965ea79SLois Curfman McInnes       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
10598965ea79SLois Curfman McInnes       smycols += ourlens[i];
10608965ea79SLois Curfman McInnes       svals   += ourlens[i];
10618965ea79SLois Curfman McInnes       jj++;
10628965ea79SLois Curfman McInnes     }
10638965ea79SLois Curfman McInnes 
10648965ea79SLois Curfman McInnes     /* read in other processors and ship out */
10658965ea79SLois Curfman McInnes     for ( i=1; i<size; i++ ) {
10668965ea79SLois Curfman McInnes       nz = procsnz[i];
10678965ea79SLois Curfman McInnes       ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr);
10688965ea79SLois Curfman McInnes       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
10698965ea79SLois Curfman McInnes     }
10700452661fSBarry Smith     PetscFree(procsnz);
10718965ea79SLois Curfman McInnes   }
10728965ea79SLois Curfman McInnes   else {
10738965ea79SLois Curfman McInnes     /* receive numeric values */
10740452661fSBarry Smith     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);
10758965ea79SLois Curfman McInnes 
10768965ea79SLois Curfman McInnes     /* receive message of values*/
10778965ea79SLois Curfman McInnes     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
10788965ea79SLois Curfman McInnes     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
107939ddd567SLois Curfman McInnes     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIDenseorMPIRow:something is wrong with file");
10808965ea79SLois Curfman McInnes 
10818965ea79SLois Curfman McInnes     /* insert into matrix */
10828965ea79SLois Curfman McInnes     jj      = rstart;
10838965ea79SLois Curfman McInnes     smycols = mycols;
10848965ea79SLois Curfman McInnes     svals   = vals;
10858965ea79SLois Curfman McInnes     for ( i=0; i<m; i++ ) {
10868965ea79SLois Curfman McInnes       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
10878965ea79SLois Curfman McInnes       smycols += ourlens[i];
10888965ea79SLois Curfman McInnes       svals   += ourlens[i];
10898965ea79SLois Curfman McInnes       jj++;
10908965ea79SLois Curfman McInnes     }
10918965ea79SLois Curfman McInnes   }
10920452661fSBarry Smith   PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);
10938965ea79SLois Curfman McInnes 
10948965ea79SLois Curfman McInnes   ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
10958965ea79SLois Curfman McInnes   ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
10968965ea79SLois Curfman McInnes   return 0;
10978965ea79SLois Curfman McInnes }
1098