xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision ff14e315fb71bb9cd7a13481bbc735a7713ac235)
18965ea79SLois Curfman McInnes #ifndef lint
2*ff14e315SSatish Balay static char vcid[] = "$Id: mpidense.c,v 1.29 1996/03/04 05:15:49 bsmith Exp balay $";
38965ea79SLois Curfman McInnes #endif
48965ea79SLois Curfman McInnes 
5ed3cc1f0SBarry Smith /*
6ed3cc1f0SBarry Smith    Basic functions for basic parallel dense matrices.
7ed3cc1f0SBarry Smith */
8ed3cc1f0SBarry Smith 
98965ea79SLois Curfman McInnes #include "mpidense.h"
108965ea79SLois Curfman McInnes #include "vec/vecimpl.h"
118965ea79SLois Curfman McInnes 
1239ddd567SLois Curfman McInnes static int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,
138965ea79SLois Curfman McInnes                                int *idxn,Scalar *v,InsertMode addv)
148965ea79SLois Curfman McInnes {
1539b7565bSBarry Smith   Mat_MPIDense *A = (Mat_MPIDense *) mat->data;
1639b7565bSBarry Smith   int          ierr, i, j, rstart = A->rstart, rend = A->rend, row;
1739b7565bSBarry Smith   int          roworiented = A->roworiented;
188965ea79SLois Curfman McInnes 
1939b7565bSBarry Smith   if (A->insertmode != NOT_SET_VALUES && A->insertmode != addv) {
2039ddd567SLois Curfman McInnes     SETERRQ(1,"MatSetValues_MPIDense:Cannot mix inserts and adds");
218965ea79SLois Curfman McInnes   }
2239b7565bSBarry Smith   A->insertmode = addv;
238965ea79SLois Curfman McInnes   for ( i=0; i<m; i++ ) {
2439ddd567SLois Curfman McInnes     if (idxm[i] < 0) SETERRQ(1,"MatSetValues_MPIDense:Negative row");
2539b7565bSBarry Smith     if (idxm[i] >= A->M) SETERRQ(1,"MatSetValues_MPIDense:Row too large");
268965ea79SLois Curfman McInnes     if (idxm[i] >= rstart && idxm[i] < rend) {
278965ea79SLois Curfman McInnes       row = idxm[i] - rstart;
2839b7565bSBarry Smith       if (roworiented) {
2939b7565bSBarry Smith         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv); CHKERRQ(ierr);
3039b7565bSBarry Smith       }
3139b7565bSBarry Smith       else {
328965ea79SLois Curfman McInnes         for ( j=0; j<n; j++ ) {
3339ddd567SLois Curfman McInnes           if (idxn[j] < 0) SETERRQ(1,"MatSetValues_MPIDense:Negative column");
3439b7565bSBarry Smith           if (idxn[j] >= A->N) SETERRQ(1,"MatSetValues_MPIDense:Column too large");
3539b7565bSBarry Smith           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv); CHKERRQ(ierr);
3639b7565bSBarry Smith         }
378965ea79SLois Curfman McInnes       }
388965ea79SLois Curfman McInnes     }
398965ea79SLois Curfman McInnes     else {
4039b7565bSBarry Smith       if (roworiented) {
4139b7565bSBarry Smith         ierr = StashValues_Private(&A->stash,idxm[i],n,idxn,v+i*n,addv); CHKERRQ(ierr);
4239b7565bSBarry Smith       }
4339b7565bSBarry Smith       else { /* must stash each seperately */
4439b7565bSBarry Smith         row = idxm[i];
4539b7565bSBarry Smith         for ( j=0; j<n; j++ ) {
4639b7565bSBarry Smith           ierr = StashValues_Private(&A->stash,row,1,&idxn[j],v+i+j*m,addv);
4739b7565bSBarry Smith                  CHKERRQ(ierr);
4839b7565bSBarry Smith         }
4939b7565bSBarry Smith       }
50b49de8d1SLois Curfman McInnes     }
51b49de8d1SLois Curfman McInnes   }
52b49de8d1SLois Curfman McInnes   return 0;
53b49de8d1SLois Curfman McInnes }
54b49de8d1SLois Curfman McInnes 
55b49de8d1SLois Curfman McInnes static int MatGetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
56b49de8d1SLois Curfman McInnes {
57b49de8d1SLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
58b49de8d1SLois Curfman McInnes   int          ierr, i, j, rstart = mdn->rstart, rend = mdn->rend, row;
59b49de8d1SLois Curfman McInnes 
60b49de8d1SLois Curfman McInnes   for ( i=0; i<m; i++ ) {
61b49de8d1SLois Curfman McInnes     if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIDense:Negative row");
62b49de8d1SLois Curfman McInnes     if (idxm[i] >= mdn->M) SETERRQ(1,"MatGetValues_MPIDense:Row too large");
63b49de8d1SLois Curfman McInnes     if (idxm[i] >= rstart && idxm[i] < rend) {
64b49de8d1SLois Curfman McInnes       row = idxm[i] - rstart;
65b49de8d1SLois Curfman McInnes       for ( j=0; j<n; j++ ) {
66b49de8d1SLois Curfman McInnes         if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIDense:Negative column");
67b49de8d1SLois Curfman McInnes         if (idxn[j] >= mdn->N)
68b49de8d1SLois Curfman McInnes           SETERRQ(1,"MatGetValues_MPIDense:Column too large");
69b49de8d1SLois Curfman McInnes         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j); CHKERRQ(ierr);
70b49de8d1SLois Curfman McInnes       }
71b49de8d1SLois Curfman McInnes     }
72b49de8d1SLois Curfman McInnes     else {
73b49de8d1SLois Curfman McInnes       SETERRQ(1,"MatGetValues_MPIDense:Only local values currently supported");
748965ea79SLois Curfman McInnes     }
758965ea79SLois Curfman McInnes   }
768965ea79SLois Curfman McInnes   return 0;
778965ea79SLois Curfman McInnes }
788965ea79SLois Curfman McInnes 
79*ff14e315SSatish Balay static int MatGetArray_MPIDense(Mat A,Scalar **array)
80*ff14e315SSatish Balay {
81*ff14e315SSatish Balay   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
82*ff14e315SSatish Balay   int ierr;
83*ff14e315SSatish Balay 
84*ff14e315SSatish Balay   ierr = MatGetArray(a->A,array); CHKERRQ(ierr);
85*ff14e315SSatish Balay   return 0;
86*ff14e315SSatish Balay }
87*ff14e315SSatish Balay 
88*ff14e315SSatish Balay static int MatRestoreArray_MPIDense(Mat A,Scalar **array)
89*ff14e315SSatish Balay {
90*ff14e315SSatish Balay   return 0;
91*ff14e315SSatish Balay }
92*ff14e315SSatish Balay 
9339ddd567SLois Curfman McInnes static int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
948965ea79SLois Curfman McInnes {
9539ddd567SLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
968965ea79SLois Curfman McInnes   MPI_Comm     comm = mat->comm;
9739ddd567SLois Curfman McInnes   int          size = mdn->size, *owners = mdn->rowners, rank = mdn->rank;
988965ea79SLois Curfman McInnes   int          *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
9939ddd567SLois Curfman McInnes   int          tag = mat->tag, *owner,*starts,count,ierr;
1008965ea79SLois Curfman McInnes   InsertMode   addv;
10139ddd567SLois Curfman McInnes   MPI_Request  *send_waits,*recv_waits;
1028965ea79SLois Curfman McInnes   Scalar       *rvalues,*svalues;
1038965ea79SLois Curfman McInnes 
1048965ea79SLois Curfman McInnes   /* make sure all processors are either in INSERTMODE or ADDMODE */
1056d84be18SBarry Smith   MPI_Allreduce(&mdn->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
10639ddd567SLois Curfman McInnes   if (addv == (ADD_VALUES|INSERT_VALUES)) { SETERRQ(1,
10739ddd567SLois Curfman McInnes     "MatAssemblyBegin_MPIDense:Cannot mix adds/inserts on different procs");
1088965ea79SLois Curfman McInnes     }
10939ddd567SLois Curfman McInnes   mdn->insertmode = addv; /* in case this processor had no cache */
1108965ea79SLois Curfman McInnes 
1118965ea79SLois Curfman McInnes   /*  first count number of contributors to each processor */
1120452661fSBarry Smith   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
113cddf8d76SBarry Smith   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
1140452661fSBarry Smith   owner = (int *) PetscMalloc( (mdn->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
11539ddd567SLois Curfman McInnes   for ( i=0; i<mdn->stash.n; i++ ) {
11639ddd567SLois Curfman McInnes     idx = mdn->stash.idx[i];
1178965ea79SLois Curfman McInnes     for ( j=0; j<size; j++ ) {
1188965ea79SLois Curfman McInnes       if (idx >= owners[j] && idx < owners[j+1]) {
1198965ea79SLois Curfman McInnes         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
1208965ea79SLois Curfman McInnes       }
1218965ea79SLois Curfman McInnes     }
1228965ea79SLois Curfman McInnes   }
1238965ea79SLois Curfman McInnes   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
1248965ea79SLois Curfman McInnes 
1258965ea79SLois Curfman McInnes   /* inform other processors of number of messages and max length*/
1260452661fSBarry Smith   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
1276d84be18SBarry Smith   MPI_Allreduce(procs,work,size,MPI_INT,MPI_SUM,comm);
1288965ea79SLois Curfman McInnes   nreceives = work[rank];
1296d84be18SBarry Smith   MPI_Allreduce(nprocs,work,size,MPI_INT,MPI_MAX,comm);
1308965ea79SLois Curfman McInnes   nmax = work[rank];
1310452661fSBarry Smith   PetscFree(work);
1328965ea79SLois Curfman McInnes 
1338965ea79SLois Curfman McInnes   /* post receives:
1348965ea79SLois Curfman McInnes        1) each message will consist of ordered pairs
1358965ea79SLois Curfman McInnes      (global index,value) we store the global index as a double
1368965ea79SLois Curfman McInnes      to simplify the message passing.
1378965ea79SLois Curfman McInnes        2) since we don't know how long each individual message is we
1388965ea79SLois Curfman McInnes      allocate the largest needed buffer for each receive. Potentially
1398965ea79SLois Curfman McInnes      this is a lot of wasted space.
1408965ea79SLois Curfman McInnes 
1418965ea79SLois Curfman McInnes        This could be done better.
1428965ea79SLois Curfman McInnes   */
1430452661fSBarry Smith   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
1448965ea79SLois Curfman McInnes   CHKPTRQ(rvalues);
1450452661fSBarry Smith   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
1468965ea79SLois Curfman McInnes   CHKPTRQ(recv_waits);
1478965ea79SLois Curfman McInnes   for ( i=0; i<nreceives; i++ ) {
1486d84be18SBarry Smith     MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
1498965ea79SLois Curfman McInnes               comm,recv_waits+i);
1508965ea79SLois Curfman McInnes   }
1518965ea79SLois Curfman McInnes 
1528965ea79SLois Curfman McInnes   /* do sends:
1538965ea79SLois Curfman McInnes       1) starts[i] gives the starting index in svalues for stuff going to
1548965ea79SLois Curfman McInnes          the ith processor
1558965ea79SLois Curfman McInnes   */
1560452661fSBarry Smith   svalues = (Scalar *) PetscMalloc( 3*(mdn->stash.n+1)*sizeof(Scalar) );
15739ddd567SLois Curfman McInnes   CHKPTRQ(svalues);
1580452661fSBarry Smith   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
1598965ea79SLois Curfman McInnes   CHKPTRQ(send_waits);
1600452661fSBarry Smith   starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
1618965ea79SLois Curfman McInnes   starts[0] = 0;
1628965ea79SLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
16339ddd567SLois Curfman McInnes   for ( i=0; i<mdn->stash.n; i++ ) {
16439ddd567SLois Curfman McInnes     svalues[3*starts[owner[i]]]       = (Scalar)  mdn->stash.idx[i];
16539ddd567SLois Curfman McInnes     svalues[3*starts[owner[i]]+1]     = (Scalar)  mdn->stash.idy[i];
16639ddd567SLois Curfman McInnes     svalues[3*(starts[owner[i]]++)+2] =  mdn->stash.array[i];
1678965ea79SLois Curfman McInnes   }
1680452661fSBarry Smith   PetscFree(owner);
1698965ea79SLois Curfman McInnes   starts[0] = 0;
1708965ea79SLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
1718965ea79SLois Curfman McInnes   count = 0;
1728965ea79SLois Curfman McInnes   for ( i=0; i<size; i++ ) {
1738965ea79SLois Curfman McInnes     if (procs[i]) {
1746d84be18SBarry Smith       MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
1758965ea79SLois Curfman McInnes                 comm,send_waits+count++);
1768965ea79SLois Curfman McInnes     }
1778965ea79SLois Curfman McInnes   }
1780452661fSBarry Smith   PetscFree(starts); PetscFree(nprocs);
1798965ea79SLois Curfman McInnes 
1808965ea79SLois Curfman McInnes   /* Free cache space */
18139ddd567SLois Curfman McInnes   ierr = StashDestroy_Private(&mdn->stash); CHKERRQ(ierr);
1828965ea79SLois Curfman McInnes 
18339ddd567SLois Curfman McInnes   mdn->svalues    = svalues;    mdn->rvalues = rvalues;
18439ddd567SLois Curfman McInnes   mdn->nsends     = nsends;     mdn->nrecvs = nreceives;
18539ddd567SLois Curfman McInnes   mdn->send_waits = send_waits; mdn->recv_waits = recv_waits;
18639ddd567SLois Curfman McInnes   mdn->rmax       = nmax;
1878965ea79SLois Curfman McInnes 
1888965ea79SLois Curfman McInnes   return 0;
1898965ea79SLois Curfman McInnes }
19039ddd567SLois Curfman McInnes extern int MatSetUpMultiply_MPIDense(Mat);
1918965ea79SLois Curfman McInnes 
19239ddd567SLois Curfman McInnes static int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
1938965ea79SLois Curfman McInnes {
19439ddd567SLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
1958965ea79SLois Curfman McInnes   MPI_Status   *send_status,recv_status;
19639ddd567SLois Curfman McInnes   int          imdex, nrecvs=mdn->nrecvs, count=nrecvs, i, n, ierr, row, col;
1978965ea79SLois Curfman McInnes   Scalar       *values,val;
19839ddd567SLois Curfman McInnes   InsertMode   addv = mdn->insertmode;
1998965ea79SLois Curfman McInnes 
2008965ea79SLois Curfman McInnes   /*  wait on receives */
2018965ea79SLois Curfman McInnes   while (count) {
20239ddd567SLois Curfman McInnes     MPI_Waitany(nrecvs,mdn->recv_waits,&imdex,&recv_status);
2038965ea79SLois Curfman McInnes     /* unpack receives into our local space */
20439ddd567SLois Curfman McInnes     values = mdn->rvalues + 3*imdex*mdn->rmax;
2058965ea79SLois Curfman McInnes     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
2068965ea79SLois Curfman McInnes     n = n/3;
2078965ea79SLois Curfman McInnes     for ( i=0; i<n; i++ ) {
208227d817aSBarry Smith       row = (int) PetscReal(values[3*i]) - mdn->rstart;
209227d817aSBarry Smith       col = (int) PetscReal(values[3*i+1]);
2108965ea79SLois Curfman McInnes       val = values[3*i+2];
21139ddd567SLois Curfman McInnes       if (col >= 0 && col < mdn->N) {
21239ddd567SLois Curfman McInnes         MatSetValues(mdn->A,1,&row,1,&col,&val,addv);
2138965ea79SLois Curfman McInnes       }
21439ddd567SLois Curfman McInnes       else {SETERRQ(1,"MatAssemblyEnd_MPIDense:Invalid column");}
2158965ea79SLois Curfman McInnes     }
2168965ea79SLois Curfman McInnes     count--;
2178965ea79SLois Curfman McInnes   }
2180452661fSBarry Smith   PetscFree(mdn->recv_waits); PetscFree(mdn->rvalues);
2198965ea79SLois Curfman McInnes 
2208965ea79SLois Curfman McInnes   /* wait on sends */
22139ddd567SLois Curfman McInnes   if (mdn->nsends) {
2220452661fSBarry Smith     send_status = (MPI_Status *) PetscMalloc( mdn->nsends*sizeof(MPI_Status) );
2238965ea79SLois Curfman McInnes     CHKPTRQ(send_status);
22439ddd567SLois Curfman McInnes     MPI_Waitall(mdn->nsends,mdn->send_waits,send_status);
2250452661fSBarry Smith     PetscFree(send_status);
2268965ea79SLois Curfman McInnes   }
2270452661fSBarry Smith   PetscFree(mdn->send_waits); PetscFree(mdn->svalues);
2288965ea79SLois Curfman McInnes 
22939ddd567SLois Curfman McInnes   mdn->insertmode = NOT_SET_VALUES;
23039ddd567SLois Curfman McInnes   ierr = MatAssemblyBegin(mdn->A,mode); CHKERRQ(ierr);
23139ddd567SLois Curfman McInnes   ierr = MatAssemblyEnd(mdn->A,mode); CHKERRQ(ierr);
2328965ea79SLois Curfman McInnes 
233227d817aSBarry Smith   if (!mat->was_assembled && mode == FINAL_ASSEMBLY) {
23439ddd567SLois Curfman McInnes     ierr = MatSetUpMultiply_MPIDense(mat); CHKERRQ(ierr);
2358965ea79SLois Curfman McInnes   }
2368965ea79SLois Curfman McInnes   return 0;
2378965ea79SLois Curfman McInnes }
2388965ea79SLois Curfman McInnes 
23939ddd567SLois Curfman McInnes static int MatZeroEntries_MPIDense(Mat A)
2408965ea79SLois Curfman McInnes {
24139ddd567SLois Curfman McInnes   Mat_MPIDense *l = (Mat_MPIDense *) A->data;
24239ddd567SLois Curfman McInnes   return MatZeroEntries(l->A);
2438965ea79SLois Curfman McInnes }
2448965ea79SLois Curfman McInnes 
2458965ea79SLois Curfman McInnes /* the code does not do the diagonal entries correctly unless the
2468965ea79SLois Curfman McInnes    matrix is square and the column and row owerships are identical.
2478965ea79SLois Curfman McInnes    This is a BUG. The only way to fix it seems to be to access
2483501a2bdSLois Curfman McInnes    mdn->A and mdn->B directly and not through the MatZeroRows()
2498965ea79SLois Curfman McInnes    routine.
2508965ea79SLois Curfman McInnes */
25139ddd567SLois Curfman McInnes static int MatZeroRows_MPIDense(Mat A,IS is,Scalar *diag)
2528965ea79SLois Curfman McInnes {
25339ddd567SLois Curfman McInnes   Mat_MPIDense   *l = (Mat_MPIDense *) A->data;
2548965ea79SLois Curfman McInnes   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
2558965ea79SLois Curfman McInnes   int            *procs,*nprocs,j,found,idx,nsends,*work;
2568965ea79SLois Curfman McInnes   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
2578965ea79SLois Curfman McInnes   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
2588965ea79SLois Curfman McInnes   int            *lens,imdex,*lrows,*values;
2598965ea79SLois Curfman McInnes   MPI_Comm       comm = A->comm;
2608965ea79SLois Curfman McInnes   MPI_Request    *send_waits,*recv_waits;
2618965ea79SLois Curfman McInnes   MPI_Status     recv_status,*send_status;
2628965ea79SLois Curfman McInnes   IS             istmp;
2638965ea79SLois Curfman McInnes 
2648965ea79SLois Curfman McInnes   ierr = ISGetLocalSize(is,&N); CHKERRQ(ierr);
2658965ea79SLois Curfman McInnes   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
2668965ea79SLois Curfman McInnes 
2678965ea79SLois Curfman McInnes   /*  first count number of contributors to each processor */
2680452661fSBarry Smith   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
269cddf8d76SBarry Smith   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
2700452661fSBarry Smith   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
2718965ea79SLois Curfman McInnes   for ( i=0; i<N; i++ ) {
2728965ea79SLois Curfman McInnes     idx = rows[i];
2738965ea79SLois Curfman McInnes     found = 0;
2748965ea79SLois Curfman McInnes     for ( j=0; j<size; j++ ) {
2758965ea79SLois Curfman McInnes       if (idx >= owners[j] && idx < owners[j+1]) {
2768965ea79SLois Curfman McInnes         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
2778965ea79SLois Curfman McInnes       }
2788965ea79SLois Curfman McInnes     }
27939ddd567SLois Curfman McInnes     if (!found) SETERRQ(1,"MatZeroRows_MPIDense:Index out of range");
2808965ea79SLois Curfman McInnes   }
2818965ea79SLois Curfman McInnes   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
2828965ea79SLois Curfman McInnes 
2838965ea79SLois Curfman McInnes   /* inform other processors of number of messages and max length*/
2840452661fSBarry Smith   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
2858965ea79SLois Curfman McInnes   MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
2868965ea79SLois Curfman McInnes   nrecvs = work[rank];
2878965ea79SLois Curfman McInnes   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
2888965ea79SLois Curfman McInnes   nmax = work[rank];
2890452661fSBarry Smith   PetscFree(work);
2908965ea79SLois Curfman McInnes 
2918965ea79SLois Curfman McInnes   /* post receives:   */
2920452661fSBarry Smith   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
2938965ea79SLois Curfman McInnes   CHKPTRQ(rvalues);
2940452661fSBarry Smith   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
2958965ea79SLois Curfman McInnes   CHKPTRQ(recv_waits);
2968965ea79SLois Curfman McInnes   for ( i=0; i<nrecvs; i++ ) {
2978965ea79SLois Curfman McInnes     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
2988965ea79SLois Curfman McInnes   }
2998965ea79SLois Curfman McInnes 
3008965ea79SLois Curfman McInnes   /* do sends:
3018965ea79SLois Curfman McInnes       1) starts[i] gives the starting index in svalues for stuff going to
3028965ea79SLois Curfman McInnes          the ith processor
3038965ea79SLois Curfman McInnes   */
3040452661fSBarry Smith   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
3050452661fSBarry Smith   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
3068965ea79SLois Curfman McInnes   CHKPTRQ(send_waits);
3070452661fSBarry Smith   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
3088965ea79SLois Curfman McInnes   starts[0] = 0;
3098965ea79SLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
3108965ea79SLois Curfman McInnes   for ( i=0; i<N; i++ ) {
3118965ea79SLois Curfman McInnes     svalues[starts[owner[i]]++] = rows[i];
3128965ea79SLois Curfman McInnes   }
3138965ea79SLois Curfman McInnes   ISRestoreIndices(is,&rows);
3148965ea79SLois Curfman McInnes 
3158965ea79SLois Curfman McInnes   starts[0] = 0;
3168965ea79SLois Curfman McInnes   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
3178965ea79SLois Curfman McInnes   count = 0;
3188965ea79SLois Curfman McInnes   for ( i=0; i<size; i++ ) {
3198965ea79SLois Curfman McInnes     if (procs[i]) {
3208965ea79SLois Curfman McInnes       MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
3218965ea79SLois Curfman McInnes     }
3228965ea79SLois Curfman McInnes   }
3230452661fSBarry Smith   PetscFree(starts);
3248965ea79SLois Curfman McInnes 
3258965ea79SLois Curfman McInnes   base = owners[rank];
3268965ea79SLois Curfman McInnes 
3278965ea79SLois Curfman McInnes   /*  wait on receives */
3280452661fSBarry Smith   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
3298965ea79SLois Curfman McInnes   source = lens + nrecvs;
3308965ea79SLois Curfman McInnes   count  = nrecvs; slen = 0;
3318965ea79SLois Curfman McInnes   while (count) {
3328965ea79SLois Curfman McInnes     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
3338965ea79SLois Curfman McInnes     /* unpack receives into our local space */
3348965ea79SLois Curfman McInnes     MPI_Get_count(&recv_status,MPI_INT,&n);
3358965ea79SLois Curfman McInnes     source[imdex]  = recv_status.MPI_SOURCE;
3368965ea79SLois Curfman McInnes     lens[imdex]  = n;
3378965ea79SLois Curfman McInnes     slen += n;
3388965ea79SLois Curfman McInnes     count--;
3398965ea79SLois Curfman McInnes   }
3400452661fSBarry Smith   PetscFree(recv_waits);
3418965ea79SLois Curfman McInnes 
3428965ea79SLois Curfman McInnes   /* move the data into the send scatter */
3430452661fSBarry Smith   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
3448965ea79SLois Curfman McInnes   count = 0;
3458965ea79SLois Curfman McInnes   for ( i=0; i<nrecvs; i++ ) {
3468965ea79SLois Curfman McInnes     values = rvalues + i*nmax;
3478965ea79SLois Curfman McInnes     for ( j=0; j<lens[i]; j++ ) {
3488965ea79SLois Curfman McInnes       lrows[count++] = values[j] - base;
3498965ea79SLois Curfman McInnes     }
3508965ea79SLois Curfman McInnes   }
3510452661fSBarry Smith   PetscFree(rvalues); PetscFree(lens);
3520452661fSBarry Smith   PetscFree(owner); PetscFree(nprocs);
3538965ea79SLois Curfman McInnes 
3548965ea79SLois Curfman McInnes   /* actually zap the local rows */
3558965ea79SLois Curfman McInnes   ierr = ISCreateSeq(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
3568965ea79SLois Curfman McInnes   PLogObjectParent(A,istmp);
3570452661fSBarry Smith   PetscFree(lrows);
3588965ea79SLois Curfman McInnes   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
3598965ea79SLois Curfman McInnes   ierr = ISDestroy(istmp); CHKERRQ(ierr);
3608965ea79SLois Curfman McInnes 
3618965ea79SLois Curfman McInnes   /* wait on sends */
3628965ea79SLois Curfman McInnes   if (nsends) {
3630452661fSBarry Smith     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
3648965ea79SLois Curfman McInnes     CHKPTRQ(send_status);
3658965ea79SLois Curfman McInnes     MPI_Waitall(nsends,send_waits,send_status);
3660452661fSBarry Smith     PetscFree(send_status);
3678965ea79SLois Curfman McInnes   }
3680452661fSBarry Smith   PetscFree(send_waits); PetscFree(svalues);
3698965ea79SLois Curfman McInnes 
3708965ea79SLois Curfman McInnes   return 0;
3718965ea79SLois Curfman McInnes }
3728965ea79SLois Curfman McInnes 
37339ddd567SLois Curfman McInnes static int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
3748965ea79SLois Curfman McInnes {
37539ddd567SLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
3768965ea79SLois Curfman McInnes   int          ierr;
377c456f294SBarry Smith 
37839ddd567SLois Curfman McInnes   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx);
37939ddd567SLois Curfman McInnes   CHKERRQ(ierr);
38039ddd567SLois Curfman McInnes   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx);
38139ddd567SLois Curfman McInnes   CHKERRQ(ierr);
38239ddd567SLois Curfman McInnes   ierr = MatMult(mdn->A,mdn->lvec,yy); CHKERRQ(ierr);
3838965ea79SLois Curfman McInnes   return 0;
3848965ea79SLois Curfman McInnes }
3858965ea79SLois Curfman McInnes 
38639ddd567SLois Curfman McInnes static int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
3878965ea79SLois Curfman McInnes {
38839ddd567SLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
3898965ea79SLois Curfman McInnes   int          ierr;
390c456f294SBarry Smith 
391c456f294SBarry Smith   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx);CHKERRQ(ierr);
392c456f294SBarry Smith   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx);CHKERRQ(ierr);
39339ddd567SLois Curfman McInnes   ierr = MatMultAdd(mdn->A,mdn->lvec,yy,zz); CHKERRQ(ierr);
3948965ea79SLois Curfman McInnes   return 0;
3958965ea79SLois Curfman McInnes }
3968965ea79SLois Curfman McInnes 
397096963f5SLois Curfman McInnes static int MatMultTrans_MPIDense(Mat A,Vec xx,Vec yy)
398096963f5SLois Curfman McInnes {
399096963f5SLois Curfman McInnes   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
400096963f5SLois Curfman McInnes   int          ierr;
4013501a2bdSLois Curfman McInnes   Scalar       zero = 0.0;
402096963f5SLois Curfman McInnes 
4033501a2bdSLois Curfman McInnes   ierr = VecSet(&zero,yy); CHKERRQ(ierr);
404096963f5SLois Curfman McInnes   ierr = MatMultTrans(a->A,xx,a->lvec); CHKERRQ(ierr);
405096963f5SLois Curfman McInnes   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,
406096963f5SLois Curfman McInnes          (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
407096963f5SLois Curfman McInnes   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,
408096963f5SLois Curfman McInnes          (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
409096963f5SLois Curfman McInnes   return 0;
410096963f5SLois Curfman McInnes }
411096963f5SLois Curfman McInnes 
412096963f5SLois Curfman McInnes static int MatMultTransAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
413096963f5SLois Curfman McInnes {
414096963f5SLois Curfman McInnes   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
415096963f5SLois Curfman McInnes   int          ierr;
416096963f5SLois Curfman McInnes 
4173501a2bdSLois Curfman McInnes   ierr = VecCopy(yy,zz); CHKERRQ(ierr);
4183501a2bdSLois Curfman McInnes   ierr = MatMultTrans(a->A,xx,a->lvec); CHKERRQ(ierr);
419096963f5SLois Curfman McInnes   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,
420096963f5SLois Curfman McInnes          (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
421096963f5SLois Curfman McInnes   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,
422096963f5SLois Curfman McInnes          (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
423096963f5SLois Curfman McInnes   return 0;
424096963f5SLois Curfman McInnes }
425096963f5SLois Curfman McInnes 
42639ddd567SLois Curfman McInnes static int MatGetDiagonal_MPIDense(Mat A,Vec v)
4278965ea79SLois Curfman McInnes {
42839ddd567SLois Curfman McInnes   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
429096963f5SLois Curfman McInnes   Mat_SeqDense *aloc = (Mat_SeqDense *) a->A->data;
430096963f5SLois Curfman McInnes   int          ierr, i, n, m = a->m, radd;
431096963f5SLois Curfman McInnes   Scalar       *x;
432ed3cc1f0SBarry Smith 
433096963f5SLois Curfman McInnes   ierr = VecGetArray(v,&x); CHKERRQ(ierr);
434096963f5SLois Curfman McInnes   ierr = VecGetSize(v,&n); CHKERRQ(ierr);
435096963f5SLois Curfman McInnes   if (n != a->M) SETERRQ(1,"MatGetDiagonal_SeqDense:Nonconforming mat and vec");
436096963f5SLois Curfman McInnes   radd = a->rstart*m*m;
437096963f5SLois Curfman McInnes   for ( i=0; i<m; i++ ) {
438096963f5SLois Curfman McInnes     x[i] = aloc->v[radd + i*m + i];
439096963f5SLois Curfman McInnes   }
440096963f5SLois Curfman McInnes   return 0;
4418965ea79SLois Curfman McInnes }
4428965ea79SLois Curfman McInnes 
44339ddd567SLois Curfman McInnes static int MatDestroy_MPIDense(PetscObject obj)
4448965ea79SLois Curfman McInnes {
4458965ea79SLois Curfman McInnes   Mat          mat = (Mat) obj;
4463501a2bdSLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
4478965ea79SLois Curfman McInnes   int          ierr;
448ed3cc1f0SBarry Smith 
4498965ea79SLois Curfman McInnes #if defined(PETSC_LOG)
4503501a2bdSLois Curfman McInnes   PLogObjectState(obj,"Rows=%d, Cols=%d",mdn->M,mdn->N);
4518965ea79SLois Curfman McInnes #endif
4520452661fSBarry Smith   PetscFree(mdn->rowners);
4533501a2bdSLois Curfman McInnes   ierr = MatDestroy(mdn->A); CHKERRQ(ierr);
4543501a2bdSLois Curfman McInnes   if (mdn->lvec)   VecDestroy(mdn->lvec);
4553501a2bdSLois Curfman McInnes   if (mdn->Mvctx)  VecScatterDestroy(mdn->Mvctx);
456622d7880SLois Curfman McInnes   if (mdn->factor) {
457622d7880SLois Curfman McInnes     if (mdn->factor->temp)   PetscFree(mdn->factor->temp);
458622d7880SLois Curfman McInnes     if (mdn->factor->tag)    PetscFree(mdn->factor->tag);
459622d7880SLois Curfman McInnes     if (mdn->factor->pivots) PetscFree(mdn->factor->pivots);
460622d7880SLois Curfman McInnes     PetscFree(mdn->factor);
461622d7880SLois Curfman McInnes   }
4620452661fSBarry Smith   PetscFree(mdn);
4638965ea79SLois Curfman McInnes   PLogObjectDestroy(mat);
4640452661fSBarry Smith   PetscHeaderDestroy(mat);
4658965ea79SLois Curfman McInnes   return 0;
4668965ea79SLois Curfman McInnes }
46739ddd567SLois Curfman McInnes 
4688965ea79SLois Curfman McInnes #include "pinclude/pviewer.h"
4698965ea79SLois Curfman McInnes 
47039ddd567SLois Curfman McInnes static int MatView_MPIDense_Binary(Mat mat,Viewer viewer)
4718965ea79SLois Curfman McInnes {
47239ddd567SLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
4738965ea79SLois Curfman McInnes   int          ierr;
47439ddd567SLois Curfman McInnes   if (mdn->size == 1) {
47539ddd567SLois Curfman McInnes     ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
4768965ea79SLois Curfman McInnes   }
47739ddd567SLois Curfman McInnes   else SETERRQ(1,"MatView_MPIDense_Binary:Only uniprocessor output supported");
4788965ea79SLois Curfman McInnes   return 0;
4798965ea79SLois Curfman McInnes }
4808965ea79SLois Curfman McInnes 
48139ddd567SLois Curfman McInnes static int MatView_MPIDense_ASCII(Mat mat,Viewer viewer)
4828965ea79SLois Curfman McInnes {
48339ddd567SLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
48439ddd567SLois Curfman McInnes   int          ierr, format;
4858965ea79SLois Curfman McInnes   PetscObject  vobj = (PetscObject) viewer;
4868965ea79SLois Curfman McInnes   FILE         *fd;
4878965ea79SLois Curfman McInnes 
488227d817aSBarry Smith   ierr = ViewerFileGetPointer(viewer,&fd); CHKERRQ(ierr);
4898965ea79SLois Curfman McInnes   if (vobj->type == ASCII_FILE_VIEWER || vobj->type == ASCII_FILES_VIEWER) {
4908965ea79SLois Curfman McInnes     ierr = ViewerFileGetFormat_Private(viewer,&format);
4913501a2bdSLois Curfman McInnes     if (format == FILE_FORMAT_INFO_DETAILED) {
492096963f5SLois Curfman McInnes       int nz, nzalloc, mem, rank;
493096963f5SLois Curfman McInnes       MPI_Comm_rank(mat->comm,&rank);
494096963f5SLois Curfman McInnes       ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem);
495096963f5SLois Curfman McInnes       MPIU_Seq_begin(mat->comm,1);
4963501a2bdSLois Curfman McInnes         fprintf(fd,"  [%d] local rows %d nz %d nz alloced %d mem %d \n",
497096963f5SLois Curfman McInnes             rank,mdn->m,nz,nzalloc,mem);
498096963f5SLois Curfman McInnes       fflush(fd);
499096963f5SLois Curfman McInnes       MPIU_Seq_end(mat->comm,1);
5003501a2bdSLois Curfman McInnes       ierr = VecScatterView(mdn->Mvctx,viewer); CHKERRQ(ierr);
5013501a2bdSLois Curfman McInnes       return 0;
5023501a2bdSLois Curfman McInnes     }
5033501a2bdSLois Curfman McInnes     else if (format == FILE_FORMAT_INFO) {
504096963f5SLois Curfman McInnes       return 0;
5058965ea79SLois Curfman McInnes     }
5068965ea79SLois Curfman McInnes   }
5078965ea79SLois Curfman McInnes   if (vobj->type == ASCII_FILE_VIEWER) {
5088965ea79SLois Curfman McInnes     MPIU_Seq_begin(mat->comm,1);
50939ddd567SLois Curfman McInnes     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d\n",
51039ddd567SLois Curfman McInnes              mdn->rank,mdn->m,mdn->rstart,mdn->rend,mdn->n);
51139ddd567SLois Curfman McInnes     ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
5128965ea79SLois Curfman McInnes     fflush(fd);
5138965ea79SLois Curfman McInnes     MPIU_Seq_end(mat->comm,1);
5148965ea79SLois Curfman McInnes   }
5158965ea79SLois Curfman McInnes   else {
51639ddd567SLois Curfman McInnes     int size = mdn->size, rank = mdn->rank;
5178965ea79SLois Curfman McInnes     if (size == 1) {
51839ddd567SLois Curfman McInnes       ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
5198965ea79SLois Curfman McInnes     }
5208965ea79SLois Curfman McInnes     else {
5218965ea79SLois Curfman McInnes       /* assemble the entire matrix onto first processor. */
5228965ea79SLois Curfman McInnes       Mat          A;
52339ddd567SLois Curfman McInnes       int          M = mdn->M, N = mdn->N,m,row,i, nz, *cols;
52439ddd567SLois Curfman McInnes       Scalar       *vals;
52539ddd567SLois Curfman McInnes       Mat_SeqDense *Amdn = (Mat_SeqDense*) mdn->A->data;
5268965ea79SLois Curfman McInnes 
5278965ea79SLois Curfman McInnes       if (!rank) {
528b4fd4287SBarry Smith         ierr = MatCreateMPIDense(mat->comm,M,M,N,N,PETSC_NULL,&A); CHKERRQ(ierr);
5298965ea79SLois Curfman McInnes       }
5308965ea79SLois Curfman McInnes       else {
531b4fd4287SBarry Smith         ierr = MatCreateMPIDense(mat->comm,0,M,N,N,PETSC_NULL,&A); CHKERRQ(ierr);
5328965ea79SLois Curfman McInnes       }
5338965ea79SLois Curfman McInnes       PLogObjectParent(mat,A);
5348965ea79SLois Curfman McInnes 
53539ddd567SLois Curfman McInnes       /* Copy the matrix ... This isn't the most efficient means,
53639ddd567SLois Curfman McInnes          but it's quick for now */
53739ddd567SLois Curfman McInnes       row = mdn->rstart; m = Amdn->m;
5388965ea79SLois Curfman McInnes       for ( i=0; i<m; i++ ) {
53939ddd567SLois Curfman McInnes         ierr = MatGetRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
54039ddd567SLois Curfman McInnes         ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES); CHKERRQ(ierr);
54139ddd567SLois Curfman McInnes         ierr = MatRestoreRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
54239ddd567SLois Curfman McInnes         row++;
5438965ea79SLois Curfman McInnes       }
5448965ea79SLois Curfman McInnes 
5458965ea79SLois Curfman McInnes       ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
5468965ea79SLois Curfman McInnes       ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
5478965ea79SLois Curfman McInnes       if (!rank) {
54839ddd567SLois Curfman McInnes         ierr = MatView(((Mat_MPIDense*)(A->data))->A,viewer); CHKERRQ(ierr);
5498965ea79SLois Curfman McInnes       }
5508965ea79SLois Curfman McInnes       ierr = MatDestroy(A); CHKERRQ(ierr);
5518965ea79SLois Curfman McInnes     }
5528965ea79SLois Curfman McInnes   }
5538965ea79SLois Curfman McInnes   return 0;
5548965ea79SLois Curfman McInnes }
5558965ea79SLois Curfman McInnes 
55639ddd567SLois Curfman McInnes static int MatView_MPIDense(PetscObject obj,Viewer viewer)
5578965ea79SLois Curfman McInnes {
5588965ea79SLois Curfman McInnes   Mat          mat = (Mat) obj;
5598965ea79SLois Curfman McInnes   PetscObject  vobj = (PetscObject) viewer;
56039ddd567SLois Curfman McInnes   int          ierr;
5618965ea79SLois Curfman McInnes 
5628965ea79SLois Curfman McInnes   if (!viewer) {
5638965ea79SLois Curfman McInnes     viewer = STDOUT_VIEWER_SELF; vobj = (PetscObject) viewer;
5648965ea79SLois Curfman McInnes   }
56539ddd567SLois Curfman McInnes   if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILE_VIEWER) {
56639ddd567SLois Curfman McInnes     ierr = MatView_MPIDense_ASCII(mat,viewer); CHKERRQ(ierr);
5678965ea79SLois Curfman McInnes   }
5688965ea79SLois Curfman McInnes   else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILES_VIEWER) {
56939ddd567SLois Curfman McInnes     ierr = MatView_MPIDense_ASCII(mat,viewer); CHKERRQ(ierr);
5708965ea79SLois Curfman McInnes   }
5718965ea79SLois Curfman McInnes   else if (vobj->type == BINARY_FILE_VIEWER) {
57239ddd567SLois Curfman McInnes     return MatView_MPIDense_Binary(mat,viewer);
5738965ea79SLois Curfman McInnes   }
5748965ea79SLois Curfman McInnes   return 0;
5758965ea79SLois Curfman McInnes }
5768965ea79SLois Curfman McInnes 
5773501a2bdSLois Curfman McInnes static int MatGetInfo_MPIDense(Mat A,MatInfoType flag,int *nz,
5788965ea79SLois Curfman McInnes                              int *nzalloc,int *mem)
5798965ea79SLois Curfman McInnes {
5803501a2bdSLois Curfman McInnes   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
5813501a2bdSLois Curfman McInnes   Mat          mdn = mat->A;
58239ddd567SLois Curfman McInnes   int          ierr, isend[3], irecv[3];
5838965ea79SLois Curfman McInnes 
5843501a2bdSLois Curfman McInnes   ierr = MatGetInfo(mdn,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr);
5858965ea79SLois Curfman McInnes   if (flag == MAT_LOCAL) {
5868965ea79SLois Curfman McInnes     *nz = isend[0]; *nzalloc = isend[1]; *mem = isend[2];
5878965ea79SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_MAX) {
5883501a2bdSLois Curfman McInnes     MPI_Allreduce(isend,irecv,3,MPI_INT,MPI_MAX,A->comm);
5898965ea79SLois Curfman McInnes     *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
5908965ea79SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_SUM) {
5913501a2bdSLois Curfman McInnes     MPI_Allreduce(isend,irecv,3,MPI_INT,MPI_SUM,A->comm);
5928965ea79SLois Curfman McInnes     *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
5938965ea79SLois Curfman McInnes   }
5948965ea79SLois Curfman McInnes   return 0;
5958965ea79SLois Curfman McInnes }
5968965ea79SLois Curfman McInnes 
5978c469469SLois Curfman McInnes /* extern int MatLUFactorSymbolic_MPIDense(Mat,IS,IS,double,Mat*);
5988aaee692SLois Curfman McInnes    extern int MatLUFactorNumeric_MPIDense(Mat,Mat*);
5998aaee692SLois Curfman McInnes    extern int MatLUFactor_MPIDense(Mat,IS,IS,double);
6008aaee692SLois Curfman McInnes    extern int MatSolve_MPIDense(Mat,Vec,Vec);
6018c469469SLois Curfman McInnes    extern int MatSolveAdd_MPIDense(Mat,Vec,Vec,Vec);
6028aaee692SLois Curfman McInnes    extern int MatSolveTrans_MPIDense(Mat,Vec,Vec);
6038aaee692SLois Curfman McInnes    extern int MatSolveTransAdd_MPIDense(Mat,Vec,Vec,Vec); */
6048aaee692SLois Curfman McInnes 
60539ddd567SLois Curfman McInnes static int MatSetOption_MPIDense(Mat A,MatOption op)
6068965ea79SLois Curfman McInnes {
60739ddd567SLois Curfman McInnes   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
6088965ea79SLois Curfman McInnes 
6098965ea79SLois Curfman McInnes   if (op == NO_NEW_NONZERO_LOCATIONS ||
6108965ea79SLois Curfman McInnes       op == YES_NEW_NONZERO_LOCATIONS ||
6118965ea79SLois Curfman McInnes       op == COLUMNS_SORTED ||
6128965ea79SLois Curfman McInnes       op == ROW_ORIENTED) {
6138965ea79SLois Curfman McInnes         MatSetOption(a->A,op);
6148965ea79SLois Curfman McInnes   }
6158965ea79SLois Curfman McInnes   else if (op == ROWS_SORTED ||
6168965ea79SLois Curfman McInnes            op == SYMMETRIC_MATRIX ||
6178965ea79SLois Curfman McInnes            op == STRUCTURALLY_SYMMETRIC_MATRIX ||
6188965ea79SLois Curfman McInnes            op == YES_NEW_DIAGONALS)
61939ddd567SLois Curfman McInnes     PLogInfo((PetscObject)A,"Info:MatSetOption_MPIDense:Option ignored\n");
6208965ea79SLois Curfman McInnes   else if (op == COLUMN_ORIENTED)
62139b7565bSBarry Smith     { a->roworiented = 0; MatSetOption(a->A,op);}
6228965ea79SLois Curfman McInnes   else if (op == NO_NEW_DIAGONALS)
62339ddd567SLois Curfman McInnes     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIDense:NO_NEW_DIAGONALS");}
6248965ea79SLois Curfman McInnes   else
62539ddd567SLois Curfman McInnes     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIDense:unknown option");}
6268965ea79SLois Curfman McInnes   return 0;
6278965ea79SLois Curfman McInnes }
6288965ea79SLois Curfman McInnes 
6293501a2bdSLois Curfman McInnes static int MatGetSize_MPIDense(Mat A,int *m,int *n)
6308965ea79SLois Curfman McInnes {
6313501a2bdSLois Curfman McInnes   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
6328965ea79SLois Curfman McInnes   *m = mat->M; *n = mat->N;
6338965ea79SLois Curfman McInnes   return 0;
6348965ea79SLois Curfman McInnes }
6358965ea79SLois Curfman McInnes 
6363501a2bdSLois Curfman McInnes static int MatGetLocalSize_MPIDense(Mat A,int *m,int *n)
6378965ea79SLois Curfman McInnes {
6383501a2bdSLois Curfman McInnes   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
6398965ea79SLois Curfman McInnes   *m = mat->m; *n = mat->N;
6408965ea79SLois Curfman McInnes   return 0;
6418965ea79SLois Curfman McInnes }
6428965ea79SLois Curfman McInnes 
6433501a2bdSLois Curfman McInnes static int MatGetOwnershipRange_MPIDense(Mat A,int *m,int *n)
6448965ea79SLois Curfman McInnes {
6453501a2bdSLois Curfman McInnes   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
6468965ea79SLois Curfman McInnes   *m = mat->rstart; *n = mat->rend;
6478965ea79SLois Curfman McInnes   return 0;
6488965ea79SLois Curfman McInnes }
6498965ea79SLois Curfman McInnes 
6503501a2bdSLois Curfman McInnes static int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,Scalar **v)
6518965ea79SLois Curfman McInnes {
6523501a2bdSLois Curfman McInnes   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
65339ddd567SLois Curfman McInnes   int          lrow, rstart = mat->rstart, rend = mat->rend;
6548965ea79SLois Curfman McInnes 
65539ddd567SLois Curfman McInnes   if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIDense:only local rows")
6568965ea79SLois Curfman McInnes   lrow = row - rstart;
65739ddd567SLois Curfman McInnes   return MatGetRow(mat->A,lrow,nz,idx,v);
6588965ea79SLois Curfman McInnes }
6598965ea79SLois Curfman McInnes 
66039ddd567SLois Curfman McInnes static int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,Scalar **v)
6618965ea79SLois Curfman McInnes {
6620452661fSBarry Smith   if (idx) PetscFree(*idx);
6630452661fSBarry Smith   if (v) PetscFree(*v);
6648965ea79SLois Curfman McInnes   return 0;
6658965ea79SLois Curfman McInnes }
6668965ea79SLois Curfman McInnes 
667cddf8d76SBarry Smith static int MatNorm_MPIDense(Mat A,NormType type,double *norm)
668096963f5SLois Curfman McInnes {
6693501a2bdSLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *) A->data;
6703501a2bdSLois Curfman McInnes   Mat_SeqDense *mat = (Mat_SeqDense*) mdn->A->data;
6713501a2bdSLois Curfman McInnes   int          ierr, i, j;
6723501a2bdSLois Curfman McInnes   double       sum = 0.0;
6733501a2bdSLois Curfman McInnes   Scalar       *v = mat->v;
6743501a2bdSLois Curfman McInnes 
6753501a2bdSLois Curfman McInnes   if (mdn->size == 1) {
6763501a2bdSLois Curfman McInnes     ierr =  MatNorm(mdn->A,type,norm); CHKERRQ(ierr);
6773501a2bdSLois Curfman McInnes   } else {
6783501a2bdSLois Curfman McInnes     if (type == NORM_FROBENIUS) {
6793501a2bdSLois Curfman McInnes       for (i=0; i<mat->n*mat->m; i++ ) {
6803501a2bdSLois Curfman McInnes #if defined(PETSC_COMPLEX)
6813501a2bdSLois Curfman McInnes         sum += real(conj(*v)*(*v)); v++;
6823501a2bdSLois Curfman McInnes #else
6833501a2bdSLois Curfman McInnes         sum += (*v)*(*v); v++;
6843501a2bdSLois Curfman McInnes #endif
6853501a2bdSLois Curfman McInnes       }
6866d84be18SBarry Smith       MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,A->comm);
6873501a2bdSLois Curfman McInnes       *norm = sqrt(*norm);
6883501a2bdSLois Curfman McInnes       PLogFlops(2*mat->n*mat->m);
6893501a2bdSLois Curfman McInnes     }
6903501a2bdSLois Curfman McInnes     else if (type == NORM_1) {
6913501a2bdSLois Curfman McInnes       double *tmp, *tmp2;
6920452661fSBarry Smith       tmp  = (double *) PetscMalloc( 2*mdn->N*sizeof(double) ); CHKPTRQ(tmp);
6933501a2bdSLois Curfman McInnes       tmp2 = tmp + mdn->N;
694cddf8d76SBarry Smith       PetscMemzero(tmp,2*mdn->N*sizeof(double));
695096963f5SLois Curfman McInnes       *norm = 0.0;
6963501a2bdSLois Curfman McInnes       v = mat->v;
6973501a2bdSLois Curfman McInnes       for ( j=0; j<mat->n; j++ ) {
6983501a2bdSLois Curfman McInnes         for ( i=0; i<mat->m; i++ ) {
69967e560aaSBarry Smith           tmp[j] += PetscAbsScalar(*v);  v++;
7003501a2bdSLois Curfman McInnes         }
7013501a2bdSLois Curfman McInnes       }
7026d84be18SBarry Smith       MPI_Allreduce(tmp,tmp2,mdn->N,MPI_DOUBLE,MPI_SUM,A->comm);
7033501a2bdSLois Curfman McInnes       for ( j=0; j<mdn->N; j++ ) {
7043501a2bdSLois Curfman McInnes         if (tmp2[j] > *norm) *norm = tmp2[j];
7053501a2bdSLois Curfman McInnes       }
7060452661fSBarry Smith       PetscFree(tmp);
7073501a2bdSLois Curfman McInnes       PLogFlops(mat->n*mat->m);
7083501a2bdSLois Curfman McInnes     }
7093501a2bdSLois Curfman McInnes     else if (type == NORM_INFINITY) { /* max row norm */
7103501a2bdSLois Curfman McInnes       double ntemp;
7113501a2bdSLois Curfman McInnes       ierr = MatNorm(mdn->A,type,&ntemp); CHKERRQ(ierr);
7126d84be18SBarry Smith       MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,A->comm);
7133501a2bdSLois Curfman McInnes     }
7143501a2bdSLois Curfman McInnes     else {
7153501a2bdSLois Curfman McInnes       SETERRQ(1,"MatNorm_MPIDense:No support for two norm");
7163501a2bdSLois Curfman McInnes     }
7173501a2bdSLois Curfman McInnes   }
7183501a2bdSLois Curfman McInnes   return 0;
7193501a2bdSLois Curfman McInnes }
7203501a2bdSLois Curfman McInnes 
7213501a2bdSLois Curfman McInnes static int MatTranspose_MPIDense(Mat A,Mat *matout)
7223501a2bdSLois Curfman McInnes {
7233501a2bdSLois Curfman McInnes   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
7243501a2bdSLois Curfman McInnes   Mat_SeqDense *Aloc = (Mat_SeqDense *) a->A->data;
7253501a2bdSLois Curfman McInnes   Mat          B;
7263501a2bdSLois Curfman McInnes   int          M = a->M, N = a->N, m, n, *rwork, rstart = a->rstart;
7273501a2bdSLois Curfman McInnes   int          j, i, ierr;
7283501a2bdSLois Curfman McInnes   Scalar       *v;
7293501a2bdSLois Curfman McInnes 
7303638b69dSLois Curfman McInnes   if (matout == PETSC_NULL && M != N)
7313501a2bdSLois Curfman McInnes     SETERRQ(1,"MatTranspose_MPIDense:Supports square matrix only in-place");
732b4fd4287SBarry Smith   ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);
733ed2daf61SLois Curfman McInnes          CHKERRQ(ierr);
7343501a2bdSLois Curfman McInnes 
7353501a2bdSLois Curfman McInnes   m = Aloc->m; n = Aloc->n; v = Aloc->v;
7360452661fSBarry Smith   rwork = (int *) PetscMalloc(n*sizeof(int)); CHKPTRQ(rwork);
7373501a2bdSLois Curfman McInnes   for ( j=0; j<n; j++ ) {
7383501a2bdSLois Curfman McInnes     for (i=0; i<m; i++) rwork[i] = rstart + i;
7393501a2bdSLois Curfman McInnes     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES); CHKERRQ(ierr);
7403501a2bdSLois Curfman McInnes     v += m;
7413501a2bdSLois Curfman McInnes   }
7420452661fSBarry Smith   PetscFree(rwork);
7433501a2bdSLois Curfman McInnes   ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
7443501a2bdSLois Curfman McInnes   ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
7453638b69dSLois Curfman McInnes   if (matout != PETSC_NULL) {
7463501a2bdSLois Curfman McInnes     *matout = B;
7473501a2bdSLois Curfman McInnes   } else {
7483501a2bdSLois Curfman McInnes     /* This isn't really an in-place transpose, but free data struct from a */
7490452661fSBarry Smith     PetscFree(a->rowners);
7503501a2bdSLois Curfman McInnes     ierr = MatDestroy(a->A); CHKERRQ(ierr);
7513501a2bdSLois Curfman McInnes     if (a->lvec) VecDestroy(a->lvec);
7523501a2bdSLois Curfman McInnes     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
7530452661fSBarry Smith     PetscFree(a);
7543501a2bdSLois Curfman McInnes     PetscMemcpy(A,B,sizeof(struct _Mat));
7550452661fSBarry Smith     PetscHeaderDestroy(B);
7563501a2bdSLois Curfman McInnes   }
757096963f5SLois Curfman McInnes   return 0;
758096963f5SLois Curfman McInnes }
759096963f5SLois Curfman McInnes 
7603d1612f7SBarry Smith static int MatConvertSameType_MPIDense(Mat,Mat *,int);
7618965ea79SLois Curfman McInnes 
7628965ea79SLois Curfman McInnes /* -------------------------------------------------------------------*/
76339ddd567SLois Curfman McInnes static struct _MatOps MatOps = {MatSetValues_MPIDense,
76439ddd567SLois Curfman McInnes        MatGetRow_MPIDense,MatRestoreRow_MPIDense,
76539ddd567SLois Curfman McInnes        MatMult_MPIDense,MatMultAdd_MPIDense,
766096963f5SLois Curfman McInnes        MatMultTrans_MPIDense,MatMultTransAdd_MPIDense,
767e7ca0642SLois Curfman McInnes /*       MatSolve_MPIDense,0, */
768e7ca0642SLois Curfman McInnes        0,0,
76939ddd567SLois Curfman McInnes        0,0,
7708c469469SLois Curfman McInnes        0,0,
7718c469469SLois Curfman McInnes /*       MatLUFactor_MPIDense,0, */
7728aaee692SLois Curfman McInnes        0,MatTranspose_MPIDense,
77339ddd567SLois Curfman McInnes        MatGetInfo_MPIDense,0,
774096963f5SLois Curfman McInnes        MatGetDiagonal_MPIDense,0,MatNorm_MPIDense,
77539ddd567SLois Curfman McInnes        MatAssemblyBegin_MPIDense,MatAssemblyEnd_MPIDense,
7768965ea79SLois Curfman McInnes        0,
77739ddd567SLois Curfman McInnes        MatSetOption_MPIDense,MatZeroEntries_MPIDense,MatZeroRows_MPIDense,
7788c469469SLois Curfman McInnes        0,0,0,
7798c469469SLois Curfman McInnes /*       0,MatLUFactorSymbolic_MPIDense,MatLUFactorNumeric_MPIDense, */
7808aaee692SLois Curfman McInnes        0,0,
78139ddd567SLois Curfman McInnes        MatGetSize_MPIDense,MatGetLocalSize_MPIDense,
78239ddd567SLois Curfman McInnes        MatGetOwnershipRange_MPIDense,
783*ff14e315SSatish Balay        0,0, MatGetArray_MPIDense, MatRestoreArray_MPIDense,
784*ff14e315SSatish Balay        0,0,0,MatConvertSameType_MPIDense,
785b49de8d1SLois Curfman McInnes        0,0,0,0,0,
786b49de8d1SLois Curfman McInnes        0,0,MatGetValues_MPIDense};
7878965ea79SLois Curfman McInnes 
7888965ea79SLois Curfman McInnes /*@C
78939ddd567SLois Curfman McInnes    MatCreateMPIDense - Creates a sparse parallel matrix in dense format.
7908965ea79SLois Curfman McInnes 
7918965ea79SLois Curfman McInnes    Input Parameters:
7928965ea79SLois Curfman McInnes .  comm - MPI communicator
7938965ea79SLois Curfman McInnes .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
7948965ea79SLois Curfman McInnes .  n - number of local columns (or PETSC_DECIDE to have calculated
7958965ea79SLois Curfman McInnes            if N is given)
7968965ea79SLois Curfman McInnes .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
7978965ea79SLois Curfman McInnes .  N - number of global columns (or PETSC_DECIDE to have calculated
7988965ea79SLois Curfman McInnes            if n is given)
799b4fd4287SBarry Smith .  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
800dfc5480cSLois Curfman McInnes    to control all matrix memory allocation.
8018965ea79SLois Curfman McInnes 
8028965ea79SLois Curfman McInnes    Output Parameter:
8038965ea79SLois Curfman McInnes .  newmat - the matrix
8048965ea79SLois Curfman McInnes 
8058965ea79SLois Curfman McInnes    Notes:
80639ddd567SLois Curfman McInnes    The dense format is fully compatible with standard Fortran 77
80739ddd567SLois Curfman McInnes    storage by columns.
8088965ea79SLois Curfman McInnes 
80918f449edSLois Curfman McInnes    The data input variable is intended primarily for Fortran programmers
81018f449edSLois Curfman McInnes    who wish to allocate their own matrix memory space.  Most users should
811b4fd4287SBarry Smith    set data=PETSC_NULL.
81218f449edSLois Curfman McInnes 
8138965ea79SLois Curfman McInnes    The user MUST specify either the local or global matrix dimensions
8148965ea79SLois Curfman McInnes    (possibly both).
8158965ea79SLois Curfman McInnes 
8163501a2bdSLois Curfman McInnes    Currently, the only parallel dense matrix decomposition is by rows,
8173501a2bdSLois Curfman McInnes    so that n=N and each submatrix owns all of the global columns.
8183501a2bdSLois Curfman McInnes 
81939ddd567SLois Curfman McInnes .keywords: matrix, dense, parallel
8208965ea79SLois Curfman McInnes 
82139ddd567SLois Curfman McInnes .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
8228965ea79SLois Curfman McInnes @*/
82318f449edSLois Curfman McInnes int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,Scalar *data,Mat *newmat)
8248965ea79SLois Curfman McInnes {
8258965ea79SLois Curfman McInnes   Mat          mat;
82639ddd567SLois Curfman McInnes   Mat_MPIDense *a;
82725cdf11fSBarry Smith   int          ierr, i,flg;
8288965ea79SLois Curfman McInnes 
829ed2daf61SLois Curfman McInnes /* Note:  For now, when data is specified above, this assumes the user correctly
830ed2daf61SLois Curfman McInnes    allocates the local dense storage space.  We should add error checking. */
83118f449edSLois Curfman McInnes 
8328965ea79SLois Curfman McInnes   *newmat         = 0;
8330452661fSBarry Smith   PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIDENSE,comm);
8348965ea79SLois Curfman McInnes   PLogObjectCreate(mat);
8350452661fSBarry Smith   mat->data       = (void *) (a = PetscNew(Mat_MPIDense)); CHKPTRQ(a);
8368965ea79SLois Curfman McInnes   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
83739ddd567SLois Curfman McInnes   mat->destroy    = MatDestroy_MPIDense;
83839ddd567SLois Curfman McInnes   mat->view       = MatView_MPIDense;
8398965ea79SLois Curfman McInnes   mat->factor     = 0;
8408965ea79SLois Curfman McInnes 
841622d7880SLois Curfman McInnes   a->factor = 0;
8428965ea79SLois Curfman McInnes   a->insertmode = NOT_SET_VALUES;
8438965ea79SLois Curfman McInnes   MPI_Comm_rank(comm,&a->rank);
8448965ea79SLois Curfman McInnes   MPI_Comm_size(comm,&a->size);
8458965ea79SLois Curfman McInnes 
84639ddd567SLois Curfman McInnes   if (M == PETSC_DECIDE) MPI_Allreduce(&m,&M,1,MPI_INT,MPI_SUM,comm);
8478965ea79SLois Curfman McInnes   if (m == PETSC_DECIDE) {m = M/a->size + ((M % a->size) > a->rank);}
84839ddd567SLois Curfman McInnes 
84939ddd567SLois Curfman McInnes   /* each row stores all columns */
85039ddd567SLois Curfman McInnes   if (N == PETSC_DECIDE) N = n;
851d7e8b826SBarry Smith   if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);}
852d7e8b826SBarry Smith   /*  if (n != N) SETERRQ(1,"MatCreateMPIDense:For now, only n=N is supported"); */
8538965ea79SLois Curfman McInnes   a->N = N;
8548965ea79SLois Curfman McInnes   a->M = M;
85539ddd567SLois Curfman McInnes   a->m = m;
85639ddd567SLois Curfman McInnes   a->n = n;
8578965ea79SLois Curfman McInnes 
8588965ea79SLois Curfman McInnes   /* build local table of row and column ownerships */
859d7e8b826SBarry Smith   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
860d7e8b826SBarry Smith   a->cowners = a->rowners + a->size + 1;
861d7e8b826SBarry Smith   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+
86239ddd567SLois Curfman McInnes                        sizeof(Mat_MPIDense));
8638965ea79SLois Curfman McInnes   MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);
8648965ea79SLois Curfman McInnes   a->rowners[0] = 0;
8658965ea79SLois Curfman McInnes   for ( i=2; i<=a->size; i++ ) {
8668965ea79SLois Curfman McInnes     a->rowners[i] += a->rowners[i-1];
8678965ea79SLois Curfman McInnes   }
8688965ea79SLois Curfman McInnes   a->rstart = a->rowners[a->rank];
8698965ea79SLois Curfman McInnes   a->rend   = a->rowners[a->rank+1];
870d7e8b826SBarry Smith   MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);
871d7e8b826SBarry Smith   a->cowners[0] = 0;
872d7e8b826SBarry Smith   for ( i=2; i<=a->size; i++ ) {
873d7e8b826SBarry Smith     a->cowners[i] += a->cowners[i-1];
874d7e8b826SBarry Smith   }
8758965ea79SLois Curfman McInnes 
87618f449edSLois Curfman McInnes   ierr = MatCreateSeqDense(MPI_COMM_SELF,m,N,data,&a->A); CHKERRQ(ierr);
8778965ea79SLois Curfman McInnes   PLogObjectParent(mat,a->A);
8788965ea79SLois Curfman McInnes 
8798965ea79SLois Curfman McInnes   /* build cache for off array entries formed */
8808965ea79SLois Curfman McInnes   ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr);
8818965ea79SLois Curfman McInnes 
8828965ea79SLois Curfman McInnes   /* stuff used for matrix vector multiply */
8838965ea79SLois Curfman McInnes   a->lvec        = 0;
8848965ea79SLois Curfman McInnes   a->Mvctx       = 0;
88539b7565bSBarry Smith   a->roworiented = 1;
8868965ea79SLois Curfman McInnes 
8878965ea79SLois Curfman McInnes   *newmat = mat;
88825cdf11fSBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
88925cdf11fSBarry Smith   if (flg) {
8908c469469SLois Curfman McInnes     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
8918c469469SLois Curfman McInnes   }
8928965ea79SLois Curfman McInnes   return 0;
8938965ea79SLois Curfman McInnes }
8948965ea79SLois Curfman McInnes 
8953d1612f7SBarry Smith static int MatConvertSameType_MPIDense(Mat A,Mat *newmat,int cpvalues)
8968965ea79SLois Curfman McInnes {
8978965ea79SLois Curfman McInnes   Mat          mat;
8983501a2bdSLois Curfman McInnes   Mat_MPIDense *a,*oldmat = (Mat_MPIDense *) A->data;
89939ddd567SLois Curfman McInnes   int          ierr;
9002ba99913SLois Curfman McInnes   FactorCtx    *factor;
9018965ea79SLois Curfman McInnes 
9028965ea79SLois Curfman McInnes   *newmat       = 0;
9030452661fSBarry Smith   PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIDENSE,A->comm);
9048965ea79SLois Curfman McInnes   PLogObjectCreate(mat);
9050452661fSBarry Smith   mat->data     = (void *) (a = PetscNew(Mat_MPIDense)); CHKPTRQ(a);
9068965ea79SLois Curfman McInnes   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
90739ddd567SLois Curfman McInnes   mat->destroy  = MatDestroy_MPIDense;
90839ddd567SLois Curfman McInnes   mat->view     = MatView_MPIDense;
9093501a2bdSLois Curfman McInnes   mat->factor   = A->factor;
910c456f294SBarry Smith   mat->assembled  = PETSC_TRUE;
9118965ea79SLois Curfman McInnes 
9128965ea79SLois Curfman McInnes   a->m          = oldmat->m;
9138965ea79SLois Curfman McInnes   a->n          = oldmat->n;
9148965ea79SLois Curfman McInnes   a->M          = oldmat->M;
9158965ea79SLois Curfman McInnes   a->N          = oldmat->N;
9162ba99913SLois Curfman McInnes   if (oldmat->factor) {
9172ba99913SLois Curfman McInnes     a->factor = (FactorCtx *) (factor = PetscNew(FactorCtx)); CHKPTRQ(factor);
9182ba99913SLois Curfman McInnes     /* copy factor contents ... add this code! */
9192ba99913SLois Curfman McInnes   } else a->factor = 0;
9208965ea79SLois Curfman McInnes 
9218965ea79SLois Curfman McInnes   a->rstart     = oldmat->rstart;
9228965ea79SLois Curfman McInnes   a->rend       = oldmat->rend;
9238965ea79SLois Curfman McInnes   a->size       = oldmat->size;
9248965ea79SLois Curfman McInnes   a->rank       = oldmat->rank;
9258965ea79SLois Curfman McInnes   a->insertmode = NOT_SET_VALUES;
9268965ea79SLois Curfman McInnes 
9270452661fSBarry Smith   a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners);
92839ddd567SLois Curfman McInnes   PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIDense));
9298965ea79SLois Curfman McInnes   PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));
9308965ea79SLois Curfman McInnes   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
9318965ea79SLois Curfman McInnes 
9328965ea79SLois Curfman McInnes   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
9338965ea79SLois Curfman McInnes   PLogObjectParent(mat,a->lvec);
93455659b69SBarry Smith   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
9358965ea79SLois Curfman McInnes   PLogObjectParent(mat,a->Mvctx);
9368965ea79SLois Curfman McInnes   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
9378965ea79SLois Curfman McInnes   PLogObjectParent(mat,a->A);
9388965ea79SLois Curfman McInnes   *newmat = mat;
9398965ea79SLois Curfman McInnes   return 0;
9408965ea79SLois Curfman McInnes }
9418965ea79SLois Curfman McInnes 
9428965ea79SLois Curfman McInnes #include "sysio.h"
9438965ea79SLois Curfman McInnes 
94439ddd567SLois Curfman McInnes int MatLoad_MPIDense(Viewer bview,MatType type,Mat *newmat)
9458965ea79SLois Curfman McInnes {
9468965ea79SLois Curfman McInnes   Mat          A;
9478965ea79SLois Curfman McInnes   int          i, nz, ierr, j,rstart, rend, fd;
9488965ea79SLois Curfman McInnes   Scalar       *vals,*svals;
9498965ea79SLois Curfman McInnes   PetscObject  vobj = (PetscObject) bview;
9508965ea79SLois Curfman McInnes   MPI_Comm     comm = vobj->comm;
9518965ea79SLois Curfman McInnes   MPI_Status   status;
9528965ea79SLois Curfman McInnes   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
9538965ea79SLois Curfman McInnes   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
9548965ea79SLois Curfman McInnes   int          tag = ((PetscObject)bview)->tag;
9558965ea79SLois Curfman McInnes 
9568965ea79SLois Curfman McInnes   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
9578965ea79SLois Curfman McInnes   if (!rank) {
9588965ea79SLois Curfman McInnes     ierr = ViewerFileGetDescriptor_Private(bview,&fd); CHKERRQ(ierr);
9598965ea79SLois Curfman McInnes     ierr = SYRead(fd,(char *)header,4,SYINT); CHKERRQ(ierr);
96039ddd567SLois Curfman McInnes     if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIDenseorMPIRow:not matrix object");
9618965ea79SLois Curfman McInnes   }
9628965ea79SLois Curfman McInnes 
9638965ea79SLois Curfman McInnes   MPI_Bcast(header+1,3,MPI_INT,0,comm);
9648965ea79SLois Curfman McInnes   M = header[1]; N = header[2];
9658965ea79SLois Curfman McInnes   /* determine ownership of all rows */
9668965ea79SLois Curfman McInnes   m = M/size + ((M % size) > rank);
9670452661fSBarry Smith   rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
9688965ea79SLois Curfman McInnes   MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
9698965ea79SLois Curfman McInnes   rowners[0] = 0;
9708965ea79SLois Curfman McInnes   for ( i=2; i<=size; i++ ) {
9718965ea79SLois Curfman McInnes     rowners[i] += rowners[i-1];
9728965ea79SLois Curfman McInnes   }
9738965ea79SLois Curfman McInnes   rstart = rowners[rank];
9748965ea79SLois Curfman McInnes   rend   = rowners[rank+1];
9758965ea79SLois Curfman McInnes 
9768965ea79SLois Curfman McInnes   /* distribute row lengths to all processors */
9770452661fSBarry Smith   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
9788965ea79SLois Curfman McInnes   offlens = ourlens + (rend-rstart);
9798965ea79SLois Curfman McInnes   if (!rank) {
9800452661fSBarry Smith     rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
9818965ea79SLois Curfman McInnes     ierr = SYRead(fd,rowlengths,M,SYINT); CHKERRQ(ierr);
9820452661fSBarry Smith     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
9838965ea79SLois Curfman McInnes     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
9848965ea79SLois Curfman McInnes     MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
9850452661fSBarry Smith     PetscFree(sndcounts);
9868965ea79SLois Curfman McInnes   }
9878965ea79SLois Curfman McInnes   else {
9888965ea79SLois Curfman McInnes     MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
9898965ea79SLois Curfman McInnes   }
9908965ea79SLois Curfman McInnes 
9918965ea79SLois Curfman McInnes   if (!rank) {
9928965ea79SLois Curfman McInnes     /* calculate the number of nonzeros on each processor */
9930452661fSBarry Smith     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
994cddf8d76SBarry Smith     PetscMemzero(procsnz,size*sizeof(int));
9958965ea79SLois Curfman McInnes     for ( i=0; i<size; i++ ) {
9968965ea79SLois Curfman McInnes       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
9978965ea79SLois Curfman McInnes         procsnz[i] += rowlengths[j];
9988965ea79SLois Curfman McInnes       }
9998965ea79SLois Curfman McInnes     }
10000452661fSBarry Smith     PetscFree(rowlengths);
10018965ea79SLois Curfman McInnes 
10028965ea79SLois Curfman McInnes     /* determine max buffer needed and allocate it */
10038965ea79SLois Curfman McInnes     maxnz = 0;
10048965ea79SLois Curfman McInnes     for ( i=0; i<size; i++ ) {
10050452661fSBarry Smith       maxnz = PetscMax(maxnz,procsnz[i]);
10068965ea79SLois Curfman McInnes     }
10070452661fSBarry Smith     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
10088965ea79SLois Curfman McInnes 
10098965ea79SLois Curfman McInnes     /* read in my part of the matrix column indices  */
10108965ea79SLois Curfman McInnes     nz = procsnz[0];
10110452661fSBarry Smith     mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
10128965ea79SLois Curfman McInnes     ierr = SYRead(fd,mycols,nz,SYINT); CHKERRQ(ierr);
10138965ea79SLois Curfman McInnes 
10148965ea79SLois Curfman McInnes     /* read in every one elses and ship off */
10158965ea79SLois Curfman McInnes     for ( i=1; i<size; i++ ) {
10168965ea79SLois Curfman McInnes       nz = procsnz[i];
10178965ea79SLois Curfman McInnes       ierr = SYRead(fd,cols,nz,SYINT); CHKERRQ(ierr);
10188965ea79SLois Curfman McInnes       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
10198965ea79SLois Curfman McInnes     }
10200452661fSBarry Smith     PetscFree(cols);
10218965ea79SLois Curfman McInnes   }
10228965ea79SLois Curfman McInnes   else {
10238965ea79SLois Curfman McInnes     /* determine buffer space needed for message */
10248965ea79SLois Curfman McInnes     nz = 0;
10258965ea79SLois Curfman McInnes     for ( i=0; i<m; i++ ) {
10268965ea79SLois Curfman McInnes       nz += ourlens[i];
10278965ea79SLois Curfman McInnes     }
10280452661fSBarry Smith     mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
10298965ea79SLois Curfman McInnes 
10308965ea79SLois Curfman McInnes     /* receive message of column indices*/
10318965ea79SLois Curfman McInnes     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
10328965ea79SLois Curfman McInnes     MPI_Get_count(&status,MPI_INT,&maxnz);
103339ddd567SLois Curfman McInnes     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIDenseorMPIRow:something is wrong with file");
10348965ea79SLois Curfman McInnes   }
10358965ea79SLois Curfman McInnes 
10368965ea79SLois Curfman McInnes   /* loop over local rows, determining number of off diagonal entries */
1037cddf8d76SBarry Smith   PetscMemzero(offlens,m*sizeof(int));
10388965ea79SLois Curfman McInnes   jj = 0;
10398965ea79SLois Curfman McInnes   for ( i=0; i<m; i++ ) {
10408965ea79SLois Curfman McInnes     for ( j=0; j<ourlens[i]; j++ ) {
10418965ea79SLois Curfman McInnes       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
10428965ea79SLois Curfman McInnes       jj++;
10438965ea79SLois Curfman McInnes     }
10448965ea79SLois Curfman McInnes   }
10458965ea79SLois Curfman McInnes 
10468965ea79SLois Curfman McInnes   /* create our matrix */
10478965ea79SLois Curfman McInnes   for ( i=0; i<m; i++ ) {
10488965ea79SLois Curfman McInnes     ourlens[i] -= offlens[i];
10498965ea79SLois Curfman McInnes   }
105039ddd567SLois Curfman McInnes   if (type == MATMPIDENSE) {
1051b4fd4287SBarry Smith     ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
10528965ea79SLois Curfman McInnes   }
10538965ea79SLois Curfman McInnes   A = *newmat;
10548965ea79SLois Curfman McInnes   for ( i=0; i<m; i++ ) {
10558965ea79SLois Curfman McInnes     ourlens[i] += offlens[i];
10568965ea79SLois Curfman McInnes   }
10578965ea79SLois Curfman McInnes 
10588965ea79SLois Curfman McInnes   if (!rank) {
10590452661fSBarry Smith     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);
10608965ea79SLois Curfman McInnes 
10618965ea79SLois Curfman McInnes     /* read in my part of the matrix numerical values  */
10628965ea79SLois Curfman McInnes     nz = procsnz[0];
10638965ea79SLois Curfman McInnes     ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr);
10648965ea79SLois Curfman McInnes 
10658965ea79SLois Curfman McInnes     /* insert into matrix */
10668965ea79SLois Curfman McInnes     jj      = rstart;
10678965ea79SLois Curfman McInnes     smycols = mycols;
10688965ea79SLois Curfman McInnes     svals   = vals;
10698965ea79SLois Curfman McInnes     for ( i=0; i<m; i++ ) {
10708965ea79SLois Curfman McInnes       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
10718965ea79SLois Curfman McInnes       smycols += ourlens[i];
10728965ea79SLois Curfman McInnes       svals   += ourlens[i];
10738965ea79SLois Curfman McInnes       jj++;
10748965ea79SLois Curfman McInnes     }
10758965ea79SLois Curfman McInnes 
10768965ea79SLois Curfman McInnes     /* read in other processors and ship out */
10778965ea79SLois Curfman McInnes     for ( i=1; i<size; i++ ) {
10788965ea79SLois Curfman McInnes       nz = procsnz[i];
10798965ea79SLois Curfman McInnes       ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr);
10808965ea79SLois Curfman McInnes       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
10818965ea79SLois Curfman McInnes     }
10820452661fSBarry Smith     PetscFree(procsnz);
10838965ea79SLois Curfman McInnes   }
10848965ea79SLois Curfman McInnes   else {
10858965ea79SLois Curfman McInnes     /* receive numeric values */
10860452661fSBarry Smith     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);
10878965ea79SLois Curfman McInnes 
10888965ea79SLois Curfman McInnes     /* receive message of values*/
10898965ea79SLois Curfman McInnes     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
10908965ea79SLois Curfman McInnes     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
109139ddd567SLois Curfman McInnes     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIDenseorMPIRow:something is wrong with file");
10928965ea79SLois Curfman McInnes 
10938965ea79SLois Curfman McInnes     /* insert into matrix */
10948965ea79SLois Curfman McInnes     jj      = rstart;
10958965ea79SLois Curfman McInnes     smycols = mycols;
10968965ea79SLois Curfman McInnes     svals   = vals;
10978965ea79SLois Curfman McInnes     for ( i=0; i<m; i++ ) {
10988965ea79SLois Curfman McInnes       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
10998965ea79SLois Curfman McInnes       smycols += ourlens[i];
11008965ea79SLois Curfman McInnes       svals   += ourlens[i];
11018965ea79SLois Curfman McInnes       jj++;
11028965ea79SLois Curfman McInnes     }
11038965ea79SLois Curfman McInnes   }
11040452661fSBarry Smith   PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);
11058965ea79SLois Curfman McInnes 
11068965ea79SLois Curfman McInnes   ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
11078965ea79SLois Curfman McInnes   ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
11088965ea79SLois Curfman McInnes   return 0;
11098965ea79SLois Curfman McInnes }
1110