xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 0513a670233e070c0fc2d7ef3dc22f950deb2ce6)
18965ea79SLois Curfman McInnes #ifndef lint
2*0513a670SBarry Smith static char vcid[] = "$Id: mpidense.c,v 1.62 1997/01/06 20:24:18 balay Exp bsmith $";
38965ea79SLois Curfman McInnes #endif
48965ea79SLois Curfman McInnes 
5ed3cc1f0SBarry Smith /*
6ed3cc1f0SBarry Smith    Basic functions for basic parallel dense matrices.
7ed3cc1f0SBarry Smith */
8ed3cc1f0SBarry Smith 
970f55243SBarry Smith #include "src/mat/impls/dense/mpi/mpidense.h"
10f5eb4b81SSatish Balay #include "src/vec/vecimpl.h"
118965ea79SLois Curfman McInnes 
125615d1e5SSatish Balay #undef __FUNC__
135615d1e5SSatish Balay #define __FUNC__ "MatSetValues_MPIDense"
147056b6fcSBarry Smith static int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v,InsertMode addv)
158965ea79SLois Curfman McInnes {
1639b7565bSBarry Smith   Mat_MPIDense *A = (Mat_MPIDense *) mat->data;
1739b7565bSBarry Smith   int          ierr, i, j, rstart = A->rstart, rend = A->rend, row;
1839b7565bSBarry Smith   int          roworiented = A->roworiented;
198965ea79SLois Curfman McInnes 
2039b7565bSBarry Smith   if (A->insertmode != NOT_SET_VALUES && A->insertmode != addv) {
21e3372554SBarry Smith     SETERRQ(1,0,"Cannot mix inserts and adds");
228965ea79SLois Curfman McInnes   }
2339b7565bSBarry Smith   A->insertmode = addv;
248965ea79SLois Curfman McInnes   for ( i=0; i<m; i++ ) {
25e3372554SBarry Smith     if (idxm[i] < 0) SETERRQ(1,0,"Negative row");
26e3372554SBarry Smith     if (idxm[i] >= A->M) SETERRQ(1,0,"Row too large");
278965ea79SLois Curfman McInnes     if (idxm[i] >= rstart && idxm[i] < rend) {
288965ea79SLois Curfman McInnes       row = idxm[i] - rstart;
2939b7565bSBarry Smith       if (roworiented) {
3039b7565bSBarry Smith         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv); CHKERRQ(ierr);
3139b7565bSBarry Smith       }
3239b7565bSBarry Smith       else {
338965ea79SLois Curfman McInnes         for ( j=0; j<n; j++ ) {
34e3372554SBarry Smith           if (idxn[j] < 0) SETERRQ(1,0,"Negative column");
35e3372554SBarry Smith           if (idxn[j] >= A->N) SETERRQ(1,0,"Column too large");
3639b7565bSBarry Smith           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv); CHKERRQ(ierr);
3739b7565bSBarry Smith         }
388965ea79SLois Curfman McInnes       }
398965ea79SLois Curfman McInnes     }
408965ea79SLois Curfman McInnes     else {
4139b7565bSBarry Smith       if (roworiented) {
4239b7565bSBarry Smith         ierr = StashValues_Private(&A->stash,idxm[i],n,idxn,v+i*n,addv); CHKERRQ(ierr);
4339b7565bSBarry Smith       }
4439b7565bSBarry Smith       else { /* must stash each seperately */
4539b7565bSBarry Smith         row = idxm[i];
4639b7565bSBarry Smith         for ( j=0; j<n; j++ ) {
477056b6fcSBarry Smith           ierr = StashValues_Private(&A->stash,row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
4839b7565bSBarry Smith         }
4939b7565bSBarry Smith       }
50b49de8d1SLois Curfman McInnes     }
51b49de8d1SLois Curfman McInnes   }
52b49de8d1SLois Curfman McInnes   return 0;
53b49de8d1SLois Curfman McInnes }
54b49de8d1SLois Curfman McInnes 
555615d1e5SSatish Balay #undef __FUNC__
565615d1e5SSatish Balay #define __FUNC__ "MatGetValues_MPIDense"
57b49de8d1SLois Curfman McInnes static int MatGetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
58b49de8d1SLois Curfman McInnes {
59b49de8d1SLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
60b49de8d1SLois Curfman McInnes   int          ierr, i, j, rstart = mdn->rstart, rend = mdn->rend, row;
61b49de8d1SLois Curfman McInnes 
62b49de8d1SLois Curfman McInnes   for ( i=0; i<m; i++ ) {
63e3372554SBarry Smith     if (idxm[i] < 0) SETERRQ(1,0,"Negative row");
64e3372554SBarry Smith     if (idxm[i] >= mdn->M) SETERRQ(1,0,"Row too large");
65b49de8d1SLois Curfman McInnes     if (idxm[i] >= rstart && idxm[i] < rend) {
66b49de8d1SLois Curfman McInnes       row = idxm[i] - rstart;
67b49de8d1SLois Curfman McInnes       for ( j=0; j<n; j++ ) {
68e3372554SBarry Smith         if (idxn[j] < 0) SETERRQ(1,0,"Negative column");
69b49de8d1SLois Curfman McInnes         if (idxn[j] >= mdn->N)
70e3372554SBarry Smith           SETERRQ(1,0,"Column too large");
71b49de8d1SLois Curfman McInnes         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j); CHKERRQ(ierr);
72b49de8d1SLois Curfman McInnes       }
73b49de8d1SLois Curfman McInnes     }
74b49de8d1SLois Curfman McInnes     else {
75e3372554SBarry Smith       SETERRQ(1,0,"Only local values currently supported");
768965ea79SLois Curfman McInnes     }
778965ea79SLois Curfman McInnes   }
788965ea79SLois Curfman McInnes   return 0;
798965ea79SLois Curfman McInnes }
808965ea79SLois Curfman McInnes 
815615d1e5SSatish Balay #undef __FUNC__
825615d1e5SSatish Balay #define __FUNC__ "MatGetArray_MPIDense"
83ff14e315SSatish Balay static int MatGetArray_MPIDense(Mat A,Scalar **array)
84ff14e315SSatish Balay {
85ff14e315SSatish Balay   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
86ff14e315SSatish Balay   int ierr;
87ff14e315SSatish Balay 
88ff14e315SSatish Balay   ierr = MatGetArray(a->A,array); CHKERRQ(ierr);
89ff14e315SSatish Balay   return 0;
90ff14e315SSatish Balay }
91ff14e315SSatish Balay 
925615d1e5SSatish Balay #undef __FUNC__
935615d1e5SSatish Balay #define __FUNC__ "MatRestoreArray_MPIDense"
94ff14e315SSatish Balay static int MatRestoreArray_MPIDense(Mat A,Scalar **array)
95ff14e315SSatish Balay {
96ff14e315SSatish Balay   return 0;
97ff14e315SSatish Balay }
98ff14e315SSatish Balay 
995615d1e5SSatish Balay #undef __FUNC__
1005615d1e5SSatish Balay #define __FUNC__ "MatAssemblyBegin_MPIDense"
10139ddd567SLois Curfman McInnes static int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
1028965ea79SLois Curfman McInnes {
10339ddd567SLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
1048965ea79SLois Curfman McInnes   MPI_Comm     comm = mat->comm;
10539ddd567SLois Curfman McInnes   int          size = mdn->size, *owners = mdn->rowners, rank = mdn->rank;
1068965ea79SLois Curfman McInnes   int          *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
10739ddd567SLois Curfman McInnes   int          tag = mat->tag, *owner,*starts,count,ierr;
1088965ea79SLois Curfman McInnes   InsertMode   addv;
10939ddd567SLois Curfman McInnes   MPI_Request  *send_waits,*recv_waits;
1108965ea79SLois Curfman McInnes   Scalar       *rvalues,*svalues;
1118965ea79SLois Curfman McInnes 
1128965ea79SLois Curfman McInnes   /* make sure all processors are either in INSERTMODE or ADDMODE */
1136d84be18SBarry Smith   MPI_Allreduce(&mdn->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
1147056b6fcSBarry Smith   if (addv == (ADD_VALUES|INSERT_VALUES)) {
115e3372554SBarry Smith     SETERRQ(1,0,"Cannot mix adds/inserts on different procs");
1168965ea79SLois Curfman McInnes   }
11739ddd567SLois Curfman McInnes   mdn->insertmode = addv; /* in case this processor had no cache */
1188965ea79SLois Curfman McInnes 
1198965ea79SLois Curfman McInnes   /*  first count number of contributors to each processor */
1200452661fSBarry Smith   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
121cddf8d76SBarry Smith   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
1220452661fSBarry Smith   owner = (int *) PetscMalloc( (mdn->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
12339ddd567SLois Curfman McInnes   for ( i=0; i<mdn->stash.n; i++ ) {
12439ddd567SLois Curfman McInnes     idx = mdn->stash.idx[i];
1258965ea79SLois Curfman McInnes     for ( j=0; j<size; j++ ) {
1268965ea79SLois Curfman McInnes       if (idx >= owners[j] && idx < owners[j+1]) {
1278965ea79SLois Curfman McInnes         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
1288965ea79SLois Curfman McInnes       }
1298965ea79SLois Curfman McInnes     }
1308965ea79SLois Curfman McInnes   }
1318965ea79SLois Curfman McInnes   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
1328965ea79SLois Curfman McInnes 
1338965ea79SLois Curfman McInnes   /* inform other processors of number of messages and max length*/
1340452661fSBarry Smith   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
1356d84be18SBarry Smith   MPI_Allreduce(procs,work,size,MPI_INT,MPI_SUM,comm);
1368965ea79SLois Curfman McInnes   nreceives = work[rank];
137e3372554SBarry Smith   if (nreceives > size) SETERRQ(1,0,"Internal PETSc error");
1386d84be18SBarry Smith   MPI_Allreduce(nprocs,work,size,MPI_INT,MPI_MAX,comm);
1398965ea79SLois Curfman McInnes   nmax = work[rank];
1400452661fSBarry Smith   PetscFree(work);
1418965ea79SLois Curfman McInnes 
1428965ea79SLois Curfman McInnes   /* post receives:
1438965ea79SLois Curfman McInnes        1) each message will consist of ordered pairs
1448965ea79SLois Curfman McInnes      (global index,value) we store the global index as a double
1458965ea79SLois Curfman McInnes      to simplify the message passing.
1468965ea79SLois Curfman McInnes        2) since we don't know how long each individual message is we
1478965ea79SLois Curfman McInnes      allocate the largest needed buffer for each receive. Potentially
1488965ea79SLois Curfman McInnes      this is a lot of wasted space.
1498965ea79SLois Curfman McInnes 
1508965ea79SLois Curfman McInnes        This could be done better.
1518965ea79SLois Curfman McInnes   */
1523b2fbd54SBarry Smith   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues);
1533b2fbd54SBarry Smith   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
1548965ea79SLois Curfman McInnes   for ( i=0; i<nreceives; i++ ) {
1557056b6fcSBarry Smith     MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
1568965ea79SLois Curfman McInnes   }
1578965ea79SLois Curfman McInnes 
1588965ea79SLois Curfman McInnes   /* do sends:
1598965ea79SLois Curfman McInnes       1) starts[i] gives the starting index in svalues for stuff going to
1608965ea79SLois Curfman McInnes          the ith processor
1618965ea79SLois Curfman McInnes   */
1623b2fbd54SBarry Smith   svalues = (Scalar *) PetscMalloc( 3*(mdn->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
1633b2fbd54SBarry Smith   send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
1640452661fSBarry Smith   starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
1658965ea79SLois Curfman McInnes   starts[0] = 0;
1668965ea79SLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
16739ddd567SLois Curfman McInnes   for ( i=0; i<mdn->stash.n; i++ ) {
16839ddd567SLois Curfman McInnes     svalues[3*starts[owner[i]]]       = (Scalar)  mdn->stash.idx[i];
16939ddd567SLois Curfman McInnes     svalues[3*starts[owner[i]]+1]     = (Scalar)  mdn->stash.idy[i];
17039ddd567SLois Curfman McInnes     svalues[3*(starts[owner[i]]++)+2] =  mdn->stash.array[i];
1718965ea79SLois Curfman McInnes   }
1720452661fSBarry Smith   PetscFree(owner);
1738965ea79SLois Curfman McInnes   starts[0] = 0;
1748965ea79SLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
1758965ea79SLois Curfman McInnes   count = 0;
1768965ea79SLois Curfman McInnes   for ( i=0; i<size; i++ ) {
1778965ea79SLois Curfman McInnes     if (procs[i]) {
1787056b6fcSBarry Smith       MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);
1798965ea79SLois Curfman McInnes     }
1808965ea79SLois Curfman McInnes   }
1810452661fSBarry Smith   PetscFree(starts); PetscFree(nprocs);
1828965ea79SLois Curfman McInnes 
1838965ea79SLois Curfman McInnes   /* Free cache space */
184d2dc9b81SLois Curfman McInnes   PLogInfo(mat,"MatAssemblyBegin_MPIDense:Number of off-processor values %d\n",mdn->stash.n);
18539ddd567SLois Curfman McInnes   ierr = StashDestroy_Private(&mdn->stash); CHKERRQ(ierr);
1868965ea79SLois Curfman McInnes 
18739ddd567SLois Curfman McInnes   mdn->svalues    = svalues;    mdn->rvalues = rvalues;
18839ddd567SLois Curfman McInnes   mdn->nsends     = nsends;     mdn->nrecvs = nreceives;
18939ddd567SLois Curfman McInnes   mdn->send_waits = send_waits; mdn->recv_waits = recv_waits;
19039ddd567SLois Curfman McInnes   mdn->rmax       = nmax;
1918965ea79SLois Curfman McInnes 
1928965ea79SLois Curfman McInnes   return 0;
1938965ea79SLois Curfman McInnes }
19439ddd567SLois Curfman McInnes extern int MatSetUpMultiply_MPIDense(Mat);
1958965ea79SLois Curfman McInnes 
1965615d1e5SSatish Balay #undef __FUNC__
1975615d1e5SSatish Balay #define __FUNC__ "MatAssemblyEnd_MPIDense"
19839ddd567SLois Curfman McInnes static int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
1998965ea79SLois Curfman McInnes {
20039ddd567SLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
2018965ea79SLois Curfman McInnes   MPI_Status   *send_status,recv_status;
20239ddd567SLois Curfman McInnes   int          imdex, nrecvs=mdn->nrecvs, count=nrecvs, i, n, ierr, row, col;
2038965ea79SLois Curfman McInnes   Scalar       *values,val;
20439ddd567SLois Curfman McInnes   InsertMode   addv = mdn->insertmode;
2058965ea79SLois Curfman McInnes 
2068965ea79SLois Curfman McInnes   /*  wait on receives */
2078965ea79SLois Curfman McInnes   while (count) {
20839ddd567SLois Curfman McInnes     MPI_Waitany(nrecvs,mdn->recv_waits,&imdex,&recv_status);
2098965ea79SLois Curfman McInnes     /* unpack receives into our local space */
21039ddd567SLois Curfman McInnes     values = mdn->rvalues + 3*imdex*mdn->rmax;
2118965ea79SLois Curfman McInnes     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
2128965ea79SLois Curfman McInnes     n = n/3;
2138965ea79SLois Curfman McInnes     for ( i=0; i<n; i++ ) {
214227d817aSBarry Smith       row = (int) PetscReal(values[3*i]) - mdn->rstart;
215227d817aSBarry Smith       col = (int) PetscReal(values[3*i+1]);
2168965ea79SLois Curfman McInnes       val = values[3*i+2];
21739ddd567SLois Curfman McInnes       if (col >= 0 && col < mdn->N) {
21839ddd567SLois Curfman McInnes         MatSetValues(mdn->A,1,&row,1,&col,&val,addv);
2198965ea79SLois Curfman McInnes       }
220e3372554SBarry Smith       else {SETERRQ(1,0,"Invalid column");}
2218965ea79SLois Curfman McInnes     }
2228965ea79SLois Curfman McInnes     count--;
2238965ea79SLois Curfman McInnes   }
2240452661fSBarry Smith   PetscFree(mdn->recv_waits); PetscFree(mdn->rvalues);
2258965ea79SLois Curfman McInnes 
2268965ea79SLois Curfman McInnes   /* wait on sends */
22739ddd567SLois Curfman McInnes   if (mdn->nsends) {
2287056b6fcSBarry Smith     send_status = (MPI_Status *) PetscMalloc(mdn->nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
22939ddd567SLois Curfman McInnes     MPI_Waitall(mdn->nsends,mdn->send_waits,send_status);
2300452661fSBarry Smith     PetscFree(send_status);
2318965ea79SLois Curfman McInnes   }
2320452661fSBarry Smith   PetscFree(mdn->send_waits); PetscFree(mdn->svalues);
2338965ea79SLois Curfman McInnes 
23439ddd567SLois Curfman McInnes   mdn->insertmode = NOT_SET_VALUES;
23539ddd567SLois Curfman McInnes   ierr = MatAssemblyBegin(mdn->A,mode); CHKERRQ(ierr);
23639ddd567SLois Curfman McInnes   ierr = MatAssemblyEnd(mdn->A,mode); CHKERRQ(ierr);
2378965ea79SLois Curfman McInnes 
2386d4a8577SBarry Smith   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
23939ddd567SLois Curfman McInnes     ierr = MatSetUpMultiply_MPIDense(mat); CHKERRQ(ierr);
2408965ea79SLois Curfman McInnes   }
2418965ea79SLois Curfman McInnes   return 0;
2428965ea79SLois Curfman McInnes }
2438965ea79SLois Curfman McInnes 
2445615d1e5SSatish Balay #undef __FUNC__
2455615d1e5SSatish Balay #define __FUNC__ "MatZeroEntries_MPIDense"
24639ddd567SLois Curfman McInnes static int MatZeroEntries_MPIDense(Mat A)
2478965ea79SLois Curfman McInnes {
24839ddd567SLois Curfman McInnes   Mat_MPIDense *l = (Mat_MPIDense *) A->data;
24939ddd567SLois Curfman McInnes   return MatZeroEntries(l->A);
2508965ea79SLois Curfman McInnes }
2518965ea79SLois Curfman McInnes 
2525615d1e5SSatish Balay #undef __FUNC__
2535615d1e5SSatish Balay #define __FUNC__ "MatGetBlockSize_MPIDense"
2544e220ebcSLois Curfman McInnes static int MatGetBlockSize_MPIDense(Mat A,int *bs)
2554e220ebcSLois Curfman McInnes {
2564e220ebcSLois Curfman McInnes   *bs = 1;
2574e220ebcSLois Curfman McInnes   return 0;
2584e220ebcSLois Curfman McInnes }
2594e220ebcSLois Curfman McInnes 
2608965ea79SLois Curfman McInnes /* the code does not do the diagonal entries correctly unless the
2618965ea79SLois Curfman McInnes    matrix is square and the column and row owerships are identical.
2628965ea79SLois Curfman McInnes    This is a BUG. The only way to fix it seems to be to access
2633501a2bdSLois Curfman McInnes    mdn->A and mdn->B directly and not through the MatZeroRows()
2648965ea79SLois Curfman McInnes    routine.
2658965ea79SLois Curfman McInnes */
2665615d1e5SSatish Balay #undef __FUNC__
2675615d1e5SSatish Balay #define __FUNC__ "MatZeroRows_MPIDense"
26839ddd567SLois Curfman McInnes static int MatZeroRows_MPIDense(Mat A,IS is,Scalar *diag)
2698965ea79SLois Curfman McInnes {
27039ddd567SLois Curfman McInnes   Mat_MPIDense   *l = (Mat_MPIDense *) A->data;
2718965ea79SLois Curfman McInnes   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
2728965ea79SLois Curfman McInnes   int            *procs,*nprocs,j,found,idx,nsends,*work;
2738965ea79SLois Curfman McInnes   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
2748965ea79SLois Curfman McInnes   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
2758965ea79SLois Curfman McInnes   int            *lens,imdex,*lrows,*values;
2768965ea79SLois Curfman McInnes   MPI_Comm       comm = A->comm;
2778965ea79SLois Curfman McInnes   MPI_Request    *send_waits,*recv_waits;
2788965ea79SLois Curfman McInnes   MPI_Status     recv_status,*send_status;
2798965ea79SLois Curfman McInnes   IS             istmp;
2808965ea79SLois Curfman McInnes 
28177c4ece6SBarry Smith   ierr = ISGetSize(is,&N); CHKERRQ(ierr);
2828965ea79SLois Curfman McInnes   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
2838965ea79SLois Curfman McInnes 
2848965ea79SLois Curfman McInnes   /*  first count number of contributors to each processor */
2850452661fSBarry Smith   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
286cddf8d76SBarry Smith   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
2870452661fSBarry Smith   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
2888965ea79SLois Curfman McInnes   for ( i=0; i<N; i++ ) {
2898965ea79SLois Curfman McInnes     idx = rows[i];
2908965ea79SLois Curfman McInnes     found = 0;
2918965ea79SLois Curfman McInnes     for ( j=0; j<size; j++ ) {
2928965ea79SLois Curfman McInnes       if (idx >= owners[j] && idx < owners[j+1]) {
2938965ea79SLois Curfman McInnes         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
2948965ea79SLois Curfman McInnes       }
2958965ea79SLois Curfman McInnes     }
296e3372554SBarry Smith     if (!found) SETERRQ(1,0,"Index out of range");
2978965ea79SLois Curfman McInnes   }
2988965ea79SLois Curfman McInnes   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
2998965ea79SLois Curfman McInnes 
3008965ea79SLois Curfman McInnes   /* inform other processors of number of messages and max length*/
3010452661fSBarry Smith   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
3028965ea79SLois Curfman McInnes   MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
3038965ea79SLois Curfman McInnes   nrecvs = work[rank];
3048965ea79SLois Curfman McInnes   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
3058965ea79SLois Curfman McInnes   nmax = work[rank];
3060452661fSBarry Smith   PetscFree(work);
3078965ea79SLois Curfman McInnes 
3088965ea79SLois Curfman McInnes   /* post receives:   */
3090452661fSBarry Smith   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
3108965ea79SLois Curfman McInnes   CHKPTRQ(rvalues);
3110452661fSBarry Smith   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
3128965ea79SLois Curfman McInnes   CHKPTRQ(recv_waits);
3138965ea79SLois Curfman McInnes   for ( i=0; i<nrecvs; i++ ) {
3148965ea79SLois Curfman McInnes     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
3158965ea79SLois Curfman McInnes   }
3168965ea79SLois Curfman McInnes 
3178965ea79SLois Curfman McInnes   /* do sends:
3188965ea79SLois Curfman McInnes       1) starts[i] gives the starting index in svalues for stuff going to
3198965ea79SLois Curfman McInnes          the ith processor
3208965ea79SLois Curfman McInnes   */
3210452661fSBarry Smith   svalues    = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
3227056b6fcSBarry Smith   send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
3230452661fSBarry Smith   starts     = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
3248965ea79SLois Curfman McInnes   starts[0]  = 0;
3258965ea79SLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
3268965ea79SLois Curfman McInnes   for ( i=0; i<N; i++ ) {
3278965ea79SLois Curfman McInnes     svalues[starts[owner[i]]++] = rows[i];
3288965ea79SLois Curfman McInnes   }
3298965ea79SLois Curfman McInnes   ISRestoreIndices(is,&rows);
3308965ea79SLois Curfman McInnes 
3318965ea79SLois Curfman McInnes   starts[0] = 0;
3328965ea79SLois Curfman McInnes   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
3338965ea79SLois Curfman McInnes   count = 0;
3348965ea79SLois Curfman McInnes   for ( i=0; i<size; i++ ) {
3358965ea79SLois Curfman McInnes     if (procs[i]) {
3368965ea79SLois Curfman McInnes       MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
3378965ea79SLois Curfman McInnes     }
3388965ea79SLois Curfman McInnes   }
3390452661fSBarry Smith   PetscFree(starts);
3408965ea79SLois Curfman McInnes 
3418965ea79SLois Curfman McInnes   base = owners[rank];
3428965ea79SLois Curfman McInnes 
3438965ea79SLois Curfman McInnes   /*  wait on receives */
3440452661fSBarry Smith   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
3458965ea79SLois Curfman McInnes   source = lens + nrecvs;
3468965ea79SLois Curfman McInnes   count  = nrecvs; slen = 0;
3478965ea79SLois Curfman McInnes   while (count) {
3488965ea79SLois Curfman McInnes     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
3498965ea79SLois Curfman McInnes     /* unpack receives into our local space */
3508965ea79SLois Curfman McInnes     MPI_Get_count(&recv_status,MPI_INT,&n);
3518965ea79SLois Curfman McInnes     source[imdex]  = recv_status.MPI_SOURCE;
3528965ea79SLois Curfman McInnes     lens[imdex]  = n;
3538965ea79SLois Curfman McInnes     slen += n;
3548965ea79SLois Curfman McInnes     count--;
3558965ea79SLois Curfman McInnes   }
3560452661fSBarry Smith   PetscFree(recv_waits);
3578965ea79SLois Curfman McInnes 
3588965ea79SLois Curfman McInnes   /* move the data into the send scatter */
3590452661fSBarry Smith   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
3608965ea79SLois Curfman McInnes   count = 0;
3618965ea79SLois Curfman McInnes   for ( i=0; i<nrecvs; i++ ) {
3628965ea79SLois Curfman McInnes     values = rvalues + i*nmax;
3638965ea79SLois Curfman McInnes     for ( j=0; j<lens[i]; j++ ) {
3648965ea79SLois Curfman McInnes       lrows[count++] = values[j] - base;
3658965ea79SLois Curfman McInnes     }
3668965ea79SLois Curfman McInnes   }
3670452661fSBarry Smith   PetscFree(rvalues); PetscFree(lens);
3680452661fSBarry Smith   PetscFree(owner); PetscFree(nprocs);
3698965ea79SLois Curfman McInnes 
3708965ea79SLois Curfman McInnes   /* actually zap the local rows */
371537820f0SBarry Smith   ierr = ISCreateGeneral(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
3728965ea79SLois Curfman McInnes   PLogObjectParent(A,istmp);
3730452661fSBarry Smith   PetscFree(lrows);
3748965ea79SLois Curfman McInnes   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
3758965ea79SLois Curfman McInnes   ierr = ISDestroy(istmp); CHKERRQ(ierr);
3768965ea79SLois Curfman McInnes 
3778965ea79SLois Curfman McInnes   /* wait on sends */
3788965ea79SLois Curfman McInnes   if (nsends) {
3797056b6fcSBarry Smith     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
3808965ea79SLois Curfman McInnes     MPI_Waitall(nsends,send_waits,send_status);
3810452661fSBarry Smith     PetscFree(send_status);
3828965ea79SLois Curfman McInnes   }
3830452661fSBarry Smith   PetscFree(send_waits); PetscFree(svalues);
3848965ea79SLois Curfman McInnes 
3858965ea79SLois Curfman McInnes   return 0;
3868965ea79SLois Curfman McInnes }
3878965ea79SLois Curfman McInnes 
3885615d1e5SSatish Balay #undef __FUNC__
3895615d1e5SSatish Balay #define __FUNC__ "MatMult_MPIDense"
39039ddd567SLois Curfman McInnes static int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
3918965ea79SLois Curfman McInnes {
39239ddd567SLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
3938965ea79SLois Curfman McInnes   int          ierr;
394c456f294SBarry Smith 
39543a90d84SBarry Smith   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
39643a90d84SBarry Smith   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
39744cd7ae7SLois Curfman McInnes   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy); CHKERRQ(ierr);
3988965ea79SLois Curfman McInnes   return 0;
3998965ea79SLois Curfman McInnes }
4008965ea79SLois Curfman McInnes 
4015615d1e5SSatish Balay #undef __FUNC__
4025615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_MPIDense"
40339ddd567SLois Curfman McInnes static int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
4048965ea79SLois Curfman McInnes {
40539ddd567SLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
4068965ea79SLois Curfman McInnes   int          ierr;
407c456f294SBarry Smith 
40843a90d84SBarry Smith   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
40943a90d84SBarry Smith   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
41044cd7ae7SLois Curfman McInnes   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz); CHKERRQ(ierr);
4118965ea79SLois Curfman McInnes   return 0;
4128965ea79SLois Curfman McInnes }
4138965ea79SLois Curfman McInnes 
4145615d1e5SSatish Balay #undef __FUNC__
4155615d1e5SSatish Balay #define __FUNC__ "MatMultTrans_MPIDense"
416096963f5SLois Curfman McInnes static int MatMultTrans_MPIDense(Mat A,Vec xx,Vec yy)
417096963f5SLois Curfman McInnes {
418096963f5SLois Curfman McInnes   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
419096963f5SLois Curfman McInnes   int          ierr;
4203501a2bdSLois Curfman McInnes   Scalar       zero = 0.0;
421096963f5SLois Curfman McInnes 
4223501a2bdSLois Curfman McInnes   ierr = VecSet(&zero,yy); CHKERRQ(ierr);
42344cd7ae7SLois Curfman McInnes   ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec); CHKERRQ(ierr);
424537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
425537820f0SBarry Smith   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
426096963f5SLois Curfman McInnes   return 0;
427096963f5SLois Curfman McInnes }
428096963f5SLois Curfman McInnes 
4295615d1e5SSatish Balay #undef __FUNC__
4305615d1e5SSatish Balay #define __FUNC__ "MatMultTransAdd_MPIDense"
431096963f5SLois Curfman McInnes static int MatMultTransAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
432096963f5SLois Curfman McInnes {
433096963f5SLois Curfman McInnes   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
434096963f5SLois Curfman McInnes   int          ierr;
435096963f5SLois Curfman McInnes 
4363501a2bdSLois Curfman McInnes   ierr = VecCopy(yy,zz); CHKERRQ(ierr);
43744cd7ae7SLois Curfman McInnes   ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec); CHKERRQ(ierr);
438537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
439537820f0SBarry Smith   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
440096963f5SLois Curfman McInnes   return 0;
441096963f5SLois Curfman McInnes }
442096963f5SLois Curfman McInnes 
4435615d1e5SSatish Balay #undef __FUNC__
4445615d1e5SSatish Balay #define __FUNC__ "MatGetDiagonal_MPIDense"
44539ddd567SLois Curfman McInnes static int MatGetDiagonal_MPIDense(Mat A,Vec v)
4468965ea79SLois Curfman McInnes {
44739ddd567SLois Curfman McInnes   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
448096963f5SLois Curfman McInnes   Mat_SeqDense *aloc = (Mat_SeqDense *) a->A->data;
44944cd7ae7SLois Curfman McInnes   int          ierr, len, i, n, m = a->m, radd;
45044cd7ae7SLois Curfman McInnes   Scalar       *x, zero = 0.0;
451ed3cc1f0SBarry Smith 
45244cd7ae7SLois Curfman McInnes   VecSet(&zero,v);
453096963f5SLois Curfman McInnes   ierr = VecGetArray(v,&x); CHKERRQ(ierr);
454096963f5SLois Curfman McInnes   ierr = VecGetSize(v,&n); CHKERRQ(ierr);
455e3372554SBarry Smith   if (n != a->M) SETERRQ(1,0,"Nonconforming mat and vec");
45644cd7ae7SLois Curfman McInnes   len = PetscMin(aloc->m,aloc->n);
4577ddc982cSLois Curfman McInnes   radd = a->rstart*m;
45844cd7ae7SLois Curfman McInnes   for ( i=0; i<len; i++ ) {
459096963f5SLois Curfman McInnes     x[i] = aloc->v[radd + i*m + i];
460096963f5SLois Curfman McInnes   }
461096963f5SLois Curfman McInnes   return 0;
4628965ea79SLois Curfman McInnes }
4638965ea79SLois Curfman McInnes 
4645615d1e5SSatish Balay #undef __FUNC__
4655615d1e5SSatish Balay #define __FUNC__ "MatDestroy_MPIDense"
46639ddd567SLois Curfman McInnes static int MatDestroy_MPIDense(PetscObject obj)
4678965ea79SLois Curfman McInnes {
4688965ea79SLois Curfman McInnes   Mat          mat = (Mat) obj;
4693501a2bdSLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
4708965ea79SLois Curfman McInnes   int          ierr;
471ed3cc1f0SBarry Smith 
4728965ea79SLois Curfman McInnes #if defined(PETSC_LOG)
4733501a2bdSLois Curfman McInnes   PLogObjectState(obj,"Rows=%d, Cols=%d",mdn->M,mdn->N);
4748965ea79SLois Curfman McInnes #endif
4750452661fSBarry Smith   PetscFree(mdn->rowners);
4763501a2bdSLois Curfman McInnes   ierr = MatDestroy(mdn->A); CHKERRQ(ierr);
4773501a2bdSLois Curfman McInnes   if (mdn->lvec)   VecDestroy(mdn->lvec);
4783501a2bdSLois Curfman McInnes   if (mdn->Mvctx)  VecScatterDestroy(mdn->Mvctx);
479622d7880SLois Curfman McInnes   if (mdn->factor) {
480622d7880SLois Curfman McInnes     if (mdn->factor->temp)   PetscFree(mdn->factor->temp);
481622d7880SLois Curfman McInnes     if (mdn->factor->tag)    PetscFree(mdn->factor->tag);
482622d7880SLois Curfman McInnes     if (mdn->factor->pivots) PetscFree(mdn->factor->pivots);
483622d7880SLois Curfman McInnes     PetscFree(mdn->factor);
484622d7880SLois Curfman McInnes   }
4850452661fSBarry Smith   PetscFree(mdn);
48690f02eecSBarry Smith   if (mat->mapping) {
48790f02eecSBarry Smith     ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr);
48890f02eecSBarry Smith   }
4898965ea79SLois Curfman McInnes   PLogObjectDestroy(mat);
4900452661fSBarry Smith   PetscHeaderDestroy(mat);
4918965ea79SLois Curfman McInnes   return 0;
4928965ea79SLois Curfman McInnes }
49339ddd567SLois Curfman McInnes 
4948965ea79SLois Curfman McInnes #include "pinclude/pviewer.h"
4958965ea79SLois Curfman McInnes 
4965615d1e5SSatish Balay #undef __FUNC__
4975615d1e5SSatish Balay #define __FUNC__ "MatView_MPIDense_Binary"
49839ddd567SLois Curfman McInnes static int MatView_MPIDense_Binary(Mat mat,Viewer viewer)
4998965ea79SLois Curfman McInnes {
50039ddd567SLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
5018965ea79SLois Curfman McInnes   int          ierr;
5027056b6fcSBarry Smith 
50339ddd567SLois Curfman McInnes   if (mdn->size == 1) {
50439ddd567SLois Curfman McInnes     ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
5058965ea79SLois Curfman McInnes   }
506e3372554SBarry Smith   else SETERRQ(1,0,"Only uniprocessor output supported");
5078965ea79SLois Curfman McInnes   return 0;
5088965ea79SLois Curfman McInnes }
5098965ea79SLois Curfman McInnes 
5105615d1e5SSatish Balay #undef __FUNC__
5115615d1e5SSatish Balay #define __FUNC__ "MatView_MPIDense_ASCII"
51239ddd567SLois Curfman McInnes static int MatView_MPIDense_ASCII(Mat mat,Viewer viewer)
5138965ea79SLois Curfman McInnes {
51439ddd567SLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
51539ddd567SLois Curfman McInnes   int          ierr, format;
5168965ea79SLois Curfman McInnes   FILE         *fd;
51719bcc07fSBarry Smith   ViewerType   vtype;
5188965ea79SLois Curfman McInnes 
51919bcc07fSBarry Smith   ViewerGetType(viewer,&vtype);
52090ace30eSBarry Smith   ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
52190ace30eSBarry Smith   ierr = ViewerGetFormat(viewer,&format);
522639f9d9dSBarry Smith   if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
5234e220ebcSLois Curfman McInnes     int rank;
5244e220ebcSLois Curfman McInnes     MatInfo info;
525096963f5SLois Curfman McInnes     MPI_Comm_rank(mat->comm,&rank);
5264e220ebcSLois Curfman McInnes     ierr = MatGetInfo(mat,MAT_LOCAL,&info);
52777c4ece6SBarry Smith     PetscSequentialPhaseBegin(mat->comm,1);
5284e220ebcSLois Curfman McInnes       fprintf(fd,"  [%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mdn->m,
5294e220ebcSLois Curfman McInnes          (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
530096963f5SLois Curfman McInnes       fflush(fd);
53177c4ece6SBarry Smith     PetscSequentialPhaseEnd(mat->comm,1);
5323501a2bdSLois Curfman McInnes     ierr = VecScatterView(mdn->Mvctx,viewer); CHKERRQ(ierr);
5333501a2bdSLois Curfman McInnes     return 0;
5343501a2bdSLois Curfman McInnes   }
53502cad45dSBarry Smith   else if (format == VIEWER_FORMAT_ASCII_INFO) {
536096963f5SLois Curfman McInnes     return 0;
5378965ea79SLois Curfman McInnes   }
53819bcc07fSBarry Smith   if (vtype == ASCII_FILE_VIEWER) {
53977c4ece6SBarry Smith     PetscSequentialPhaseBegin(mat->comm,1);
54039ddd567SLois Curfman McInnes     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d\n",
54139ddd567SLois Curfman McInnes              mdn->rank,mdn->m,mdn->rstart,mdn->rend,mdn->n);
54239ddd567SLois Curfman McInnes     ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
5438965ea79SLois Curfman McInnes     fflush(fd);
54477c4ece6SBarry Smith     PetscSequentialPhaseEnd(mat->comm,1);
5458965ea79SLois Curfman McInnes   }
5468965ea79SLois Curfman McInnes   else {
54739ddd567SLois Curfman McInnes     int size = mdn->size, rank = mdn->rank;
5488965ea79SLois Curfman McInnes     if (size == 1) {
54939ddd567SLois Curfman McInnes       ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
5508965ea79SLois Curfman McInnes     }
5518965ea79SLois Curfman McInnes     else {
5528965ea79SLois Curfman McInnes       /* assemble the entire matrix onto first processor. */
5538965ea79SLois Curfman McInnes       Mat          A;
55439ddd567SLois Curfman McInnes       int          M = mdn->M, N = mdn->N,m,row,i, nz, *cols;
55539ddd567SLois Curfman McInnes       Scalar       *vals;
55639ddd567SLois Curfman McInnes       Mat_SeqDense *Amdn = (Mat_SeqDense*) mdn->A->data;
5578965ea79SLois Curfman McInnes 
5588965ea79SLois Curfman McInnes       if (!rank) {
559*0513a670SBarry Smith         ierr = MatCreateMPIDense(mat->comm,M,N,M,N,PETSC_NULL,&A); CHKERRQ(ierr);
5608965ea79SLois Curfman McInnes       }
5618965ea79SLois Curfman McInnes       else {
562*0513a670SBarry Smith         ierr = MatCreateMPIDense(mat->comm,0,N,M,N,PETSC_NULL,&A); CHKERRQ(ierr);
5638965ea79SLois Curfman McInnes       }
5648965ea79SLois Curfman McInnes       PLogObjectParent(mat,A);
5658965ea79SLois Curfman McInnes 
56639ddd567SLois Curfman McInnes       /* Copy the matrix ... This isn't the most efficient means,
56739ddd567SLois Curfman McInnes          but it's quick for now */
56839ddd567SLois Curfman McInnes       row = mdn->rstart; m = Amdn->m;
5698965ea79SLois Curfman McInnes       for ( i=0; i<m; i++ ) {
57039ddd567SLois Curfman McInnes         ierr = MatGetRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
57139ddd567SLois Curfman McInnes         ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES); CHKERRQ(ierr);
57239ddd567SLois Curfman McInnes         ierr = MatRestoreRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
57339ddd567SLois Curfman McInnes         row++;
5748965ea79SLois Curfman McInnes       }
5758965ea79SLois Curfman McInnes 
5766d4a8577SBarry Smith       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
5776d4a8577SBarry Smith       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
5788965ea79SLois Curfman McInnes       if (!rank) {
57939ddd567SLois Curfman McInnes         ierr = MatView(((Mat_MPIDense*)(A->data))->A,viewer); CHKERRQ(ierr);
5808965ea79SLois Curfman McInnes       }
5818965ea79SLois Curfman McInnes       ierr = MatDestroy(A); CHKERRQ(ierr);
5828965ea79SLois Curfman McInnes     }
5838965ea79SLois Curfman McInnes   }
5848965ea79SLois Curfman McInnes   return 0;
5858965ea79SLois Curfman McInnes }
5868965ea79SLois Curfman McInnes 
5875615d1e5SSatish Balay #undef __FUNC__
5885615d1e5SSatish Balay #define __FUNC__ "MatView_MPIDense"
58939ddd567SLois Curfman McInnes static int MatView_MPIDense(PetscObject obj,Viewer viewer)
5908965ea79SLois Curfman McInnes {
5918965ea79SLois Curfman McInnes   Mat          mat = (Mat) obj;
59239ddd567SLois Curfman McInnes   int          ierr;
593bcd2baecSBarry Smith   ViewerType   vtype;
5948965ea79SLois Curfman McInnes 
595bcd2baecSBarry Smith   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
596bcd2baecSBarry Smith   if (vtype == ASCII_FILE_VIEWER) {
59739ddd567SLois Curfman McInnes     ierr = MatView_MPIDense_ASCII(mat,viewer); CHKERRQ(ierr);
5988965ea79SLois Curfman McInnes   }
599bcd2baecSBarry Smith   else if (vtype == ASCII_FILES_VIEWER) {
60039ddd567SLois Curfman McInnes     ierr = MatView_MPIDense_ASCII(mat,viewer); CHKERRQ(ierr);
6018965ea79SLois Curfman McInnes   }
602bcd2baecSBarry Smith   else if (vtype == BINARY_FILE_VIEWER) {
60339ddd567SLois Curfman McInnes     return MatView_MPIDense_Binary(mat,viewer);
6048965ea79SLois Curfman McInnes   }
6058965ea79SLois Curfman McInnes   return 0;
6068965ea79SLois Curfman McInnes }
6078965ea79SLois Curfman McInnes 
6085615d1e5SSatish Balay #undef __FUNC__
6095615d1e5SSatish Balay #define __FUNC__ "MatGetInfo_MPIDense"
6104e220ebcSLois Curfman McInnes static int MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
6118965ea79SLois Curfman McInnes {
6123501a2bdSLois Curfman McInnes   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
6133501a2bdSLois Curfman McInnes   Mat          mdn = mat->A;
6144e220ebcSLois Curfman McInnes   int          ierr;
6154e220ebcSLois Curfman McInnes   double       isend[5], irecv[5];
6168965ea79SLois Curfman McInnes 
6174e220ebcSLois Curfman McInnes   info->rows_global    = (double)mat->M;
6184e220ebcSLois Curfman McInnes   info->columns_global = (double)mat->N;
6194e220ebcSLois Curfman McInnes   info->rows_local     = (double)mat->m;
6204e220ebcSLois Curfman McInnes   info->columns_local  = (double)mat->N;
6214e220ebcSLois Curfman McInnes   info->block_size     = 1.0;
6224e220ebcSLois Curfman McInnes   ierr = MatGetInfo(mdn,MAT_LOCAL,info); CHKERRQ(ierr);
6234e220ebcSLois Curfman McInnes   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
6244e220ebcSLois Curfman McInnes   isend[3] = info->memory;  isend[4] = info->mallocs;
6258965ea79SLois Curfman McInnes   if (flag == MAT_LOCAL) {
6264e220ebcSLois Curfman McInnes     info->nz_used      = isend[0];
6274e220ebcSLois Curfman McInnes     info->nz_allocated = isend[1];
6284e220ebcSLois Curfman McInnes     info->nz_unneeded  = isend[2];
6294e220ebcSLois Curfman McInnes     info->memory       = isend[3];
6304e220ebcSLois Curfman McInnes     info->mallocs      = isend[4];
6318965ea79SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_MAX) {
6323501a2bdSLois Curfman McInnes     MPI_Allreduce(isend,irecv,3,MPI_INT,MPI_MAX,A->comm);
6334e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
6344e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
6354e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
6364e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
6374e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
6388965ea79SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_SUM) {
6393501a2bdSLois Curfman McInnes     MPI_Allreduce(isend,irecv,3,MPI_INT,MPI_SUM,A->comm);
6404e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
6414e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
6424e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
6434e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
6444e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
6458965ea79SLois Curfman McInnes   }
6464e220ebcSLois Curfman McInnes   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
6474e220ebcSLois Curfman McInnes   info->fill_ratio_needed = 0;
6484e220ebcSLois Curfman McInnes   info->factor_mallocs    = 0;
6498965ea79SLois Curfman McInnes   return 0;
6508965ea79SLois Curfman McInnes }
6518965ea79SLois Curfman McInnes 
6528c469469SLois Curfman McInnes /* extern int MatLUFactorSymbolic_MPIDense(Mat,IS,IS,double,Mat*);
6538aaee692SLois Curfman McInnes    extern int MatLUFactorNumeric_MPIDense(Mat,Mat*);
6548aaee692SLois Curfman McInnes    extern int MatLUFactor_MPIDense(Mat,IS,IS,double);
6558aaee692SLois Curfman McInnes    extern int MatSolve_MPIDense(Mat,Vec,Vec);
6568c469469SLois Curfman McInnes    extern int MatSolveAdd_MPIDense(Mat,Vec,Vec,Vec);
6578aaee692SLois Curfman McInnes    extern int MatSolveTrans_MPIDense(Mat,Vec,Vec);
6588aaee692SLois Curfman McInnes    extern int MatSolveTransAdd_MPIDense(Mat,Vec,Vec,Vec); */
6598aaee692SLois Curfman McInnes 
6605615d1e5SSatish Balay #undef __FUNC__
6615615d1e5SSatish Balay #define __FUNC__ "MatSetOption_MPIDense"
66239ddd567SLois Curfman McInnes static int MatSetOption_MPIDense(Mat A,MatOption op)
6638965ea79SLois Curfman McInnes {
66439ddd567SLois Curfman McInnes   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
6658965ea79SLois Curfman McInnes 
6666d4a8577SBarry Smith   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
6676d4a8577SBarry Smith       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
668219d9a1aSLois Curfman McInnes       op == MAT_COLUMNS_SORTED ||
669219d9a1aSLois Curfman McInnes       op == MAT_COLUMNS_UNSORTED) {
670b1fbbac0SLois Curfman McInnes         MatSetOption(a->A,op);
671b1fbbac0SLois Curfman McInnes   } else if (op == MAT_ROW_ORIENTED) {
672aeafbbfcSLois Curfman McInnes         a->roworiented = 1;
6738965ea79SLois Curfman McInnes         MatSetOption(a->A,op);
674b1fbbac0SLois Curfman McInnes   } else if (op == MAT_ROWS_SORTED ||
675219d9a1aSLois Curfman McInnes              op == MAT_ROWS_UNSORTED ||
6766d4a8577SBarry Smith              op == MAT_SYMMETRIC ||
6776d4a8577SBarry Smith              op == MAT_STRUCTURALLY_SYMMETRIC ||
6786d4a8577SBarry Smith              op == MAT_YES_NEW_DIAGONALS)
67994a424c1SBarry Smith     PLogInfo(A,"Info:MatSetOption_MPIDense:Option ignored\n");
6806d4a8577SBarry Smith   else if (op == MAT_COLUMN_ORIENTED)
68139b7565bSBarry Smith     {a->roworiented = 0; MatSetOption(a->A,op);}
6826d4a8577SBarry Smith   else if (op == MAT_NO_NEW_DIAGONALS)
683e3372554SBarry Smith     {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");}
6848965ea79SLois Curfman McInnes   else
685e3372554SBarry Smith     {SETERRQ(PETSC_ERR_SUP,0,"unknown option");}
6868965ea79SLois Curfman McInnes   return 0;
6878965ea79SLois Curfman McInnes }
6888965ea79SLois Curfman McInnes 
6895615d1e5SSatish Balay #undef __FUNC__
6905615d1e5SSatish Balay #define __FUNC__ "MatGetSize_MPIDense"
6913501a2bdSLois Curfman McInnes static int MatGetSize_MPIDense(Mat A,int *m,int *n)
6928965ea79SLois Curfman McInnes {
6933501a2bdSLois Curfman McInnes   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
6948965ea79SLois Curfman McInnes   *m = mat->M; *n = mat->N;
6958965ea79SLois Curfman McInnes   return 0;
6968965ea79SLois Curfman McInnes }
6978965ea79SLois Curfman McInnes 
6985615d1e5SSatish Balay #undef __FUNC__
6995615d1e5SSatish Balay #define __FUNC__ "MatGetLocalSize_MPIDense"
7003501a2bdSLois Curfman McInnes static int MatGetLocalSize_MPIDense(Mat A,int *m,int *n)
7018965ea79SLois Curfman McInnes {
7023501a2bdSLois Curfman McInnes   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
7038965ea79SLois Curfman McInnes   *m = mat->m; *n = mat->N;
7048965ea79SLois Curfman McInnes   return 0;
7058965ea79SLois Curfman McInnes }
7068965ea79SLois Curfman McInnes 
7075615d1e5SSatish Balay #undef __FUNC__
7085615d1e5SSatish Balay #define __FUNC__ "MatGetOwnershipRange_MPIDense"
7093501a2bdSLois Curfman McInnes static int MatGetOwnershipRange_MPIDense(Mat A,int *m,int *n)
7108965ea79SLois Curfman McInnes {
7113501a2bdSLois Curfman McInnes   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
7128965ea79SLois Curfman McInnes   *m = mat->rstart; *n = mat->rend;
7138965ea79SLois Curfman McInnes   return 0;
7148965ea79SLois Curfman McInnes }
7158965ea79SLois Curfman McInnes 
7165615d1e5SSatish Balay #undef __FUNC__
7175615d1e5SSatish Balay #define __FUNC__ "MatGetRow_MPIDense"
7183501a2bdSLois Curfman McInnes static int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,Scalar **v)
7198965ea79SLois Curfman McInnes {
7203501a2bdSLois Curfman McInnes   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
72139ddd567SLois Curfman McInnes   int          lrow, rstart = mat->rstart, rend = mat->rend;
7228965ea79SLois Curfman McInnes 
723e3372554SBarry Smith   if (row < rstart || row >= rend) SETERRQ(1,0,"only local rows")
7248965ea79SLois Curfman McInnes   lrow = row - rstart;
72539ddd567SLois Curfman McInnes   return MatGetRow(mat->A,lrow,nz,idx,v);
7268965ea79SLois Curfman McInnes }
7278965ea79SLois Curfman McInnes 
7285615d1e5SSatish Balay #undef __FUNC__
7295615d1e5SSatish Balay #define __FUNC__ "MatRestoreRow_MPIDense"
73039ddd567SLois Curfman McInnes static int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,Scalar **v)
7318965ea79SLois Curfman McInnes {
7320452661fSBarry Smith   if (idx) PetscFree(*idx);
7330452661fSBarry Smith   if (v) PetscFree(*v);
7348965ea79SLois Curfman McInnes   return 0;
7358965ea79SLois Curfman McInnes }
7368965ea79SLois Curfman McInnes 
7375615d1e5SSatish Balay #undef __FUNC__
7385615d1e5SSatish Balay #define __FUNC__ "MatNorm_MPIDense"
739cddf8d76SBarry Smith static int MatNorm_MPIDense(Mat A,NormType type,double *norm)
740096963f5SLois Curfman McInnes {
7413501a2bdSLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *) A->data;
7423501a2bdSLois Curfman McInnes   Mat_SeqDense *mat = (Mat_SeqDense*) mdn->A->data;
7433501a2bdSLois Curfman McInnes   int          ierr, i, j;
7443501a2bdSLois Curfman McInnes   double       sum = 0.0;
7453501a2bdSLois Curfman McInnes   Scalar       *v = mat->v;
7463501a2bdSLois Curfman McInnes 
7473501a2bdSLois Curfman McInnes   if (mdn->size == 1) {
7483501a2bdSLois Curfman McInnes     ierr =  MatNorm(mdn->A,type,norm); CHKERRQ(ierr);
7493501a2bdSLois Curfman McInnes   } else {
7503501a2bdSLois Curfman McInnes     if (type == NORM_FROBENIUS) {
7513501a2bdSLois Curfman McInnes       for (i=0; i<mat->n*mat->m; i++ ) {
7523501a2bdSLois Curfman McInnes #if defined(PETSC_COMPLEX)
7533501a2bdSLois Curfman McInnes         sum += real(conj(*v)*(*v)); v++;
7543501a2bdSLois Curfman McInnes #else
7553501a2bdSLois Curfman McInnes         sum += (*v)*(*v); v++;
7563501a2bdSLois Curfman McInnes #endif
7573501a2bdSLois Curfman McInnes       }
7586d84be18SBarry Smith       MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,A->comm);
7593501a2bdSLois Curfman McInnes       *norm = sqrt(*norm);
7603501a2bdSLois Curfman McInnes       PLogFlops(2*mat->n*mat->m);
7613501a2bdSLois Curfman McInnes     }
7623501a2bdSLois Curfman McInnes     else if (type == NORM_1) {
7633501a2bdSLois Curfman McInnes       double *tmp, *tmp2;
7640452661fSBarry Smith       tmp  = (double *) PetscMalloc( 2*mdn->N*sizeof(double) ); CHKPTRQ(tmp);
7653501a2bdSLois Curfman McInnes       tmp2 = tmp + mdn->N;
766cddf8d76SBarry Smith       PetscMemzero(tmp,2*mdn->N*sizeof(double));
767096963f5SLois Curfman McInnes       *norm = 0.0;
7683501a2bdSLois Curfman McInnes       v = mat->v;
7693501a2bdSLois Curfman McInnes       for ( j=0; j<mat->n; j++ ) {
7703501a2bdSLois Curfman McInnes         for ( i=0; i<mat->m; i++ ) {
77167e560aaSBarry Smith           tmp[j] += PetscAbsScalar(*v);  v++;
7723501a2bdSLois Curfman McInnes         }
7733501a2bdSLois Curfman McInnes       }
7746d84be18SBarry Smith       MPI_Allreduce(tmp,tmp2,mdn->N,MPI_DOUBLE,MPI_SUM,A->comm);
7753501a2bdSLois Curfman McInnes       for ( j=0; j<mdn->N; j++ ) {
7763501a2bdSLois Curfman McInnes         if (tmp2[j] > *norm) *norm = tmp2[j];
7773501a2bdSLois Curfman McInnes       }
7780452661fSBarry Smith       PetscFree(tmp);
7793501a2bdSLois Curfman McInnes       PLogFlops(mat->n*mat->m);
7803501a2bdSLois Curfman McInnes     }
7813501a2bdSLois Curfman McInnes     else if (type == NORM_INFINITY) { /* max row norm */
7823501a2bdSLois Curfman McInnes       double ntemp;
7833501a2bdSLois Curfman McInnes       ierr = MatNorm(mdn->A,type,&ntemp); CHKERRQ(ierr);
7846d84be18SBarry Smith       MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,A->comm);
7853501a2bdSLois Curfman McInnes     }
7863501a2bdSLois Curfman McInnes     else {
787e3372554SBarry Smith       SETERRQ(1,0,"No support for two norm");
7883501a2bdSLois Curfman McInnes     }
7893501a2bdSLois Curfman McInnes   }
7903501a2bdSLois Curfman McInnes   return 0;
7913501a2bdSLois Curfman McInnes }
7923501a2bdSLois Curfman McInnes 
7935615d1e5SSatish Balay #undef __FUNC__
7945615d1e5SSatish Balay #define __FUNC__ "MatTranspose_MPIDense"
7953501a2bdSLois Curfman McInnes static int MatTranspose_MPIDense(Mat A,Mat *matout)
7963501a2bdSLois Curfman McInnes {
7973501a2bdSLois Curfman McInnes   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
7983501a2bdSLois Curfman McInnes   Mat_SeqDense *Aloc = (Mat_SeqDense *) a->A->data;
7993501a2bdSLois Curfman McInnes   Mat          B;
8003501a2bdSLois Curfman McInnes   int          M = a->M, N = a->N, m, n, *rwork, rstart = a->rstart;
8013501a2bdSLois Curfman McInnes   int          j, i, ierr;
8023501a2bdSLois Curfman McInnes   Scalar       *v;
8033501a2bdSLois Curfman McInnes 
8047056b6fcSBarry Smith   if (matout == PETSC_NULL && M != N) {
805e3372554SBarry Smith     SETERRQ(1,0,"Supports square matrix only in-place");
8067056b6fcSBarry Smith   }
8077056b6fcSBarry Smith   ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);CHKERRQ(ierr);
8083501a2bdSLois Curfman McInnes 
8093501a2bdSLois Curfman McInnes   m = Aloc->m; n = Aloc->n; v = Aloc->v;
8100452661fSBarry Smith   rwork = (int *) PetscMalloc(n*sizeof(int)); CHKPTRQ(rwork);
8113501a2bdSLois Curfman McInnes   for ( j=0; j<n; j++ ) {
8123501a2bdSLois Curfman McInnes     for (i=0; i<m; i++) rwork[i] = rstart + i;
8133501a2bdSLois Curfman McInnes     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES); CHKERRQ(ierr);
8143501a2bdSLois Curfman McInnes     v   += m;
8153501a2bdSLois Curfman McInnes   }
8160452661fSBarry Smith   PetscFree(rwork);
8176d4a8577SBarry Smith   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
8186d4a8577SBarry Smith   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
8193638b69dSLois Curfman McInnes   if (matout != PETSC_NULL) {
8203501a2bdSLois Curfman McInnes     *matout = B;
8213501a2bdSLois Curfman McInnes   } else {
8223501a2bdSLois Curfman McInnes     /* This isn't really an in-place transpose, but free data struct from a */
8230452661fSBarry Smith     PetscFree(a->rowners);
8243501a2bdSLois Curfman McInnes     ierr = MatDestroy(a->A); CHKERRQ(ierr);
8253501a2bdSLois Curfman McInnes     if (a->lvec) VecDestroy(a->lvec);
8263501a2bdSLois Curfman McInnes     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
8270452661fSBarry Smith     PetscFree(a);
8283501a2bdSLois Curfman McInnes     PetscMemcpy(A,B,sizeof(struct _Mat));
8290452661fSBarry Smith     PetscHeaderDestroy(B);
8303501a2bdSLois Curfman McInnes   }
831096963f5SLois Curfman McInnes   return 0;
832096963f5SLois Curfman McInnes }
833096963f5SLois Curfman McInnes 
83444cd7ae7SLois Curfman McInnes #include "pinclude/plapack.h"
8355615d1e5SSatish Balay #undef __FUNC__
8365615d1e5SSatish Balay #define __FUNC__ "MatScale_MPIDense"
83744cd7ae7SLois Curfman McInnes static int MatScale_MPIDense(Scalar *alpha,Mat inA)
83844cd7ae7SLois Curfman McInnes {
83944cd7ae7SLois Curfman McInnes   Mat_MPIDense *A = (Mat_MPIDense *) inA->data;
84044cd7ae7SLois Curfman McInnes   Mat_SeqDense *a = (Mat_SeqDense *) A->A->data;
84144cd7ae7SLois Curfman McInnes   int          one = 1, nz;
84244cd7ae7SLois Curfman McInnes 
84344cd7ae7SLois Curfman McInnes   nz = a->m*a->n;
84444cd7ae7SLois Curfman McInnes   BLscal_( &nz, alpha, a->v, &one );
84544cd7ae7SLois Curfman McInnes   PLogFlops(nz);
84644cd7ae7SLois Curfman McInnes   return 0;
84744cd7ae7SLois Curfman McInnes }
84844cd7ae7SLois Curfman McInnes 
8493d1612f7SBarry Smith static int MatConvertSameType_MPIDense(Mat,Mat *,int);
8508965ea79SLois Curfman McInnes 
8518965ea79SLois Curfman McInnes /* -------------------------------------------------------------------*/
85239ddd567SLois Curfman McInnes static struct _MatOps MatOps = {MatSetValues_MPIDense,
85339ddd567SLois Curfman McInnes        MatGetRow_MPIDense,MatRestoreRow_MPIDense,
85439ddd567SLois Curfman McInnes        MatMult_MPIDense,MatMultAdd_MPIDense,
855096963f5SLois Curfman McInnes        MatMultTrans_MPIDense,MatMultTransAdd_MPIDense,
856e7ca0642SLois Curfman McInnes /*       MatSolve_MPIDense,0, */
857e7ca0642SLois Curfman McInnes        0,0,
85839ddd567SLois Curfman McInnes        0,0,
8598c469469SLois Curfman McInnes        0,0,
8608c469469SLois Curfman McInnes /*       MatLUFactor_MPIDense,0, */
8618aaee692SLois Curfman McInnes        0,MatTranspose_MPIDense,
86239ddd567SLois Curfman McInnes        MatGetInfo_MPIDense,0,
863096963f5SLois Curfman McInnes        MatGetDiagonal_MPIDense,0,MatNorm_MPIDense,
86439ddd567SLois Curfman McInnes        MatAssemblyBegin_MPIDense,MatAssemblyEnd_MPIDense,
8658965ea79SLois Curfman McInnes        0,
86639ddd567SLois Curfman McInnes        MatSetOption_MPIDense,MatZeroEntries_MPIDense,MatZeroRows_MPIDense,
8673b2fbd54SBarry Smith        0,0,
8688c469469SLois Curfman McInnes /*       0,MatLUFactorSymbolic_MPIDense,MatLUFactorNumeric_MPIDense, */
8698aaee692SLois Curfman McInnes        0,0,
87039ddd567SLois Curfman McInnes        MatGetSize_MPIDense,MatGetLocalSize_MPIDense,
87139ddd567SLois Curfman McInnes        MatGetOwnershipRange_MPIDense,
872ff14e315SSatish Balay        0,0, MatGetArray_MPIDense, MatRestoreArray_MPIDense,
873905e6a2fSBarry Smith        0,MatConvertSameType_MPIDense,
874b49de8d1SLois Curfman McInnes        0,0,0,0,0,
8754e220ebcSLois Curfman McInnes        0,0,MatGetValues_MPIDense,0,0,MatScale_MPIDense,
8764e220ebcSLois Curfman McInnes        0,0,0,MatGetBlockSize_MPIDense};
8778965ea79SLois Curfman McInnes 
8785615d1e5SSatish Balay #undef __FUNC__
8795615d1e5SSatish Balay #define __FUNC__ "MatCreateMPIDense"
8808965ea79SLois Curfman McInnes /*@C
88139ddd567SLois Curfman McInnes    MatCreateMPIDense - Creates a sparse parallel matrix in dense format.
8828965ea79SLois Curfman McInnes 
8838965ea79SLois Curfman McInnes    Input Parameters:
8848965ea79SLois Curfman McInnes .  comm - MPI communicator
8858965ea79SLois Curfman McInnes .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
8868965ea79SLois Curfman McInnes .  n - number of local columns (or PETSC_DECIDE to have calculated
8878965ea79SLois Curfman McInnes            if N is given)
8888965ea79SLois Curfman McInnes .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
8898965ea79SLois Curfman McInnes .  N - number of global columns (or PETSC_DECIDE to have calculated
8908965ea79SLois Curfman McInnes            if n is given)
891b4fd4287SBarry Smith .  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
892dfc5480cSLois Curfman McInnes    to control all matrix memory allocation.
8938965ea79SLois Curfman McInnes 
8948965ea79SLois Curfman McInnes    Output Parameter:
895477f1c0bSLois Curfman McInnes .  A - the matrix
8968965ea79SLois Curfman McInnes 
8978965ea79SLois Curfman McInnes    Notes:
89839ddd567SLois Curfman McInnes    The dense format is fully compatible with standard Fortran 77
89939ddd567SLois Curfman McInnes    storage by columns.
9008965ea79SLois Curfman McInnes 
90118f449edSLois Curfman McInnes    The data input variable is intended primarily for Fortran programmers
90218f449edSLois Curfman McInnes    who wish to allocate their own matrix memory space.  Most users should
903b4fd4287SBarry Smith    set data=PETSC_NULL.
90418f449edSLois Curfman McInnes 
9058965ea79SLois Curfman McInnes    The user MUST specify either the local or global matrix dimensions
9068965ea79SLois Curfman McInnes    (possibly both).
9078965ea79SLois Curfman McInnes 
9083501a2bdSLois Curfman McInnes    Currently, the only parallel dense matrix decomposition is by rows,
9093501a2bdSLois Curfman McInnes    so that n=N and each submatrix owns all of the global columns.
9103501a2bdSLois Curfman McInnes 
91139ddd567SLois Curfman McInnes .keywords: matrix, dense, parallel
9128965ea79SLois Curfman McInnes 
91339ddd567SLois Curfman McInnes .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
9148965ea79SLois Curfman McInnes @*/
915477f1c0bSLois Curfman McInnes int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,Scalar *data,Mat *A)
9168965ea79SLois Curfman McInnes {
9178965ea79SLois Curfman McInnes   Mat          mat;
91839ddd567SLois Curfman McInnes   Mat_MPIDense *a;
91925cdf11fSBarry Smith   int          ierr, i,flg;
9208965ea79SLois Curfman McInnes 
921ed2daf61SLois Curfman McInnes   /* Note:  For now, when data is specified above, this assumes the user correctly
922ed2daf61SLois Curfman McInnes    allocates the local dense storage space.  We should add error checking. */
92318f449edSLois Curfman McInnes 
924477f1c0bSLois Curfman McInnes   *A = 0;
9250452661fSBarry Smith   PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIDENSE,comm);
9268965ea79SLois Curfman McInnes   PLogObjectCreate(mat);
9270452661fSBarry Smith   mat->data       = (void *) (a = PetscNew(Mat_MPIDense)); CHKPTRQ(a);
9288965ea79SLois Curfman McInnes   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
92939ddd567SLois Curfman McInnes   mat->destroy    = MatDestroy_MPIDense;
93039ddd567SLois Curfman McInnes   mat->view       = MatView_MPIDense;
9318965ea79SLois Curfman McInnes   mat->factor     = 0;
93290f02eecSBarry Smith   mat->mapping    = 0;
9338965ea79SLois Curfman McInnes 
934622d7880SLois Curfman McInnes   a->factor       = 0;
9358965ea79SLois Curfman McInnes   a->insertmode   = NOT_SET_VALUES;
9368965ea79SLois Curfman McInnes   MPI_Comm_rank(comm,&a->rank);
9378965ea79SLois Curfman McInnes   MPI_Comm_size(comm,&a->size);
9388965ea79SLois Curfman McInnes 
93939ddd567SLois Curfman McInnes   if (M == PETSC_DECIDE) MPI_Allreduce(&m,&M,1,MPI_INT,MPI_SUM,comm);
9408965ea79SLois Curfman McInnes   if (m == PETSC_DECIDE) {m = M/a->size + ((M % a->size) > a->rank);}
94139ddd567SLois Curfman McInnes 
94239ddd567SLois Curfman McInnes   /* each row stores all columns */
94339ddd567SLois Curfman McInnes   if (N == PETSC_DECIDE) N = n;
944d7e8b826SBarry Smith   if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);}
945e3372554SBarry Smith   /*  if (n != N) SETERRQ(1,0,"For now, only n=N is supported"); */
946aca0ad90SLois Curfman McInnes   a->N = mat->N = N;
947aca0ad90SLois Curfman McInnes   a->M = mat->M = M;
948aca0ad90SLois Curfman McInnes   a->m = mat->m = m;
949aca0ad90SLois Curfman McInnes   a->n = mat->n = n;
9508965ea79SLois Curfman McInnes 
9518965ea79SLois Curfman McInnes   /* build local table of row and column ownerships */
952d7e8b826SBarry Smith   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
953d7e8b826SBarry Smith   a->cowners = a->rowners + a->size + 1;
9547056b6fcSBarry Smith   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIDense));
9558965ea79SLois Curfman McInnes   MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);
9568965ea79SLois Curfman McInnes   a->rowners[0] = 0;
9578965ea79SLois Curfman McInnes   for ( i=2; i<=a->size; i++ ) {
9588965ea79SLois Curfman McInnes     a->rowners[i] += a->rowners[i-1];
9598965ea79SLois Curfman McInnes   }
9608965ea79SLois Curfman McInnes   a->rstart = a->rowners[a->rank];
9618965ea79SLois Curfman McInnes   a->rend   = a->rowners[a->rank+1];
962d7e8b826SBarry Smith   MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);
963d7e8b826SBarry Smith   a->cowners[0] = 0;
964d7e8b826SBarry Smith   for ( i=2; i<=a->size; i++ ) {
965d7e8b826SBarry Smith     a->cowners[i] += a->cowners[i-1];
966d7e8b826SBarry Smith   }
9678965ea79SLois Curfman McInnes 
96818f449edSLois Curfman McInnes   ierr = MatCreateSeqDense(MPI_COMM_SELF,m,N,data,&a->A); CHKERRQ(ierr);
9698965ea79SLois Curfman McInnes   PLogObjectParent(mat,a->A);
9708965ea79SLois Curfman McInnes 
9718965ea79SLois Curfman McInnes   /* build cache for off array entries formed */
9728965ea79SLois Curfman McInnes   ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr);
9738965ea79SLois Curfman McInnes 
9748965ea79SLois Curfman McInnes   /* stuff used for matrix vector multiply */
9758965ea79SLois Curfman McInnes   a->lvec        = 0;
9768965ea79SLois Curfman McInnes   a->Mvctx       = 0;
97739b7565bSBarry Smith   a->roworiented = 1;
9788965ea79SLois Curfman McInnes 
979477f1c0bSLois Curfman McInnes   *A = mat;
98025cdf11fSBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
98125cdf11fSBarry Smith   if (flg) {
9828c469469SLois Curfman McInnes     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
9838c469469SLois Curfman McInnes   }
9848965ea79SLois Curfman McInnes   return 0;
9858965ea79SLois Curfman McInnes }
9868965ea79SLois Curfman McInnes 
9875615d1e5SSatish Balay #undef __FUNC__
9885615d1e5SSatish Balay #define __FUNC__ "MatConvertSameType_MPIDense"
9893d1612f7SBarry Smith static int MatConvertSameType_MPIDense(Mat A,Mat *newmat,int cpvalues)
9908965ea79SLois Curfman McInnes {
9918965ea79SLois Curfman McInnes   Mat          mat;
9923501a2bdSLois Curfman McInnes   Mat_MPIDense *a,*oldmat = (Mat_MPIDense *) A->data;
99339ddd567SLois Curfman McInnes   int          ierr;
9942ba99913SLois Curfman McInnes   FactorCtx    *factor;
9958965ea79SLois Curfman McInnes 
9968965ea79SLois Curfman McInnes   *newmat       = 0;
9970452661fSBarry Smith   PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIDENSE,A->comm);
9988965ea79SLois Curfman McInnes   PLogObjectCreate(mat);
9990452661fSBarry Smith   mat->data      = (void *) (a = PetscNew(Mat_MPIDense)); CHKPTRQ(a);
10008965ea79SLois Curfman McInnes   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
100139ddd567SLois Curfman McInnes   mat->destroy   = MatDestroy_MPIDense;
100239ddd567SLois Curfman McInnes   mat->view      = MatView_MPIDense;
10033501a2bdSLois Curfman McInnes   mat->factor    = A->factor;
1004c456f294SBarry Smith   mat->assembled = PETSC_TRUE;
10058965ea79SLois Curfman McInnes 
100644cd7ae7SLois Curfman McInnes   a->m = mat->m = oldmat->m;
100744cd7ae7SLois Curfman McInnes   a->n = mat->n = oldmat->n;
100844cd7ae7SLois Curfman McInnes   a->M = mat->M = oldmat->M;
100944cd7ae7SLois Curfman McInnes   a->N = mat->N = oldmat->N;
10102ba99913SLois Curfman McInnes   if (oldmat->factor) {
10112ba99913SLois Curfman McInnes     a->factor = (FactorCtx *) (factor = PetscNew(FactorCtx)); CHKPTRQ(factor);
10122ba99913SLois Curfman McInnes     /* copy factor contents ... add this code! */
10132ba99913SLois Curfman McInnes   } else a->factor = 0;
10148965ea79SLois Curfman McInnes 
10158965ea79SLois Curfman McInnes   a->rstart     = oldmat->rstart;
10168965ea79SLois Curfman McInnes   a->rend       = oldmat->rend;
10178965ea79SLois Curfman McInnes   a->size       = oldmat->size;
10188965ea79SLois Curfman McInnes   a->rank       = oldmat->rank;
10198965ea79SLois Curfman McInnes   a->insertmode = NOT_SET_VALUES;
10208965ea79SLois Curfman McInnes 
10210452661fSBarry Smith   a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners);
102239ddd567SLois Curfman McInnes   PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIDense));
10238965ea79SLois Curfman McInnes   PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));
10248965ea79SLois Curfman McInnes   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
10258965ea79SLois Curfman McInnes 
10268965ea79SLois Curfman McInnes   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
10278965ea79SLois Curfman McInnes   PLogObjectParent(mat,a->lvec);
102855659b69SBarry Smith   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
10298965ea79SLois Curfman McInnes   PLogObjectParent(mat,a->Mvctx);
10308965ea79SLois Curfman McInnes   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
10318965ea79SLois Curfman McInnes   PLogObjectParent(mat,a->A);
10328965ea79SLois Curfman McInnes   *newmat = mat;
10338965ea79SLois Curfman McInnes   return 0;
10348965ea79SLois Curfman McInnes }
10358965ea79SLois Curfman McInnes 
103677c4ece6SBarry Smith #include "sys.h"
10378965ea79SLois Curfman McInnes 
10385615d1e5SSatish Balay #undef __FUNC__
10395615d1e5SSatish Balay #define __FUNC__ "MatLoad_MPIDense_DenseInFile"
104090ace30eSBarry Smith int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M, int N, Mat *newmat)
104190ace30eSBarry Smith {
104290ace30eSBarry Smith   int        *rowners, i,size,rank,m,rstart,rend,ierr,nz,j;
104390ace30eSBarry Smith   Scalar     *array,*vals,*vals_ptr;
104490ace30eSBarry Smith   MPI_Status status;
104590ace30eSBarry Smith 
104690ace30eSBarry Smith   MPI_Comm_rank(comm,&rank);
104790ace30eSBarry Smith   MPI_Comm_size(comm,&size);
104890ace30eSBarry Smith 
104990ace30eSBarry Smith   /* determine ownership of all rows */
105090ace30eSBarry Smith   m = M/size + ((M % size) > rank);
105190ace30eSBarry Smith   rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
105290ace30eSBarry Smith   MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
105390ace30eSBarry Smith   rowners[0] = 0;
105490ace30eSBarry Smith   for ( i=2; i<=size; i++ ) {
105590ace30eSBarry Smith     rowners[i] += rowners[i-1];
105690ace30eSBarry Smith   }
105790ace30eSBarry Smith   rstart = rowners[rank];
105890ace30eSBarry Smith   rend   = rowners[rank+1];
105990ace30eSBarry Smith 
106090ace30eSBarry Smith   ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
106190ace30eSBarry Smith   ierr = MatGetArray(*newmat,&array); CHKERRQ(ierr);
106290ace30eSBarry Smith 
106390ace30eSBarry Smith   if (!rank) {
106490ace30eSBarry Smith     vals = (Scalar *) PetscMalloc( m*N*sizeof(Scalar) ); CHKPTRQ(vals);
106590ace30eSBarry Smith 
106690ace30eSBarry Smith     /* read in my part of the matrix numerical values  */
106777c4ece6SBarry Smith     ierr = PetscBinaryRead(fd,vals,m*N,BINARY_SCALAR); CHKERRQ(ierr);
106890ace30eSBarry Smith 
106990ace30eSBarry Smith     /* insert into matrix-by row (this is why cannot directly read into array */
107090ace30eSBarry Smith     vals_ptr = vals;
107190ace30eSBarry Smith     for ( i=0; i<m; i++ ) {
107290ace30eSBarry Smith       for ( j=0; j<N; j++ ) {
107390ace30eSBarry Smith         array[i + j*m] = *vals_ptr++;
107490ace30eSBarry Smith       }
107590ace30eSBarry Smith     }
107690ace30eSBarry Smith 
107790ace30eSBarry Smith     /* read in other processors and ship out */
107890ace30eSBarry Smith     for ( i=1; i<size; i++ ) {
107990ace30eSBarry Smith       nz   = (rowners[i+1] - rowners[i])*N;
108077c4ece6SBarry Smith       ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
108190ace30eSBarry Smith       MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);
108290ace30eSBarry Smith     }
108390ace30eSBarry Smith   }
108490ace30eSBarry Smith   else {
108590ace30eSBarry Smith     /* receive numeric values */
108690ace30eSBarry Smith     vals = (Scalar*) PetscMalloc( m*N*sizeof(Scalar) ); CHKPTRQ(vals);
108790ace30eSBarry Smith 
108890ace30eSBarry Smith     /* receive message of values*/
108990ace30eSBarry Smith     MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);
109090ace30eSBarry Smith 
109190ace30eSBarry Smith     /* insert into matrix-by row (this is why cannot directly read into array */
109290ace30eSBarry Smith     vals_ptr = vals;
109390ace30eSBarry Smith     for ( i=0; i<m; i++ ) {
109490ace30eSBarry Smith       for ( j=0; j<N; j++ ) {
109590ace30eSBarry Smith         array[i + j*m] = *vals_ptr++;
109690ace30eSBarry Smith       }
109790ace30eSBarry Smith     }
109890ace30eSBarry Smith   }
109990ace30eSBarry Smith   PetscFree(rowners);
110090ace30eSBarry Smith   PetscFree(vals);
11016d4a8577SBarry Smith   ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
11026d4a8577SBarry Smith   ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
110390ace30eSBarry Smith   return 0;
110490ace30eSBarry Smith }
110590ace30eSBarry Smith 
110690ace30eSBarry Smith 
11075615d1e5SSatish Balay #undef __FUNC__
11085615d1e5SSatish Balay #define __FUNC__ "MatLoad_MPIDense"
110919bcc07fSBarry Smith int MatLoad_MPIDense(Viewer viewer,MatType type,Mat *newmat)
11108965ea79SLois Curfman McInnes {
11118965ea79SLois Curfman McInnes   Mat          A;
11128965ea79SLois Curfman McInnes   int          i, nz, ierr, j,rstart, rend, fd;
11138965ea79SLois Curfman McInnes   Scalar       *vals,*svals;
111419bcc07fSBarry Smith   MPI_Comm     comm = ((PetscObject)viewer)->comm;
11158965ea79SLois Curfman McInnes   MPI_Status   status;
11168965ea79SLois Curfman McInnes   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
11178965ea79SLois Curfman McInnes   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
111819bcc07fSBarry Smith   int          tag = ((PetscObject)viewer)->tag;
11198965ea79SLois Curfman McInnes 
11208965ea79SLois Curfman McInnes   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
11218965ea79SLois Curfman McInnes   if (!rank) {
112290ace30eSBarry Smith     ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
112377c4ece6SBarry Smith     ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr);
1124e3372554SBarry Smith     if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not matrix object");
11258965ea79SLois Curfman McInnes   }
11268965ea79SLois Curfman McInnes 
11278965ea79SLois Curfman McInnes   MPI_Bcast(header+1,3,MPI_INT,0,comm);
112890ace30eSBarry Smith   M = header[1]; N = header[2]; nz = header[3];
112990ace30eSBarry Smith 
113090ace30eSBarry Smith   /*
113190ace30eSBarry Smith        Handle case where matrix is stored on disk as a dense matrix
113290ace30eSBarry Smith   */
113390ace30eSBarry Smith   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
113490ace30eSBarry Smith     return MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);
113590ace30eSBarry Smith   }
113690ace30eSBarry Smith 
11378965ea79SLois Curfman McInnes   /* determine ownership of all rows */
11388965ea79SLois Curfman McInnes   m = M/size + ((M % size) > rank);
11390452661fSBarry Smith   rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
11408965ea79SLois Curfman McInnes   MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
11418965ea79SLois Curfman McInnes   rowners[0] = 0;
11428965ea79SLois Curfman McInnes   for ( i=2; i<=size; i++ ) {
11438965ea79SLois Curfman McInnes     rowners[i] += rowners[i-1];
11448965ea79SLois Curfman McInnes   }
11458965ea79SLois Curfman McInnes   rstart = rowners[rank];
11468965ea79SLois Curfman McInnes   rend   = rowners[rank+1];
11478965ea79SLois Curfman McInnes 
11488965ea79SLois Curfman McInnes   /* distribute row lengths to all processors */
11490452661fSBarry Smith   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
11508965ea79SLois Curfman McInnes   offlens = ourlens + (rend-rstart);
11518965ea79SLois Curfman McInnes   if (!rank) {
11520452661fSBarry Smith     rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
115377c4ece6SBarry Smith     ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
11540452661fSBarry Smith     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
11558965ea79SLois Curfman McInnes     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
11568965ea79SLois Curfman McInnes     MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
11570452661fSBarry Smith     PetscFree(sndcounts);
11588965ea79SLois Curfman McInnes   }
11598965ea79SLois Curfman McInnes   else {
11608965ea79SLois Curfman McInnes     MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
11618965ea79SLois Curfman McInnes   }
11628965ea79SLois Curfman McInnes 
11638965ea79SLois Curfman McInnes   if (!rank) {
11648965ea79SLois Curfman McInnes     /* calculate the number of nonzeros on each processor */
11650452661fSBarry Smith     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
1166cddf8d76SBarry Smith     PetscMemzero(procsnz,size*sizeof(int));
11678965ea79SLois Curfman McInnes     for ( i=0; i<size; i++ ) {
11688965ea79SLois Curfman McInnes       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
11698965ea79SLois Curfman McInnes         procsnz[i] += rowlengths[j];
11708965ea79SLois Curfman McInnes       }
11718965ea79SLois Curfman McInnes     }
11720452661fSBarry Smith     PetscFree(rowlengths);
11738965ea79SLois Curfman McInnes 
11748965ea79SLois Curfman McInnes     /* determine max buffer needed and allocate it */
11758965ea79SLois Curfman McInnes     maxnz = 0;
11768965ea79SLois Curfman McInnes     for ( i=0; i<size; i++ ) {
11770452661fSBarry Smith       maxnz = PetscMax(maxnz,procsnz[i]);
11788965ea79SLois Curfman McInnes     }
11790452661fSBarry Smith     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
11808965ea79SLois Curfman McInnes 
11818965ea79SLois Curfman McInnes     /* read in my part of the matrix column indices  */
11828965ea79SLois Curfman McInnes     nz = procsnz[0];
11830452661fSBarry Smith     mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
118477c4ece6SBarry Smith     ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr);
11858965ea79SLois Curfman McInnes 
11868965ea79SLois Curfman McInnes     /* read in every one elses and ship off */
11878965ea79SLois Curfman McInnes     for ( i=1; i<size; i++ ) {
11888965ea79SLois Curfman McInnes       nz = procsnz[i];
118977c4ece6SBarry Smith       ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
11908965ea79SLois Curfman McInnes       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
11918965ea79SLois Curfman McInnes     }
11920452661fSBarry Smith     PetscFree(cols);
11938965ea79SLois Curfman McInnes   }
11948965ea79SLois Curfman McInnes   else {
11958965ea79SLois Curfman McInnes     /* determine buffer space needed for message */
11968965ea79SLois Curfman McInnes     nz = 0;
11978965ea79SLois Curfman McInnes     for ( i=0; i<m; i++ ) {
11988965ea79SLois Curfman McInnes       nz += ourlens[i];
11998965ea79SLois Curfman McInnes     }
12000452661fSBarry Smith     mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
12018965ea79SLois Curfman McInnes 
12028965ea79SLois Curfman McInnes     /* receive message of column indices*/
12038965ea79SLois Curfman McInnes     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
12048965ea79SLois Curfman McInnes     MPI_Get_count(&status,MPI_INT,&maxnz);
1205e3372554SBarry Smith     if (maxnz != nz) SETERRQ(1,0,"something is wrong with file");
12068965ea79SLois Curfman McInnes   }
12078965ea79SLois Curfman McInnes 
12088965ea79SLois Curfman McInnes   /* loop over local rows, determining number of off diagonal entries */
1209cddf8d76SBarry Smith   PetscMemzero(offlens,m*sizeof(int));
12108965ea79SLois Curfman McInnes   jj = 0;
12118965ea79SLois Curfman McInnes   for ( i=0; i<m; i++ ) {
12128965ea79SLois Curfman McInnes     for ( j=0; j<ourlens[i]; j++ ) {
12138965ea79SLois Curfman McInnes       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
12148965ea79SLois Curfman McInnes       jj++;
12158965ea79SLois Curfman McInnes     }
12168965ea79SLois Curfman McInnes   }
12178965ea79SLois Curfman McInnes 
12188965ea79SLois Curfman McInnes   /* create our matrix */
12198965ea79SLois Curfman McInnes   for ( i=0; i<m; i++ ) {
12208965ea79SLois Curfman McInnes     ourlens[i] -= offlens[i];
12218965ea79SLois Curfman McInnes   }
1222b4fd4287SBarry Smith   ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
12238965ea79SLois Curfman McInnes   A = *newmat;
12248965ea79SLois Curfman McInnes   for ( i=0; i<m; i++ ) {
12258965ea79SLois Curfman McInnes     ourlens[i] += offlens[i];
12268965ea79SLois Curfman McInnes   }
12278965ea79SLois Curfman McInnes 
12288965ea79SLois Curfman McInnes   if (!rank) {
12290452661fSBarry Smith     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);
12308965ea79SLois Curfman McInnes 
12318965ea79SLois Curfman McInnes     /* read in my part of the matrix numerical values  */
12328965ea79SLois Curfman McInnes     nz = procsnz[0];
123377c4ece6SBarry Smith     ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
12348965ea79SLois Curfman McInnes 
12358965ea79SLois Curfman McInnes     /* insert into matrix */
12368965ea79SLois Curfman McInnes     jj      = rstart;
12378965ea79SLois Curfman McInnes     smycols = mycols;
12388965ea79SLois Curfman McInnes     svals   = vals;
12398965ea79SLois Curfman McInnes     for ( i=0; i<m; i++ ) {
12408965ea79SLois Curfman McInnes       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
12418965ea79SLois Curfman McInnes       smycols += ourlens[i];
12428965ea79SLois Curfman McInnes       svals   += ourlens[i];
12438965ea79SLois Curfman McInnes       jj++;
12448965ea79SLois Curfman McInnes     }
12458965ea79SLois Curfman McInnes 
12468965ea79SLois Curfman McInnes     /* read in other processors and ship out */
12478965ea79SLois Curfman McInnes     for ( i=1; i<size; i++ ) {
12488965ea79SLois Curfman McInnes       nz = procsnz[i];
124977c4ece6SBarry Smith       ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
12508965ea79SLois Curfman McInnes       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
12518965ea79SLois Curfman McInnes     }
12520452661fSBarry Smith     PetscFree(procsnz);
12538965ea79SLois Curfman McInnes   }
12548965ea79SLois Curfman McInnes   else {
12558965ea79SLois Curfman McInnes     /* receive numeric values */
12560452661fSBarry Smith     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);
12578965ea79SLois Curfman McInnes 
12588965ea79SLois Curfman McInnes     /* receive message of values*/
12598965ea79SLois Curfman McInnes     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
12608965ea79SLois Curfman McInnes     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1261e3372554SBarry Smith     if (maxnz != nz) SETERRQ(1,0,"something is wrong with file");
12628965ea79SLois Curfman McInnes 
12638965ea79SLois Curfman McInnes     /* insert into matrix */
12648965ea79SLois Curfman McInnes     jj      = rstart;
12658965ea79SLois Curfman McInnes     smycols = mycols;
12668965ea79SLois Curfman McInnes     svals   = vals;
12678965ea79SLois Curfman McInnes     for ( i=0; i<m; i++ ) {
12688965ea79SLois Curfman McInnes       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
12698965ea79SLois Curfman McInnes       smycols += ourlens[i];
12708965ea79SLois Curfman McInnes       svals   += ourlens[i];
12718965ea79SLois Curfman McInnes       jj++;
12728965ea79SLois Curfman McInnes     }
12738965ea79SLois Curfman McInnes   }
12740452661fSBarry Smith   PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);
12758965ea79SLois Curfman McInnes 
12766d4a8577SBarry Smith   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
12776d4a8577SBarry Smith   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
12788965ea79SLois Curfman McInnes   return 0;
12798965ea79SLois Curfman McInnes }
128090ace30eSBarry Smith 
128190ace30eSBarry Smith 
128290ace30eSBarry Smith 
128390ace30eSBarry Smith 
128490ace30eSBarry Smith 
1285