xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision bb5a7306ba2b79be99f6d9dff5d00348384195e8)
179bdfe76SSatish Balay #ifndef lint
2*bb5a7306SBarry Smith static char vcid[] = "$Id: mpibaij.c,v 1.44 1997/01/01 03:38:25 bsmith Exp bsmith $";
379bdfe76SSatish Balay #endif
479bdfe76SSatish Balay 
570f55243SBarry Smith #include "src/mat/impls/baij/mpi/mpibaij.h"
6c16cb8f2SBarry Smith #include "src/vec/vecimpl.h"
779bdfe76SSatish Balay 
857b952d6SSatish Balay #include "draw.h"
957b952d6SSatish Balay #include "pinclude/pviewer.h"
1057b952d6SSatish Balay 
1157b952d6SSatish Balay extern int MatSetUpMultiply_MPIBAIJ(Mat);
1257b952d6SSatish Balay extern int DisAssemble_MPIBAIJ(Mat);
13d212a18eSSatish Balay extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int);
14d212a18eSSatish Balay extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatGetSubMatrixCall,Mat **);
1593bc47c4SSatish Balay extern int MatLUFactorSymbolic_MPIBAIJ(Mat,IS,IS,double,Mat *);
1693bc47c4SSatish Balay extern int MatLUFactorNumeric_MPIBAIJ(Mat,Mat *);
1793bc47c4SSatish Balay extern int MatLUFactor_MPIBAIJ(Mat,IS,IS,double);
1893bc47c4SSatish Balay extern int MatSolve_MPIBAIJ(Mat,Vec,Vec);
1993bc47c4SSatish Balay extern int MatSolveAdd_MPIBAIJ(Mat,Vec,Vec,Vec);
2093bc47c4SSatish Balay extern int MatSolveTrans_MPIBAIJ(Mat,Vec,Vec);
2193bc47c4SSatish Balay extern int MatSolveTransAdd_MPIBAIJ(Mat,Vec,Vec,Vec);
2293bc47c4SSatish Balay extern int MatILUFactorSymbolic_MPIBAIJ(Mat,IS,IS,double,int,Mat *);
2357b952d6SSatish Balay 
243b2fbd54SBarry Smith 
25537820f0SBarry Smith /*
26537820f0SBarry Smith      Local utility routine that creates a mapping from the global column
2757b952d6SSatish Balay    number to the local number in the off-diagonal part of the local
2857b952d6SSatish Balay    storage of the matrix.  This is done in a non scable way since the
2957b952d6SSatish Balay    length of colmap equals the global matrix length.
3057b952d6SSatish Balay */
31026e39d0SSatish Balay #undef __FUNCTION__
32026e39d0SSatish Balay #define __FUNCTION__ "CreateColmap_MPIBAIJ_Private"
3357b952d6SSatish Balay static int CreateColmap_MPIBAIJ_Private(Mat mat)
3457b952d6SSatish Balay {
3557b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
3657b952d6SSatish Balay   Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data;
37928fc39bSSatish Balay   int         nbs = B->nbs,i,bs=B->bs;;
3857b952d6SSatish Balay 
3957b952d6SSatish Balay   baij->colmap = (int *) PetscMalloc(baij->Nbs*sizeof(int));CHKPTRQ(baij->colmap);
4057b952d6SSatish Balay   PLogObjectMemory(mat,baij->Nbs*sizeof(int));
4157b952d6SSatish Balay   PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));
42928fc39bSSatish Balay   for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1;
4357b952d6SSatish Balay   return 0;
4457b952d6SSatish Balay }
4557b952d6SSatish Balay 
46026e39d0SSatish Balay #undef __FUNCTION__
47026e39d0SSatish Balay #define __FUNCTION__ "MatGetRowIJ_MPIBAIJ("
483b2fbd54SBarry Smith static int MatGetRowIJ_MPIBAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
493b2fbd54SBarry Smith                             PetscTruth *done)
5057b952d6SSatish Balay {
513b2fbd54SBarry Smith   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *) mat->data;
5257b952d6SSatish Balay   int         ierr;
533b2fbd54SBarry Smith   if (aij->size == 1) {
543b2fbd54SBarry Smith     ierr = MatGetRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
55e3372554SBarry Smith   } else SETERRQ(1,0,"not supported in parallel");
563b2fbd54SBarry Smith   return 0;
573b2fbd54SBarry Smith }
583b2fbd54SBarry Smith 
59026e39d0SSatish Balay #undef __FUNCTION__
60026e39d0SSatish Balay #define __FUNCTION__ "MatRestoreRowIJ_MPIBAIJ"
613b2fbd54SBarry Smith static int MatRestoreRowIJ_MPIBAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
623b2fbd54SBarry Smith                                 PetscTruth *done)
633b2fbd54SBarry Smith {
643b2fbd54SBarry Smith   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *) mat->data;
653b2fbd54SBarry Smith   int        ierr;
663b2fbd54SBarry Smith   if (aij->size == 1) {
673b2fbd54SBarry Smith     ierr = MatRestoreRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
68e3372554SBarry Smith   } else SETERRQ(1,0,"not supported in parallel");
6957b952d6SSatish Balay   return 0;
7057b952d6SSatish Balay }
7157b952d6SSatish Balay 
72639f9d9dSBarry Smith extern int MatSetValues_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
73026e39d0SSatish Balay #undef __FUNCTION__
74026e39d0SSatish Balay #define __FUNCTION__ "MatSetValues_MPIBAIJ"
7557b952d6SSatish Balay static int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
7657b952d6SSatish Balay {
7757b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
7857b952d6SSatish Balay   Scalar      value;
794fa0d573SSatish Balay   int         ierr,i,j,row,col;
804fa0d573SSatish Balay   int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
814fa0d573SSatish Balay   int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
824fa0d573SSatish Balay   int         cend_orig=baij->cend_bs,bs=baij->bs;
8357b952d6SSatish Balay 
842c3acbe9SBarry Smith #if defined(PETSC_BOPT_g)
8557b952d6SSatish Balay   if (baij->insertmode != NOT_SET_VALUES && baij->insertmode != addv) {
86e3372554SBarry Smith     SETERRQ(1,0,"Cannot mix inserts and adds");
8757b952d6SSatish Balay   }
882c3acbe9SBarry Smith #endif
8957b952d6SSatish Balay   baij->insertmode = addv;
9057b952d6SSatish Balay   for ( i=0; i<m; i++ ) {
91639f9d9dSBarry Smith #if defined(PETSC_BOPT_g)
92e3372554SBarry Smith     if (im[i] < 0) SETERRQ(1,0,"Negative row");
93e3372554SBarry Smith     if (im[i] >= baij->M) SETERRQ(1,0,"Row too large");
94639f9d9dSBarry Smith #endif
9557b952d6SSatish Balay     if (im[i] >= rstart_orig && im[i] < rend_orig) {
9657b952d6SSatish Balay       row = im[i] - rstart_orig;
9757b952d6SSatish Balay       for ( j=0; j<n; j++ ) {
9857b952d6SSatish Balay         if (in[j] >= cstart_orig && in[j] < cend_orig){
9957b952d6SSatish Balay           col = in[j] - cstart_orig;
10057b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
101639f9d9dSBarry Smith           ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
10257b952d6SSatish Balay         }
103639f9d9dSBarry Smith #if defined(PETSC_BOPT_g)
104e3372554SBarry Smith         else if (in[j] < 0) {SETERRQ(1,0,"Negative column");}
105e3372554SBarry Smith         else if (in[j] >= baij->N) {SETERRQ(1,0,"Col too large");}
106639f9d9dSBarry Smith #endif
10757b952d6SSatish Balay         else {
10857b952d6SSatish Balay           if (mat->was_assembled) {
109905e6a2fSBarry Smith             if (!baij->colmap) {
110905e6a2fSBarry Smith               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
111905e6a2fSBarry Smith             }
112905e6a2fSBarry Smith             col = baij->colmap[in[j]/bs] - 1 + in[j]%bs;
11357b952d6SSatish Balay             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
11457b952d6SSatish Balay               ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
11557b952d6SSatish Balay               col =  in[j];
11657b952d6SSatish Balay             }
11757b952d6SSatish Balay           }
11857b952d6SSatish Balay           else col = in[j];
11957b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
120639f9d9dSBarry Smith           ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
12157b952d6SSatish Balay         }
12257b952d6SSatish Balay       }
12357b952d6SSatish Balay     }
12457b952d6SSatish Balay     else {
12590f02eecSBarry Smith       if (roworiented && !baij->donotstash) {
12657b952d6SSatish Balay         ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
12757b952d6SSatish Balay       }
12857b952d6SSatish Balay       else {
12990f02eecSBarry Smith         if (!baij->donotstash) {
13057b952d6SSatish Balay           row = im[i];
13157b952d6SSatish Balay 	  for ( j=0; j<n; j++ ) {
13257b952d6SSatish Balay 	    ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
13357b952d6SSatish Balay           }
13457b952d6SSatish Balay         }
13557b952d6SSatish Balay       }
13657b952d6SSatish Balay     }
13790f02eecSBarry Smith   }
13857b952d6SSatish Balay   return 0;
13957b952d6SSatish Balay }
14057b952d6SSatish Balay 
141026e39d0SSatish Balay #undef __FUNCTION__
142026e39d0SSatish Balay #define __FUNCTION__ "MatGetValues_MPIBAIJ"
143d6de1c52SSatish Balay static int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
144d6de1c52SSatish Balay {
145d6de1c52SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
146d6de1c52SSatish Balay   int        bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs;
147d6de1c52SSatish Balay   int        bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col;
148d6de1c52SSatish Balay 
149d6de1c52SSatish Balay   for ( i=0; i<m; i++ ) {
150e3372554SBarry Smith     if (idxm[i] < 0) SETERRQ(1,0,"Negative row");
151e3372554SBarry Smith     if (idxm[i] >= baij->M) SETERRQ(1,0,"Row too large");
152d6de1c52SSatish Balay     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
153d6de1c52SSatish Balay       row = idxm[i] - bsrstart;
154d6de1c52SSatish Balay       for ( j=0; j<n; j++ ) {
155e3372554SBarry Smith         if (idxn[j] < 0) SETERRQ(1,0,"Negative column");
156e3372554SBarry Smith         if (idxn[j] >= baij->N) SETERRQ(1,0,"Col too large");
157d6de1c52SSatish Balay         if (idxn[j] >= bscstart && idxn[j] < bscend){
158d6de1c52SSatish Balay           col = idxn[j] - bscstart;
159d6de1c52SSatish Balay           ierr = MatGetValues(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
160d6de1c52SSatish Balay         }
161d6de1c52SSatish Balay         else {
162905e6a2fSBarry Smith           if (!baij->colmap) {
163905e6a2fSBarry Smith             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
164905e6a2fSBarry Smith           }
165e60e1c95SSatish Balay           if((baij->colmap[idxn[j]/bs]-1 < 0) ||
166dcb20de4SSatish Balay              (baij->garray[(baij->colmap[idxn[j]/bs]-1)/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
167d9d09a02SSatish Balay           else {
168dcb20de4SSatish Balay             col  = (baij->colmap[idxn[j]/bs]-1) + idxn[j]%bs;
169d6de1c52SSatish Balay             ierr = MatGetValues(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
170d6de1c52SSatish Balay           }
171d6de1c52SSatish Balay         }
172d6de1c52SSatish Balay       }
173d9d09a02SSatish Balay     }
174d6de1c52SSatish Balay     else {
175e3372554SBarry Smith       SETERRQ(1,0,"Only local values currently supported");
176d6de1c52SSatish Balay     }
177d6de1c52SSatish Balay   }
178d6de1c52SSatish Balay   return 0;
179d6de1c52SSatish Balay }
180d6de1c52SSatish Balay 
181026e39d0SSatish Balay #undef __FUNCTION__
182026e39d0SSatish Balay #define __FUNCTION__ "MatNorm_MPIBAIJ"
183d6de1c52SSatish Balay static int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm)
184d6de1c52SSatish Balay {
185d6de1c52SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
186d6de1c52SSatish Balay   Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data;
187acdf5bf4SSatish Balay   int        ierr, i,bs2=baij->bs2;
188d6de1c52SSatish Balay   double     sum = 0.0;
189d6de1c52SSatish Balay   Scalar     *v;
190d6de1c52SSatish Balay 
191d6de1c52SSatish Balay   if (baij->size == 1) {
192d6de1c52SSatish Balay     ierr =  MatNorm(baij->A,type,norm); CHKERRQ(ierr);
193d6de1c52SSatish Balay   } else {
194d6de1c52SSatish Balay     if (type == NORM_FROBENIUS) {
195d6de1c52SSatish Balay       v = amat->a;
196d6de1c52SSatish Balay       for (i=0; i<amat->nz*bs2; i++ ) {
197d6de1c52SSatish Balay #if defined(PETSC_COMPLEX)
198d6de1c52SSatish Balay         sum += real(conj(*v)*(*v)); v++;
199d6de1c52SSatish Balay #else
200d6de1c52SSatish Balay         sum += (*v)*(*v); v++;
201d6de1c52SSatish Balay #endif
202d6de1c52SSatish Balay       }
203d6de1c52SSatish Balay       v = bmat->a;
204d6de1c52SSatish Balay       for (i=0; i<bmat->nz*bs2; i++ ) {
205d6de1c52SSatish Balay #if defined(PETSC_COMPLEX)
206d6de1c52SSatish Balay         sum += real(conj(*v)*(*v)); v++;
207d6de1c52SSatish Balay #else
208d6de1c52SSatish Balay         sum += (*v)*(*v); v++;
209d6de1c52SSatish Balay #endif
210d6de1c52SSatish Balay       }
211d6de1c52SSatish Balay       MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
212d6de1c52SSatish Balay       *norm = sqrt(*norm);
213d6de1c52SSatish Balay     }
214acdf5bf4SSatish Balay     else
215e3372554SBarry Smith       SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
216d6de1c52SSatish Balay   }
217d6de1c52SSatish Balay   return 0;
218d6de1c52SSatish Balay }
21957b952d6SSatish Balay 
220026e39d0SSatish Balay #undef __FUNCTION__
221026e39d0SSatish Balay #define __FUNCTION__ "MatAssemblyBegin_MPIBAIJ"
22257b952d6SSatish Balay static int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
22357b952d6SSatish Balay {
22457b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
22557b952d6SSatish Balay   MPI_Comm    comm = mat->comm;
22657b952d6SSatish Balay   int         size = baij->size, *owners = baij->rowners,bs=baij->bs;
22757b952d6SSatish Balay   int         rank = baij->rank,tag = mat->tag, *owner,*starts,count,ierr;
22857b952d6SSatish Balay   MPI_Request *send_waits,*recv_waits;
22957b952d6SSatish Balay   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
23057b952d6SSatish Balay   InsertMode  addv;
23157b952d6SSatish Balay   Scalar      *rvalues,*svalues;
23257b952d6SSatish Balay 
23357b952d6SSatish Balay   /* make sure all processors are either in INSERTMODE or ADDMODE */
23457b952d6SSatish Balay   MPI_Allreduce(&baij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
23557b952d6SSatish Balay   if (addv == (ADD_VALUES|INSERT_VALUES)) {
236e3372554SBarry Smith     SETERRQ(1,0,"Some processors inserted others added");
23757b952d6SSatish Balay   }
23857b952d6SSatish Balay   baij->insertmode = addv; /* in case this processor had no cache */
23957b952d6SSatish Balay 
24057b952d6SSatish Balay   /*  first count number of contributors to each processor */
24157b952d6SSatish Balay   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
24257b952d6SSatish Balay   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
24357b952d6SSatish Balay   owner = (int *) PetscMalloc( (baij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
24457b952d6SSatish Balay   for ( i=0; i<baij->stash.n; i++ ) {
24557b952d6SSatish Balay     idx = baij->stash.idx[i];
24657b952d6SSatish Balay     for ( j=0; j<size; j++ ) {
24757b952d6SSatish Balay       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
24857b952d6SSatish Balay         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
24957b952d6SSatish Balay       }
25057b952d6SSatish Balay     }
25157b952d6SSatish Balay   }
25257b952d6SSatish Balay   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
25357b952d6SSatish Balay 
25457b952d6SSatish Balay   /* inform other processors of number of messages and max length*/
25557b952d6SSatish Balay   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
25657b952d6SSatish Balay   MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);
25757b952d6SSatish Balay   nreceives = work[rank];
25857b952d6SSatish Balay   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
25957b952d6SSatish Balay   nmax = work[rank];
26057b952d6SSatish Balay   PetscFree(work);
26157b952d6SSatish Balay 
26257b952d6SSatish Balay   /* post receives:
26357b952d6SSatish Balay        1) each message will consist of ordered pairs
26457b952d6SSatish Balay      (global index,value) we store the global index as a double
26557b952d6SSatish Balay      to simplify the message passing.
26657b952d6SSatish Balay        2) since we don't know how long each individual message is we
26757b952d6SSatish Balay      allocate the largest needed buffer for each receive. Potentially
26857b952d6SSatish Balay      this is a lot of wasted space.
26957b952d6SSatish Balay 
27057b952d6SSatish Balay 
27157b952d6SSatish Balay        This could be done better.
27257b952d6SSatish Balay   */
27357b952d6SSatish Balay   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
27457b952d6SSatish Balay   CHKPTRQ(rvalues);
27557b952d6SSatish Balay   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
27657b952d6SSatish Balay   CHKPTRQ(recv_waits);
27757b952d6SSatish Balay   for ( i=0; i<nreceives; i++ ) {
27857b952d6SSatish Balay     MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
27957b952d6SSatish Balay               comm,recv_waits+i);
28057b952d6SSatish Balay   }
28157b952d6SSatish Balay 
28257b952d6SSatish Balay   /* do sends:
28357b952d6SSatish Balay       1) starts[i] gives the starting index in svalues for stuff going to
28457b952d6SSatish Balay          the ith processor
28557b952d6SSatish Balay   */
28657b952d6SSatish Balay   svalues = (Scalar *) PetscMalloc(3*(baij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
28757b952d6SSatish Balay   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
28857b952d6SSatish Balay   CHKPTRQ(send_waits);
28957b952d6SSatish Balay   starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
29057b952d6SSatish Balay   starts[0] = 0;
29157b952d6SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
29257b952d6SSatish Balay   for ( i=0; i<baij->stash.n; i++ ) {
29357b952d6SSatish Balay     svalues[3*starts[owner[i]]]       = (Scalar)  baij->stash.idx[i];
29457b952d6SSatish Balay     svalues[3*starts[owner[i]]+1]     = (Scalar)  baij->stash.idy[i];
29557b952d6SSatish Balay     svalues[3*(starts[owner[i]]++)+2] =  baij->stash.array[i];
29657b952d6SSatish Balay   }
29757b952d6SSatish Balay   PetscFree(owner);
29857b952d6SSatish Balay   starts[0] = 0;
29957b952d6SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
30057b952d6SSatish Balay   count = 0;
30157b952d6SSatish Balay   for ( i=0; i<size; i++ ) {
30257b952d6SSatish Balay     if (procs[i]) {
30357b952d6SSatish Balay       MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
30457b952d6SSatish Balay                 comm,send_waits+count++);
30557b952d6SSatish Balay     }
30657b952d6SSatish Balay   }
30757b952d6SSatish Balay   PetscFree(starts); PetscFree(nprocs);
30857b952d6SSatish Balay 
30957b952d6SSatish Balay   /* Free cache space */
310d2dc9b81SLois Curfman McInnes   PLogInfo(mat,"MatAssemblyBegin_MPIBAIJ:Number of off-processor values %d\n",baij->stash.n);
31157b952d6SSatish Balay   ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr);
31257b952d6SSatish Balay 
31357b952d6SSatish Balay   baij->svalues    = svalues;    baij->rvalues    = rvalues;
31457b952d6SSatish Balay   baij->nsends     = nsends;     baij->nrecvs     = nreceives;
31557b952d6SSatish Balay   baij->send_waits = send_waits; baij->recv_waits = recv_waits;
31657b952d6SSatish Balay   baij->rmax       = nmax;
31757b952d6SSatish Balay 
31857b952d6SSatish Balay   return 0;
31957b952d6SSatish Balay }
32057b952d6SSatish Balay 
32157b952d6SSatish Balay 
322026e39d0SSatish Balay #undef __FUNCTION__
323026e39d0SSatish Balay #define __FUNCTION__ "MatAssemblyEnd_MPIBAIJ"
32457b952d6SSatish Balay static int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
32557b952d6SSatish Balay {
32657b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
32757b952d6SSatish Balay   MPI_Status  *send_status,recv_status;
32857b952d6SSatish Balay   int         imdex,nrecvs = baij->nrecvs, count = nrecvs, i, n, ierr;
32957b952d6SSatish Balay   int         bs=baij->bs,row,col,other_disassembled;
33057b952d6SSatish Balay   Scalar      *values,val;
33157b952d6SSatish Balay   InsertMode  addv = baij->insertmode;
33257b952d6SSatish Balay 
33357b952d6SSatish Balay   /*  wait on receives */
33457b952d6SSatish Balay   while (count) {
33557b952d6SSatish Balay     MPI_Waitany(nrecvs,baij->recv_waits,&imdex,&recv_status);
33657b952d6SSatish Balay     /* unpack receives into our local space */
33757b952d6SSatish Balay     values = baij->rvalues + 3*imdex*baij->rmax;
33857b952d6SSatish Balay     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
33957b952d6SSatish Balay     n = n/3;
34057b952d6SSatish Balay     for ( i=0; i<n; i++ ) {
34157b952d6SSatish Balay       row = (int) PetscReal(values[3*i]) - baij->rstart*bs;
34257b952d6SSatish Balay       col = (int) PetscReal(values[3*i+1]);
34357b952d6SSatish Balay       val = values[3*i+2];
34457b952d6SSatish Balay       if (col >= baij->cstart*bs && col < baij->cend*bs) {
34557b952d6SSatish Balay         col -= baij->cstart*bs;
34657b952d6SSatish Balay         MatSetValues(baij->A,1,&row,1,&col,&val,addv);
34757b952d6SSatish Balay       }
34857b952d6SSatish Balay       else {
34957b952d6SSatish Balay         if (mat->was_assembled) {
350905e6a2fSBarry Smith           if (!baij->colmap) {
351905e6a2fSBarry Smith             ierr = CreateColmap_MPIBAIJ_Private(mat); CHKERRQ(ierr);
352905e6a2fSBarry Smith           }
353905e6a2fSBarry Smith           col = (baij->colmap[col/bs]-1)*bs + col%bs;
35457b952d6SSatish Balay           if (col < 0  && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
35557b952d6SSatish Balay             ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
35657b952d6SSatish Balay             col = (int) PetscReal(values[3*i+1]);
35757b952d6SSatish Balay           }
35857b952d6SSatish Balay         }
35957b952d6SSatish Balay         MatSetValues(baij->B,1,&row,1,&col,&val,addv);
36057b952d6SSatish Balay       }
36157b952d6SSatish Balay     }
36257b952d6SSatish Balay     count--;
36357b952d6SSatish Balay   }
36457b952d6SSatish Balay   PetscFree(baij->recv_waits); PetscFree(baij->rvalues);
36557b952d6SSatish Balay 
36657b952d6SSatish Balay   /* wait on sends */
36757b952d6SSatish Balay   if (baij->nsends) {
36857b952d6SSatish Balay     send_status = (MPI_Status *) PetscMalloc(baij->nsends*sizeof(MPI_Status));
36957b952d6SSatish Balay     CHKPTRQ(send_status);
37057b952d6SSatish Balay     MPI_Waitall(baij->nsends,baij->send_waits,send_status);
37157b952d6SSatish Balay     PetscFree(send_status);
37257b952d6SSatish Balay   }
37357b952d6SSatish Balay   PetscFree(baij->send_waits); PetscFree(baij->svalues);
37457b952d6SSatish Balay 
37557b952d6SSatish Balay   baij->insertmode = NOT_SET_VALUES;
37657b952d6SSatish Balay   ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr);
37757b952d6SSatish Balay   ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr);
37857b952d6SSatish Balay 
37957b952d6SSatish Balay   /* determine if any processor has disassembled, if so we must
38057b952d6SSatish Balay      also disassemble ourselfs, in order that we may reassemble. */
38157b952d6SSatish Balay   MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
38257b952d6SSatish Balay   if (mat->was_assembled && !other_disassembled) {
38357b952d6SSatish Balay     ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
38457b952d6SSatish Balay   }
38557b952d6SSatish Balay 
3866d4a8577SBarry Smith   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
38757b952d6SSatish Balay     ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr);
38857b952d6SSatish Balay   }
38957b952d6SSatish Balay   ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr);
39057b952d6SSatish Balay   ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr);
39157b952d6SSatish Balay 
39257b952d6SSatish Balay   if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;}
39357b952d6SSatish Balay   return 0;
39457b952d6SSatish Balay }
39557b952d6SSatish Balay 
396026e39d0SSatish Balay #undef __FUNCTION__
397026e39d0SSatish Balay #define __FUNCTION__ "MatView_MPIBAIJ_Binary"
39857b952d6SSatish Balay static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer)
39957b952d6SSatish Balay {
40057b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
40157b952d6SSatish Balay   int          ierr;
40257b952d6SSatish Balay 
40357b952d6SSatish Balay   if (baij->size == 1) {
40457b952d6SSatish Balay     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
40557b952d6SSatish Balay   }
406e3372554SBarry Smith   else SETERRQ(1,0,"Only uniprocessor output supported");
40757b952d6SSatish Balay   return 0;
40857b952d6SSatish Balay }
40957b952d6SSatish Balay 
410026e39d0SSatish Balay #undef __FUNCTION__
411026e39d0SSatish Balay #define __FUNCTION__ "MatView_MPIBAIJ_ASCIIorDraworMatlab"
41257b952d6SSatish Balay static int MatView_MPIBAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
41357b952d6SSatish Balay {
41457b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
415cee3aa6bSSatish Balay   int          ierr, format,rank,bs = baij->bs;
41657b952d6SSatish Balay   FILE         *fd;
41757b952d6SSatish Balay   ViewerType   vtype;
41857b952d6SSatish Balay 
41957b952d6SSatish Balay   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
42057b952d6SSatish Balay   if (vtype  == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
42157b952d6SSatish Balay     ierr = ViewerGetFormat(viewer,&format);
422639f9d9dSBarry Smith     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
4234e220ebcSLois Curfman McInnes       MatInfo info;
42457b952d6SSatish Balay       MPI_Comm_rank(mat->comm,&rank);
42557b952d6SSatish Balay       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
4264e220ebcSLois Curfman McInnes       ierr = MatGetInfo(mat,MAT_LOCAL,&info);
42757b952d6SSatish Balay       PetscSequentialPhaseBegin(mat->comm,1);
42857b952d6SSatish Balay       fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
4294e220ebcSLois Curfman McInnes               rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
4304e220ebcSLois Curfman McInnes               baij->bs,(int)info.memory);
4314e220ebcSLois Curfman McInnes       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);
4324e220ebcSLois Curfman McInnes       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
4334e220ebcSLois Curfman McInnes       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);
4344e220ebcSLois Curfman McInnes       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
43557b952d6SSatish Balay       fflush(fd);
43657b952d6SSatish Balay       PetscSequentialPhaseEnd(mat->comm,1);
43757b952d6SSatish Balay       ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr);
43857b952d6SSatish Balay       return 0;
43957b952d6SSatish Balay     }
440639f9d9dSBarry Smith     else if (format == VIEWER_FORMAT_ASCII_INFO) {
441bcc3fcf6SBarry Smith       PetscPrintf(mat->comm,"  block size is %d\n",bs);
44257b952d6SSatish Balay       return 0;
44357b952d6SSatish Balay     }
44457b952d6SSatish Balay   }
44557b952d6SSatish Balay 
44657b952d6SSatish Balay   if (vtype == DRAW_VIEWER) {
44757b952d6SSatish Balay     Draw       draw;
44857b952d6SSatish Balay     PetscTruth isnull;
44957b952d6SSatish Balay     ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
45057b952d6SSatish Balay     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
45157b952d6SSatish Balay   }
45257b952d6SSatish Balay 
45357b952d6SSatish Balay   if (vtype == ASCII_FILE_VIEWER) {
45457b952d6SSatish Balay     ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
45557b952d6SSatish Balay     PetscSequentialPhaseBegin(mat->comm,1);
45657b952d6SSatish Balay     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
45757b952d6SSatish Balay            baij->rank,baij->m,baij->rstart*bs,baij->rend*bs,baij->n,
45857b952d6SSatish Balay             baij->cstart*bs,baij->cend*bs);
45957b952d6SSatish Balay     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
46057b952d6SSatish Balay     ierr = MatView(baij->B,viewer); CHKERRQ(ierr);
46157b952d6SSatish Balay     fflush(fd);
46257b952d6SSatish Balay     PetscSequentialPhaseEnd(mat->comm,1);
46357b952d6SSatish Balay   }
46457b952d6SSatish Balay   else {
46557b952d6SSatish Balay     int size = baij->size;
46657b952d6SSatish Balay     rank = baij->rank;
46757b952d6SSatish Balay     if (size == 1) {
46857b952d6SSatish Balay       ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
46957b952d6SSatish Balay     }
47057b952d6SSatish Balay     else {
47157b952d6SSatish Balay       /* assemble the entire matrix onto first processor. */
47257b952d6SSatish Balay       Mat         A;
47357b952d6SSatish Balay       Mat_SeqBAIJ *Aloc;
47457b952d6SSatish Balay       int         M = baij->M, N = baij->N,*ai,*aj,row,col,i,j,k,*rvals;
47557b952d6SSatish Balay       int         mbs=baij->mbs;
47657b952d6SSatish Balay       Scalar      *a;
47757b952d6SSatish Balay 
47857b952d6SSatish Balay       if (!rank) {
479cee3aa6bSSatish Balay         ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
48057b952d6SSatish Balay         CHKERRQ(ierr);
48157b952d6SSatish Balay       }
48257b952d6SSatish Balay       else {
483cee3aa6bSSatish Balay         ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
48457b952d6SSatish Balay         CHKERRQ(ierr);
48557b952d6SSatish Balay       }
48657b952d6SSatish Balay       PLogObjectParent(mat,A);
48757b952d6SSatish Balay 
48857b952d6SSatish Balay       /* copy over the A part */
48957b952d6SSatish Balay       Aloc = (Mat_SeqBAIJ*) baij->A->data;
49057b952d6SSatish Balay       ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
49157b952d6SSatish Balay       row = baij->rstart;
49257b952d6SSatish Balay       rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
49357b952d6SSatish Balay 
49457b952d6SSatish Balay       for ( i=0; i<mbs; i++ ) {
49557b952d6SSatish Balay         rvals[0] = bs*(baij->rstart + i);
49657b952d6SSatish Balay         for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
49757b952d6SSatish Balay         for ( j=ai[i]; j<ai[i+1]; j++ ) {
49857b952d6SSatish Balay           col = (baij->cstart+aj[j])*bs;
49957b952d6SSatish Balay           for (k=0; k<bs; k++ ) {
500cee3aa6bSSatish Balay             ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
501cee3aa6bSSatish Balay             col++; a += bs;
50257b952d6SSatish Balay           }
50357b952d6SSatish Balay         }
50457b952d6SSatish Balay       }
50557b952d6SSatish Balay       /* copy over the B part */
50657b952d6SSatish Balay       Aloc = (Mat_SeqBAIJ*) baij->B->data;
50757b952d6SSatish Balay       ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
50857b952d6SSatish Balay       row = baij->rstart*bs;
50957b952d6SSatish Balay       for ( i=0; i<mbs; i++ ) {
51057b952d6SSatish Balay         rvals[0] = bs*(baij->rstart + i);
51157b952d6SSatish Balay         for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
51257b952d6SSatish Balay         for ( j=ai[i]; j<ai[i+1]; j++ ) {
51357b952d6SSatish Balay           col = baij->garray[aj[j]]*bs;
51457b952d6SSatish Balay           for (k=0; k<bs; k++ ) {
515cee3aa6bSSatish Balay             ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
516cee3aa6bSSatish Balay             col++; a += bs;
51757b952d6SSatish Balay           }
51857b952d6SSatish Balay         }
51957b952d6SSatish Balay       }
52057b952d6SSatish Balay       PetscFree(rvals);
5216d4a8577SBarry Smith       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
5226d4a8577SBarry Smith       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
52357b952d6SSatish Balay       if (!rank) {
52457b952d6SSatish Balay         ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
52557b952d6SSatish Balay       }
52657b952d6SSatish Balay       ierr = MatDestroy(A); CHKERRQ(ierr);
52757b952d6SSatish Balay     }
52857b952d6SSatish Balay   }
52957b952d6SSatish Balay   return 0;
53057b952d6SSatish Balay }
53157b952d6SSatish Balay 
53257b952d6SSatish Balay 
53357b952d6SSatish Balay 
534026e39d0SSatish Balay #undef __FUNCTION__
535026e39d0SSatish Balay #define __FUNCTION__ "MatView_MPIBAIJ"
53657b952d6SSatish Balay static int MatView_MPIBAIJ(PetscObject obj,Viewer viewer)
53757b952d6SSatish Balay {
53857b952d6SSatish Balay   Mat         mat = (Mat) obj;
53957b952d6SSatish Balay   int         ierr;
54057b952d6SSatish Balay   ViewerType  vtype;
54157b952d6SSatish Balay 
54257b952d6SSatish Balay   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
54357b952d6SSatish Balay   if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
54457b952d6SSatish Balay       vtype == DRAW_VIEWER       || vtype == MATLAB_VIEWER) {
54557b952d6SSatish Balay     ierr = MatView_MPIBAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
54657b952d6SSatish Balay   }
54757b952d6SSatish Balay   else if (vtype == BINARY_FILE_VIEWER) {
54857b952d6SSatish Balay     return MatView_MPIBAIJ_Binary(mat,viewer);
54957b952d6SSatish Balay   }
55057b952d6SSatish Balay   return 0;
55157b952d6SSatish Balay }
55257b952d6SSatish Balay 
553026e39d0SSatish Balay #undef __FUNCTION__
554026e39d0SSatish Balay #define __FUNCTION__ "MatDestroy_MPIBAIJ"
55579bdfe76SSatish Balay static int MatDestroy_MPIBAIJ(PetscObject obj)
55679bdfe76SSatish Balay {
55779bdfe76SSatish Balay   Mat         mat = (Mat) obj;
55879bdfe76SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
55979bdfe76SSatish Balay   int         ierr;
56079bdfe76SSatish Balay 
56179bdfe76SSatish Balay #if defined(PETSC_LOG)
56279bdfe76SSatish Balay   PLogObjectState(obj,"Rows=%d, Cols=%d",baij->M,baij->N);
56379bdfe76SSatish Balay #endif
56479bdfe76SSatish Balay 
56579bdfe76SSatish Balay   PetscFree(baij->rowners);
56679bdfe76SSatish Balay   ierr = MatDestroy(baij->A); CHKERRQ(ierr);
56779bdfe76SSatish Balay   ierr = MatDestroy(baij->B); CHKERRQ(ierr);
56879bdfe76SSatish Balay   if (baij->colmap) PetscFree(baij->colmap);
56979bdfe76SSatish Balay   if (baij->garray) PetscFree(baij->garray);
57079bdfe76SSatish Balay   if (baij->lvec)   VecDestroy(baij->lvec);
57179bdfe76SSatish Balay   if (baij->Mvctx)  VecScatterDestroy(baij->Mvctx);
57279bdfe76SSatish Balay   if (baij->rowvalues) PetscFree(baij->rowvalues);
57379bdfe76SSatish Balay   PetscFree(baij);
57490f02eecSBarry Smith   if (mat->mapping) {
57590f02eecSBarry Smith     ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr);
57690f02eecSBarry Smith   }
57779bdfe76SSatish Balay   PLogObjectDestroy(mat);
57879bdfe76SSatish Balay   PetscHeaderDestroy(mat);
57979bdfe76SSatish Balay   return 0;
58079bdfe76SSatish Balay }
58179bdfe76SSatish Balay 
582026e39d0SSatish Balay #undef __FUNCTION__
583026e39d0SSatish Balay #define __FUNCTION__ "MatMult_MPIBAIJ"
584cee3aa6bSSatish Balay static int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
585cee3aa6bSSatish Balay {
586cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
58747b4a8eaSLois Curfman McInnes   int         ierr, nt;
588cee3aa6bSSatish Balay 
589c16cb8f2SBarry Smith   VecGetLocalSize_Fast(xx,nt);
59047b4a8eaSLois Curfman McInnes   if (nt != a->n) {
591e3372554SBarry Smith     SETERRQ(1,0,"Incompatible parition of A and xx");
59247b4a8eaSLois Curfman McInnes   }
593c16cb8f2SBarry Smith   VecGetLocalSize_Fast(yy,nt);
59447b4a8eaSLois Curfman McInnes   if (nt != a->m) {
595e3372554SBarry Smith     SETERRQ(1,0,"Incompatible parition of A and yy");
59647b4a8eaSLois Curfman McInnes   }
59743a90d84SBarry Smith   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
598cee3aa6bSSatish Balay   ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr);
59943a90d84SBarry Smith   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
600cee3aa6bSSatish Balay   ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
60143a90d84SBarry Smith   ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
602cee3aa6bSSatish Balay   return 0;
603cee3aa6bSSatish Balay }
604cee3aa6bSSatish Balay 
605026e39d0SSatish Balay #undef __FUNCTION__
606026e39d0SSatish Balay #define __FUNCTION__ "MatMultAdd_MPIBAIJ"
607cee3aa6bSSatish Balay static int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
608cee3aa6bSSatish Balay {
609cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
610cee3aa6bSSatish Balay   int        ierr;
61143a90d84SBarry Smith   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
612cee3aa6bSSatish Balay   ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
61343a90d84SBarry Smith   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
614cee3aa6bSSatish Balay   ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
615cee3aa6bSSatish Balay   return 0;
616cee3aa6bSSatish Balay }
617cee3aa6bSSatish Balay 
618026e39d0SSatish Balay #undef __FUNCTION__
619026e39d0SSatish Balay #define __FUNCTION__ "MatMultTrans_MPIBAIJ"
620cee3aa6bSSatish Balay static int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy)
621cee3aa6bSSatish Balay {
622cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
623cee3aa6bSSatish Balay   int        ierr;
624cee3aa6bSSatish Balay 
625cee3aa6bSSatish Balay   /* do nondiagonal part */
626cee3aa6bSSatish Balay   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
627cee3aa6bSSatish Balay   /* send it on its way */
628537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
629cee3aa6bSSatish Balay   /* do local part */
630cee3aa6bSSatish Balay   ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr);
631cee3aa6bSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
632cee3aa6bSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
633cee3aa6bSSatish Balay   /* but is not perhaps always true. */
634639f9d9dSBarry Smith   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
635cee3aa6bSSatish Balay   return 0;
636cee3aa6bSSatish Balay }
637cee3aa6bSSatish Balay 
638026e39d0SSatish Balay #undef __FUNCTION__
639026e39d0SSatish Balay #define __FUNCTION__ "MatMultTransAdd_MPIBAIJ"
640cee3aa6bSSatish Balay static int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
641cee3aa6bSSatish Balay {
642cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
643cee3aa6bSSatish Balay   int        ierr;
644cee3aa6bSSatish Balay 
645cee3aa6bSSatish Balay   /* do nondiagonal part */
646cee3aa6bSSatish Balay   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
647cee3aa6bSSatish Balay   /* send it on its way */
648537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
649cee3aa6bSSatish Balay   /* do local part */
650cee3aa6bSSatish Balay   ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
651cee3aa6bSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
652cee3aa6bSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
653cee3aa6bSSatish Balay   /* but is not perhaps always true. */
654537820f0SBarry Smith   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
655cee3aa6bSSatish Balay   return 0;
656cee3aa6bSSatish Balay }
657cee3aa6bSSatish Balay 
658cee3aa6bSSatish Balay /*
659cee3aa6bSSatish Balay   This only works correctly for square matrices where the subblock A->A is the
660cee3aa6bSSatish Balay    diagonal block
661cee3aa6bSSatish Balay */
662026e39d0SSatish Balay #undef __FUNCTION__
663026e39d0SSatish Balay #define __FUNCTION__ "MatGetDiagonal_MPIBAIJ"
664cee3aa6bSSatish Balay static int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
665cee3aa6bSSatish Balay {
666cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
667cee3aa6bSSatish Balay   if (a->M != a->N)
668e3372554SBarry Smith     SETERRQ(1,0,"Supports only square matrix where A->A is diag block");
669cee3aa6bSSatish Balay   return MatGetDiagonal(a->A,v);
670cee3aa6bSSatish Balay }
671cee3aa6bSSatish Balay 
672026e39d0SSatish Balay #undef __FUNCTION__
673026e39d0SSatish Balay #define __FUNCTION__ "MatScale_MPIBAIJ"
674cee3aa6bSSatish Balay static int MatScale_MPIBAIJ(Scalar *aa,Mat A)
675cee3aa6bSSatish Balay {
676cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
677cee3aa6bSSatish Balay   int        ierr;
678cee3aa6bSSatish Balay   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
679cee3aa6bSSatish Balay   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
680cee3aa6bSSatish Balay   return 0;
681cee3aa6bSSatish Balay }
682026e39d0SSatish Balay 
683026e39d0SSatish Balay #undef __FUNCTION__
684026e39d0SSatish Balay #define __FUNCTION__ "MatGetSize_MPIBAIJ"
68557b952d6SSatish Balay static int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n)
68657b952d6SSatish Balay {
68757b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
68857b952d6SSatish Balay   *m = mat->M; *n = mat->N;
68957b952d6SSatish Balay   return 0;
69057b952d6SSatish Balay }
69157b952d6SSatish Balay 
692026e39d0SSatish Balay #undef __FUNCTION__
693026e39d0SSatish Balay #define __FUNCTION__ "MatGetLocalSize_MPIBAIJ"
69457b952d6SSatish Balay static int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n)
69557b952d6SSatish Balay {
69657b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
69757b952d6SSatish Balay   *m = mat->m; *n = mat->N;
69857b952d6SSatish Balay   return 0;
69957b952d6SSatish Balay }
70057b952d6SSatish Balay 
701026e39d0SSatish Balay #undef __FUNCTION__
702026e39d0SSatish Balay #define __FUNCTION__ "MatGetOwnershipRange_MPIBAIJ"
70357b952d6SSatish Balay static int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n)
70457b952d6SSatish Balay {
70557b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
70657b952d6SSatish Balay   *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs;
70757b952d6SSatish Balay   return 0;
70857b952d6SSatish Balay }
70957b952d6SSatish Balay 
710acdf5bf4SSatish Balay extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
711acdf5bf4SSatish Balay extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
712acdf5bf4SSatish Balay 
713026e39d0SSatish Balay #undef __FUNCTION__
714026e39d0SSatish Balay #define __FUNCTION__ "MatGetRow_MPIBAIJ"
715acdf5bf4SSatish Balay int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
716acdf5bf4SSatish Balay {
717acdf5bf4SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
718acdf5bf4SSatish Balay   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
719acdf5bf4SSatish Balay   int        bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB;
720d9d09a02SSatish Balay   int        nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs;
721d9d09a02SSatish Balay   int        *cmap, *idx_p,cstart = mat->cstart;
722acdf5bf4SSatish Balay 
723e3372554SBarry Smith   if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,0,"Already active");
724acdf5bf4SSatish Balay   mat->getrowactive = PETSC_TRUE;
725acdf5bf4SSatish Balay 
726acdf5bf4SSatish Balay   if (!mat->rowvalues && (idx || v)) {
727acdf5bf4SSatish Balay     /*
728acdf5bf4SSatish Balay         allocate enough space to hold information from the longest row.
729acdf5bf4SSatish Balay     */
730acdf5bf4SSatish Balay     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data;
731bd16c2feSSatish Balay     int     max = 1,mbs = mat->mbs,tmp;
732bd16c2feSSatish Balay     for ( i=0; i<mbs; i++ ) {
733acdf5bf4SSatish Balay       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
734acdf5bf4SSatish Balay       if (max < tmp) { max = tmp; }
735acdf5bf4SSatish Balay     }
736acdf5bf4SSatish Balay     mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar)));
737acdf5bf4SSatish Balay     CHKPTRQ(mat->rowvalues);
738acdf5bf4SSatish Balay     mat->rowindices = (int *) (mat->rowvalues + max*bs2);
739acdf5bf4SSatish Balay   }
740acdf5bf4SSatish Balay 
741acdf5bf4SSatish Balay 
742e3372554SBarry Smith   if (row < brstart || row >= brend) SETERRQ(1,0,"Only local rows")
743d9d09a02SSatish Balay   lrow = row - brstart;
744acdf5bf4SSatish Balay 
745acdf5bf4SSatish Balay   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
746acdf5bf4SSatish Balay   if (!v)   {pvA = 0; pvB = 0;}
747acdf5bf4SSatish Balay   if (!idx) {pcA = 0; if (!v) pcB = 0;}
748acdf5bf4SSatish Balay   ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
749acdf5bf4SSatish Balay   ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
750acdf5bf4SSatish Balay   nztot = nzA + nzB;
751acdf5bf4SSatish Balay 
752acdf5bf4SSatish Balay   cmap  = mat->garray;
753acdf5bf4SSatish Balay   if (v  || idx) {
754acdf5bf4SSatish Balay     if (nztot) {
755acdf5bf4SSatish Balay       /* Sort by increasing column numbers, assuming A and B already sorted */
756acdf5bf4SSatish Balay       int imark = -1;
757acdf5bf4SSatish Balay       if (v) {
758acdf5bf4SSatish Balay         *v = v_p = mat->rowvalues;
759acdf5bf4SSatish Balay         for ( i=0; i<nzB; i++ ) {
760d9d09a02SSatish Balay           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
761acdf5bf4SSatish Balay           else break;
762acdf5bf4SSatish Balay         }
763acdf5bf4SSatish Balay         imark = i;
764acdf5bf4SSatish Balay         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
765acdf5bf4SSatish Balay         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
766acdf5bf4SSatish Balay       }
767acdf5bf4SSatish Balay       if (idx) {
768acdf5bf4SSatish Balay         *idx = idx_p = mat->rowindices;
769acdf5bf4SSatish Balay         if (imark > -1) {
770acdf5bf4SSatish Balay           for ( i=0; i<imark; i++ ) {
771bd16c2feSSatish Balay             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
772acdf5bf4SSatish Balay           }
773acdf5bf4SSatish Balay         } else {
774acdf5bf4SSatish Balay           for ( i=0; i<nzB; i++ ) {
775d9d09a02SSatish Balay             if (cmap[cworkB[i]/bs] < cstart)
776d9d09a02SSatish Balay               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
777acdf5bf4SSatish Balay             else break;
778acdf5bf4SSatish Balay           }
779acdf5bf4SSatish Balay           imark = i;
780acdf5bf4SSatish Balay         }
781d9d09a02SSatish Balay         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart*bs + cworkA[i];
782d9d09a02SSatish Balay         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
783acdf5bf4SSatish Balay       }
784acdf5bf4SSatish Balay     }
785d212a18eSSatish Balay     else {
786d212a18eSSatish Balay       if (idx) *idx = 0;
787d212a18eSSatish Balay       if (v)   *v   = 0;
788d212a18eSSatish Balay     }
789acdf5bf4SSatish Balay   }
790acdf5bf4SSatish Balay   *nz = nztot;
791acdf5bf4SSatish Balay   ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
792acdf5bf4SSatish Balay   ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
793acdf5bf4SSatish Balay   return 0;
794acdf5bf4SSatish Balay }
795acdf5bf4SSatish Balay 
796026e39d0SSatish Balay #undef __FUNCTION__
797026e39d0SSatish Balay #define __FUNCTION__ "MatRestoreRow_MPIBAIJ"
798acdf5bf4SSatish Balay int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
799acdf5bf4SSatish Balay {
800acdf5bf4SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
801acdf5bf4SSatish Balay   if (baij->getrowactive == PETSC_FALSE) {
802e3372554SBarry Smith     SETERRQ(1,0,"MatGetRow not called");
803acdf5bf4SSatish Balay   }
804acdf5bf4SSatish Balay   baij->getrowactive = PETSC_FALSE;
805acdf5bf4SSatish Balay   return 0;
806acdf5bf4SSatish Balay }
807acdf5bf4SSatish Balay 
808026e39d0SSatish Balay #undef __FUNCTION__
809026e39d0SSatish Balay #define __FUNCTION__ "MatGetBlockSize_MPIBAIJ"
8105a838052SSatish Balay static int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
8115a838052SSatish Balay {
8125a838052SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
8135a838052SSatish Balay   *bs = baij->bs;
8145a838052SSatish Balay   return 0;
8155a838052SSatish Balay }
8165a838052SSatish Balay 
817026e39d0SSatish Balay #undef __FUNCTION__
818026e39d0SSatish Balay #define __FUNCTION__ "MatZeroEntries_MPIBAIJ"
81958667388SSatish Balay static int MatZeroEntries_MPIBAIJ(Mat A)
82058667388SSatish Balay {
82158667388SSatish Balay   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data;
82258667388SSatish Balay   int         ierr;
82358667388SSatish Balay   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
82458667388SSatish Balay   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
82558667388SSatish Balay   return 0;
82658667388SSatish Balay }
8270ac07820SSatish Balay 
828026e39d0SSatish Balay #undef __FUNCTION__
829026e39d0SSatish Balay #define __FUNCTION__ "MatGetInfo_MPIBAIJ"
8304e220ebcSLois Curfman McInnes static int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
8310ac07820SSatish Balay {
8324e220ebcSLois Curfman McInnes   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data;
8334e220ebcSLois Curfman McInnes   Mat         A = a->A, B = a->B;
8347d57db60SLois Curfman McInnes   int         ierr;
8357d57db60SLois Curfman McInnes   double      isend[5], irecv[5];
8360ac07820SSatish Balay 
8374e220ebcSLois Curfman McInnes   info->rows_global    = (double)a->M;
8384e220ebcSLois Curfman McInnes   info->columns_global = (double)a->N;
8394e220ebcSLois Curfman McInnes   info->rows_local     = (double)a->m;
8404e220ebcSLois Curfman McInnes   info->columns_local  = (double)a->N;
8414e220ebcSLois Curfman McInnes   info->block_size     = (double)a->bs;
8424e220ebcSLois Curfman McInnes   ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
8434e220ebcSLois Curfman McInnes   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->memory;
8444e220ebcSLois Curfman McInnes   ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
8454e220ebcSLois Curfman McInnes   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->memory;
8460ac07820SSatish Balay   if (flag == MAT_LOCAL) {
8474e220ebcSLois Curfman McInnes     info->nz_used      = isend[0];
8484e220ebcSLois Curfman McInnes     info->nz_allocated = isend[1];
8494e220ebcSLois Curfman McInnes     info->nz_unneeded  = isend[2];
8504e220ebcSLois Curfman McInnes     info->memory       = isend[3];
8514e220ebcSLois Curfman McInnes     info->mallocs      = isend[4];
8520ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_MAX) {
8530ac07820SSatish Balay     MPI_Allreduce(isend,irecv,3,MPI_INT,MPI_MAX,matin->comm);
8544e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
8554e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
8564e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
8574e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
8584e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
8590ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_SUM) {
8600ac07820SSatish Balay     MPI_Allreduce(isend,irecv,3,MPI_INT,MPI_SUM,matin->comm);
8614e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
8624e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
8634e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
8644e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
8654e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
8660ac07820SSatish Balay   }
8674e220ebcSLois Curfman McInnes   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
8684e220ebcSLois Curfman McInnes   info->fill_ratio_needed = 0;
8694e220ebcSLois Curfman McInnes   info->factor_mallocs    = 0;
8700ac07820SSatish Balay   return 0;
8710ac07820SSatish Balay }
8720ac07820SSatish Balay 
873026e39d0SSatish Balay #undef __FUNCTION__
874026e39d0SSatish Balay #define __FUNCTION__ "MatSetOption_MPIBAIJ"
87558667388SSatish Balay static int MatSetOption_MPIBAIJ(Mat A,MatOption op)
87658667388SSatish Balay {
87758667388SSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
87858667388SSatish Balay 
87958667388SSatish Balay   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
88058667388SSatish Balay       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
8816da5968aSLois Curfman McInnes       op == MAT_COLUMNS_UNSORTED ||
882b1fbbac0SLois Curfman McInnes       op == MAT_COLUMNS_SORTED) {
883b1fbbac0SLois Curfman McInnes         MatSetOption(a->A,op);
884b1fbbac0SLois Curfman McInnes         MatSetOption(a->B,op);
885b1fbbac0SLois Curfman McInnes   } else if (op == MAT_ROW_ORIENTED) {
886aeafbbfcSLois Curfman McInnes         a->roworiented = 1;
88758667388SSatish Balay         MatSetOption(a->A,op);
88858667388SSatish Balay         MatSetOption(a->B,op);
889b1fbbac0SLois Curfman McInnes   } else if (op == MAT_ROWS_SORTED ||
8906da5968aSLois Curfman McInnes              op == MAT_ROWS_UNSORTED ||
89158667388SSatish Balay              op == MAT_SYMMETRIC ||
89258667388SSatish Balay              op == MAT_STRUCTURALLY_SYMMETRIC ||
89358667388SSatish Balay              op == MAT_YES_NEW_DIAGONALS)
89458667388SSatish Balay     PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
89558667388SSatish Balay   else if (op == MAT_COLUMN_ORIENTED) {
89658667388SSatish Balay     a->roworiented = 0;
89758667388SSatish Balay     MatSetOption(a->A,op);
89858667388SSatish Balay     MatSetOption(a->B,op);
89990f02eecSBarry Smith   } else if (op == MAT_IGNORE_OFF_PROCESSOR_ENTRIES) {
90090f02eecSBarry Smith     a->donotstash = 1;
90190f02eecSBarry Smith   } else if (op == MAT_NO_NEW_DIAGONALS)
902e3372554SBarry Smith     {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");}
90358667388SSatish Balay   else
904e3372554SBarry Smith     {SETERRQ(PETSC_ERR_SUP,0,"unknown option");}
90558667388SSatish Balay   return 0;
90658667388SSatish Balay }
90758667388SSatish Balay 
908026e39d0SSatish Balay #undef __FUNCTION__
909026e39d0SSatish Balay #define __FUNCTION__ "MatTranspose_MPIBAIJ("
9100ac07820SSatish Balay static int MatTranspose_MPIBAIJ(Mat A,Mat *matout)
9110ac07820SSatish Balay {
9120ac07820SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data;
9130ac07820SSatish Balay   Mat_SeqBAIJ *Aloc;
9140ac07820SSatish Balay   Mat        B;
9150ac07820SSatish Balay   int        ierr,M=baij->M,N=baij->N,*ai,*aj,row,i,*rvals,j,k,col;
9160ac07820SSatish Balay   int        bs=baij->bs,mbs=baij->mbs;
9170ac07820SSatish Balay   Scalar     *a;
9180ac07820SSatish Balay 
9190ac07820SSatish Balay   if (matout == PETSC_NULL && M != N)
920e3372554SBarry Smith     SETERRQ(1,0,"Square matrix only for in-place");
9210ac07820SSatish Balay   ierr = MatCreateMPIBAIJ(A->comm,baij->bs,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);
9220ac07820SSatish Balay   CHKERRQ(ierr);
9230ac07820SSatish Balay 
9240ac07820SSatish Balay   /* copy over the A part */
9250ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*) baij->A->data;
9260ac07820SSatish Balay   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
9270ac07820SSatish Balay   row = baij->rstart;
9280ac07820SSatish Balay   rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
9290ac07820SSatish Balay 
9300ac07820SSatish Balay   for ( i=0; i<mbs; i++ ) {
9310ac07820SSatish Balay     rvals[0] = bs*(baij->rstart + i);
9320ac07820SSatish Balay     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
9330ac07820SSatish Balay     for ( j=ai[i]; j<ai[i+1]; j++ ) {
9340ac07820SSatish Balay       col = (baij->cstart+aj[j])*bs;
9350ac07820SSatish Balay       for (k=0; k<bs; k++ ) {
9360ac07820SSatish Balay         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
9370ac07820SSatish Balay         col++; a += bs;
9380ac07820SSatish Balay       }
9390ac07820SSatish Balay     }
9400ac07820SSatish Balay   }
9410ac07820SSatish Balay   /* copy over the B part */
9420ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*) baij->B->data;
9430ac07820SSatish Balay   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
9440ac07820SSatish Balay   row = baij->rstart*bs;
9450ac07820SSatish Balay   for ( i=0; i<mbs; i++ ) {
9460ac07820SSatish Balay     rvals[0] = bs*(baij->rstart + i);
9470ac07820SSatish Balay     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
9480ac07820SSatish Balay     for ( j=ai[i]; j<ai[i+1]; j++ ) {
9490ac07820SSatish Balay       col = baij->garray[aj[j]]*bs;
9500ac07820SSatish Balay       for (k=0; k<bs; k++ ) {
9510ac07820SSatish Balay         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
9520ac07820SSatish Balay         col++; a += bs;
9530ac07820SSatish Balay       }
9540ac07820SSatish Balay     }
9550ac07820SSatish Balay   }
9560ac07820SSatish Balay   PetscFree(rvals);
9570ac07820SSatish Balay   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
9580ac07820SSatish Balay   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
9590ac07820SSatish Balay 
9600ac07820SSatish Balay   if (matout != PETSC_NULL) {
9610ac07820SSatish Balay     *matout = B;
9620ac07820SSatish Balay   } else {
9630ac07820SSatish Balay     /* This isn't really an in-place transpose .... but free data structures from baij */
9640ac07820SSatish Balay     PetscFree(baij->rowners);
9650ac07820SSatish Balay     ierr = MatDestroy(baij->A); CHKERRQ(ierr);
9660ac07820SSatish Balay     ierr = MatDestroy(baij->B); CHKERRQ(ierr);
9670ac07820SSatish Balay     if (baij->colmap) PetscFree(baij->colmap);
9680ac07820SSatish Balay     if (baij->garray) PetscFree(baij->garray);
9690ac07820SSatish Balay     if (baij->lvec) VecDestroy(baij->lvec);
9700ac07820SSatish Balay     if (baij->Mvctx) VecScatterDestroy(baij->Mvctx);
9710ac07820SSatish Balay     PetscFree(baij);
9720ac07820SSatish Balay     PetscMemcpy(A,B,sizeof(struct _Mat));
9730ac07820SSatish Balay     PetscHeaderDestroy(B);
9740ac07820SSatish Balay   }
9750ac07820SSatish Balay   return 0;
9760ac07820SSatish Balay }
9770e95ebc0SSatish Balay 
978026e39d0SSatish Balay #undef __FUNCTION__
979026e39d0SSatish Balay #define __FUNCTION__ "MatDiagonalScale_MPIBAIJ"
9800e95ebc0SSatish Balay int MatDiagonalScale_MPIBAIJ(Mat A,Vec ll,Vec rr)
9810e95ebc0SSatish Balay {
9820e95ebc0SSatish Balay   Mat a = ((Mat_MPIBAIJ *) A->data)->A;
9830e95ebc0SSatish Balay   Mat b = ((Mat_MPIBAIJ *) A->data)->B;
9840e95ebc0SSatish Balay   int ierr,s1,s2,s3;
9850e95ebc0SSatish Balay 
9860e95ebc0SSatish Balay   if (ll)  {
9870e95ebc0SSatish Balay     ierr = VecGetLocalSize(ll,&s1); CHKERRQ(ierr);
9880e95ebc0SSatish Balay     ierr = MatGetLocalSize(A,&s2,&s3); CHKERRQ(ierr);
989e3372554SBarry Smith     if (s1!=s2) SETERRQ(1,0,"non-conforming local sizes");
9900e95ebc0SSatish Balay     ierr = MatDiagonalScale(a,ll,0); CHKERRQ(ierr);
9910e95ebc0SSatish Balay     ierr = MatDiagonalScale(b,ll,0); CHKERRQ(ierr);
9920e95ebc0SSatish Balay   }
993e3372554SBarry Smith   if (rr) SETERRQ(1,0,"not supported for right vector");
9940e95ebc0SSatish Balay   return 0;
9950e95ebc0SSatish Balay }
9960e95ebc0SSatish Balay 
9970ac07820SSatish Balay /* the code does not do the diagonal entries correctly unless the
9980ac07820SSatish Balay    matrix is square and the column and row owerships are identical.
9990ac07820SSatish Balay    This is a BUG. The only way to fix it seems to be to access
10000ac07820SSatish Balay    baij->A and baij->B directly and not through the MatZeroRows()
10010ac07820SSatish Balay    routine.
10020ac07820SSatish Balay */
1003026e39d0SSatish Balay #undef __FUNCTION__
1004026e39d0SSatish Balay #define __FUNCTION__ "MatZeroRows_MPIBAIJ"
10050ac07820SSatish Balay static int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag)
10060ac07820SSatish Balay {
10070ac07820SSatish Balay   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ *) A->data;
10080ac07820SSatish Balay   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
10090ac07820SSatish Balay   int            *procs,*nprocs,j,found,idx,nsends,*work;
10100ac07820SSatish Balay   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
10110ac07820SSatish Balay   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
10120ac07820SSatish Balay   int            *lens,imdex,*lrows,*values,bs=l->bs;
10130ac07820SSatish Balay   MPI_Comm       comm = A->comm;
10140ac07820SSatish Balay   MPI_Request    *send_waits,*recv_waits;
10150ac07820SSatish Balay   MPI_Status     recv_status,*send_status;
10160ac07820SSatish Balay   IS             istmp;
10170ac07820SSatish Balay 
10180ac07820SSatish Balay   ierr = ISGetSize(is,&N); CHKERRQ(ierr);
10190ac07820SSatish Balay   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
10200ac07820SSatish Balay 
10210ac07820SSatish Balay   /*  first count number of contributors to each processor */
10220ac07820SSatish Balay   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
10230ac07820SSatish Balay   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
10240ac07820SSatish Balay   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
10250ac07820SSatish Balay   for ( i=0; i<N; i++ ) {
10260ac07820SSatish Balay     idx = rows[i];
10270ac07820SSatish Balay     found = 0;
10280ac07820SSatish Balay     for ( j=0; j<size; j++ ) {
10290ac07820SSatish Balay       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
10300ac07820SSatish Balay         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
10310ac07820SSatish Balay       }
10320ac07820SSatish Balay     }
1033e3372554SBarry Smith     if (!found) SETERRQ(1,0,"Index out of range");
10340ac07820SSatish Balay   }
10350ac07820SSatish Balay   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
10360ac07820SSatish Balay 
10370ac07820SSatish Balay   /* inform other processors of number of messages and max length*/
10380ac07820SSatish Balay   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
10390ac07820SSatish Balay   MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
10400ac07820SSatish Balay   nrecvs = work[rank];
10410ac07820SSatish Balay   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
10420ac07820SSatish Balay   nmax = work[rank];
10430ac07820SSatish Balay   PetscFree(work);
10440ac07820SSatish Balay 
10450ac07820SSatish Balay   /* post receives:   */
10460ac07820SSatish Balay   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
10470ac07820SSatish Balay   CHKPTRQ(rvalues);
10480ac07820SSatish Balay   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
10490ac07820SSatish Balay   CHKPTRQ(recv_waits);
10500ac07820SSatish Balay   for ( i=0; i<nrecvs; i++ ) {
10510ac07820SSatish Balay     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
10520ac07820SSatish Balay   }
10530ac07820SSatish Balay 
10540ac07820SSatish Balay   /* do sends:
10550ac07820SSatish Balay       1) starts[i] gives the starting index in svalues for stuff going to
10560ac07820SSatish Balay          the ith processor
10570ac07820SSatish Balay   */
10580ac07820SSatish Balay   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
10590ac07820SSatish Balay   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
10600ac07820SSatish Balay   CHKPTRQ(send_waits);
10610ac07820SSatish Balay   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
10620ac07820SSatish Balay   starts[0] = 0;
10630ac07820SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
10640ac07820SSatish Balay   for ( i=0; i<N; i++ ) {
10650ac07820SSatish Balay     svalues[starts[owner[i]]++] = rows[i];
10660ac07820SSatish Balay   }
10670ac07820SSatish Balay   ISRestoreIndices(is,&rows);
10680ac07820SSatish Balay 
10690ac07820SSatish Balay   starts[0] = 0;
10700ac07820SSatish Balay   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
10710ac07820SSatish Balay   count = 0;
10720ac07820SSatish Balay   for ( i=0; i<size; i++ ) {
10730ac07820SSatish Balay     if (procs[i]) {
10740ac07820SSatish Balay       MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
10750ac07820SSatish Balay     }
10760ac07820SSatish Balay   }
10770ac07820SSatish Balay   PetscFree(starts);
10780ac07820SSatish Balay 
10790ac07820SSatish Balay   base = owners[rank]*bs;
10800ac07820SSatish Balay 
10810ac07820SSatish Balay   /*  wait on receives */
10820ac07820SSatish Balay   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
10830ac07820SSatish Balay   source = lens + nrecvs;
10840ac07820SSatish Balay   count  = nrecvs; slen = 0;
10850ac07820SSatish Balay   while (count) {
10860ac07820SSatish Balay     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
10870ac07820SSatish Balay     /* unpack receives into our local space */
10880ac07820SSatish Balay     MPI_Get_count(&recv_status,MPI_INT,&n);
10890ac07820SSatish Balay     source[imdex]  = recv_status.MPI_SOURCE;
10900ac07820SSatish Balay     lens[imdex]  = n;
10910ac07820SSatish Balay     slen += n;
10920ac07820SSatish Balay     count--;
10930ac07820SSatish Balay   }
10940ac07820SSatish Balay   PetscFree(recv_waits);
10950ac07820SSatish Balay 
10960ac07820SSatish Balay   /* move the data into the send scatter */
10970ac07820SSatish Balay   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
10980ac07820SSatish Balay   count = 0;
10990ac07820SSatish Balay   for ( i=0; i<nrecvs; i++ ) {
11000ac07820SSatish Balay     values = rvalues + i*nmax;
11010ac07820SSatish Balay     for ( j=0; j<lens[i]; j++ ) {
11020ac07820SSatish Balay       lrows[count++] = values[j] - base;
11030ac07820SSatish Balay     }
11040ac07820SSatish Balay   }
11050ac07820SSatish Balay   PetscFree(rvalues); PetscFree(lens);
11060ac07820SSatish Balay   PetscFree(owner); PetscFree(nprocs);
11070ac07820SSatish Balay 
11080ac07820SSatish Balay   /* actually zap the local rows */
1109537820f0SBarry Smith   ierr = ISCreateGeneral(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
11100ac07820SSatish Balay   PLogObjectParent(A,istmp);
11110ac07820SSatish Balay   PetscFree(lrows);
11120ac07820SSatish Balay   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
11130ac07820SSatish Balay   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
11140ac07820SSatish Balay   ierr = ISDestroy(istmp); CHKERRQ(ierr);
11150ac07820SSatish Balay 
11160ac07820SSatish Balay   /* wait on sends */
11170ac07820SSatish Balay   if (nsends) {
11180ac07820SSatish Balay     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
11190ac07820SSatish Balay     CHKPTRQ(send_status);
11200ac07820SSatish Balay     MPI_Waitall(nsends,send_waits,send_status);
11210ac07820SSatish Balay     PetscFree(send_status);
11220ac07820SSatish Balay   }
11230ac07820SSatish Balay   PetscFree(send_waits); PetscFree(svalues);
11240ac07820SSatish Balay 
11250ac07820SSatish Balay   return 0;
11260ac07820SSatish Balay }
1127ba4ca20aSSatish Balay extern int MatPrintHelp_SeqBAIJ(Mat);
1128026e39d0SSatish Balay #undef __FUNCTION__
1129026e39d0SSatish Balay #define __FUNCTION__ "MatPrintHelp_MPIBAIJ"
1130ba4ca20aSSatish Balay static int MatPrintHelp_MPIBAIJ(Mat A)
1131ba4ca20aSSatish Balay {
1132ba4ca20aSSatish Balay   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
1133ba4ca20aSSatish Balay 
1134ba4ca20aSSatish Balay   if (!a->rank) return MatPrintHelp_SeqBAIJ(a->A);
1135ba4ca20aSSatish Balay   else return 0;
1136ba4ca20aSSatish Balay }
11370ac07820SSatish Balay 
1138*bb5a7306SBarry Smith #undef __FUNCTION__
1139*bb5a7306SBarry Smith #define __FUNCTION__ "MatSetUnfactored_MPIBAIJ"
1140*bb5a7306SBarry Smith static int MatSetUnfactored_MPIBAIJ(Mat A)
1141*bb5a7306SBarry Smith {
1142*bb5a7306SBarry Smith   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
1143*bb5a7306SBarry Smith   int         ierr;
1144*bb5a7306SBarry Smith   ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
1145*bb5a7306SBarry Smith   return 0;
1146*bb5a7306SBarry Smith }
1147*bb5a7306SBarry Smith 
11480ac07820SSatish Balay static int MatConvertSameType_MPIBAIJ(Mat,Mat *,int);
11490ac07820SSatish Balay 
115079bdfe76SSatish Balay /* -------------------------------------------------------------------*/
115179bdfe76SSatish Balay static struct _MatOps MatOps = {
1152bd16c2feSSatish Balay   MatSetValues_MPIBAIJ,MatGetRow_MPIBAIJ,MatRestoreRow_MPIBAIJ,MatMult_MPIBAIJ,
11534c50302cSBarry Smith   MatMultAdd_MPIBAIJ,MatMultTrans_MPIBAIJ,MatMultTransAdd_MPIBAIJ,0,
11544c50302cSBarry Smith   0,0,0,0,
11550ac07820SSatish Balay   0,0,MatTranspose_MPIBAIJ,MatGetInfo_MPIBAIJ,
11560e95ebc0SSatish Balay   0,MatGetDiagonal_MPIBAIJ,MatDiagonalScale_MPIBAIJ,MatNorm_MPIBAIJ,
115758667388SSatish Balay   MatAssemblyBegin_MPIBAIJ,MatAssemblyEnd_MPIBAIJ,0,MatSetOption_MPIBAIJ,
11584c50302cSBarry Smith   MatZeroEntries_MPIBAIJ,MatZeroRows_MPIBAIJ,0,
11594c50302cSBarry Smith   0,0,0,MatGetSize_MPIBAIJ,
11604c50302cSBarry Smith   MatGetLocalSize_MPIBAIJ,MatGetOwnershipRange_MPIBAIJ,0,0,
1161905e6a2fSBarry Smith   0,0,0,MatConvertSameType_MPIBAIJ,0,0,
1162d212a18eSSatish Balay   0,0,0,MatGetSubMatrices_MPIBAIJ,
1163ba4ca20aSSatish Balay   MatIncreaseOverlap_MPIBAIJ,MatGetValues_MPIBAIJ,0,MatPrintHelp_MPIBAIJ,
1164*bb5a7306SBarry Smith   MatScale_MPIBAIJ,0,0,0,MatGetBlockSize_MPIBAIJ,
1165*bb5a7306SBarry Smith   0,0,0,0,0,0,MatSetUnfactored_MPIBAIJ};
116679bdfe76SSatish Balay 
116779bdfe76SSatish Balay 
1168026e39d0SSatish Balay #undef __FUNCTION__
1169026e39d0SSatish Balay #define __FUNCTION__ "MatCreateMPIBAIJ"
117079bdfe76SSatish Balay /*@C
117179bdfe76SSatish Balay    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
117279bdfe76SSatish Balay    (block compressed row).  For good matrix assembly performance
117379bdfe76SSatish Balay    the user should preallocate the matrix storage by setting the parameters
117479bdfe76SSatish Balay    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
117579bdfe76SSatish Balay    performance can be increased by more than a factor of 50.
117679bdfe76SSatish Balay 
117779bdfe76SSatish Balay    Input Parameters:
117879bdfe76SSatish Balay .  comm - MPI communicator
117979bdfe76SSatish Balay .  bs   - size of blockk
118079bdfe76SSatish Balay .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
118192e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
118292e8d321SLois Curfman McInnes            y vector for the matrix-vector product y = Ax.
118392e8d321SLois Curfman McInnes .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
118492e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
118592e8d321SLois Curfman McInnes            x vector for the matrix-vector product y = Ax.
118679bdfe76SSatish Balay .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
118792e8d321SLois Curfman McInnes .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
118879bdfe76SSatish Balay .  d_nz  - number of block nonzeros per block row in diagonal portion of local
118979bdfe76SSatish Balay            submatrix  (same for all local rows)
119092e8d321SLois Curfman McInnes .  d_nzz - array containing the number of block nonzeros in the various block rows
119192e8d321SLois Curfman McInnes            of the in diagonal portion of the local (possibly different for each block
119292e8d321SLois Curfman McInnes            row) or PETSC_NULL.  You must leave room for the diagonal entry even if
119392e8d321SLois Curfman McInnes            it is zero.
119492e8d321SLois Curfman McInnes .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
119579bdfe76SSatish Balay            submatrix (same for all local rows).
119692e8d321SLois Curfman McInnes .  o_nzz - array containing the number of nonzeros in the various block rows of the
119792e8d321SLois Curfman McInnes            off-diagonal portion of the local submatrix (possibly different for
119892e8d321SLois Curfman McInnes            each block row) or PETSC_NULL.
119979bdfe76SSatish Balay 
120079bdfe76SSatish Balay    Output Parameter:
120179bdfe76SSatish Balay .  A - the matrix
120279bdfe76SSatish Balay 
120379bdfe76SSatish Balay    Notes:
120479bdfe76SSatish Balay    The user MUST specify either the local or global matrix dimensions
120579bdfe76SSatish Balay    (possibly both).
120679bdfe76SSatish Balay 
120779bdfe76SSatish Balay    Storage Information:
120879bdfe76SSatish Balay    For a square global matrix we define each processor's diagonal portion
120979bdfe76SSatish Balay    to be its local rows and the corresponding columns (a square submatrix);
121079bdfe76SSatish Balay    each processor's off-diagonal portion encompasses the remainder of the
121179bdfe76SSatish Balay    local matrix (a rectangular submatrix).
121279bdfe76SSatish Balay 
121379bdfe76SSatish Balay    The user can specify preallocated storage for the diagonal part of
121479bdfe76SSatish Balay    the local submatrix with either d_nz or d_nnz (not both).  Set
121579bdfe76SSatish Balay    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
121679bdfe76SSatish Balay    memory allocation.  Likewise, specify preallocated storage for the
121779bdfe76SSatish Balay    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
121879bdfe76SSatish Balay 
121979bdfe76SSatish Balay    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
122079bdfe76SSatish Balay    the figure below we depict these three local rows and all columns (0-11).
122179bdfe76SSatish Balay 
122279bdfe76SSatish Balay $          0 1 2 3 4 5 6 7 8 9 10 11
122379bdfe76SSatish Balay $         -------------------
122479bdfe76SSatish Balay $  row 3  |  o o o d d d o o o o o o
122579bdfe76SSatish Balay $  row 4  |  o o o d d d o o o o o o
122679bdfe76SSatish Balay $  row 5  |  o o o d d d o o o o o o
122779bdfe76SSatish Balay $         -------------------
122879bdfe76SSatish Balay $
122979bdfe76SSatish Balay 
123079bdfe76SSatish Balay    Thus, any entries in the d locations are stored in the d (diagonal)
123179bdfe76SSatish Balay    submatrix, and any entries in the o locations are stored in the
123279bdfe76SSatish Balay    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
123357b952d6SSatish Balay    stored simply in the MATSEQBAIJ format for compressed row storage.
123479bdfe76SSatish Balay 
123579bdfe76SSatish Balay    Now d_nz should indicate the number of nonzeros per row in the d matrix,
123679bdfe76SSatish Balay    and o_nz should indicate the number of nonzeros per row in the o matrix.
123779bdfe76SSatish Balay    In general, for PDE problems in which most nonzeros are near the diagonal,
123892e8d321SLois Curfman McInnes    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
123992e8d321SLois Curfman McInnes    or you will get TERRIBLE performance; see the users' manual chapter on
12406da5968aSLois Curfman McInnes    matrices.
124179bdfe76SSatish Balay 
124292e8d321SLois Curfman McInnes .keywords: matrix, block, aij, compressed row, sparse, parallel
124379bdfe76SSatish Balay 
124479bdfe76SSatish Balay .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues()
124579bdfe76SSatish Balay @*/
124679bdfe76SSatish Balay int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,
124779bdfe76SSatish Balay                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
124879bdfe76SSatish Balay {
124979bdfe76SSatish Balay   Mat          B;
125079bdfe76SSatish Balay   Mat_MPIBAIJ  *b;
12514c50302cSBarry Smith   int          ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size;
125279bdfe76SSatish Balay 
1253e3372554SBarry Smith   if (bs < 1) SETERRQ(1,0,"invalid block size specified");
125479bdfe76SSatish Balay   *A = 0;
125579bdfe76SSatish Balay   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIBAIJ,comm);
125679bdfe76SSatish Balay   PLogObjectCreate(B);
125779bdfe76SSatish Balay   B->data       = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b);
125879bdfe76SSatish Balay   PetscMemzero(b,sizeof(Mat_MPIBAIJ));
125979bdfe76SSatish Balay   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
12604c50302cSBarry Smith   MPI_Comm_size(comm,&size);
12614c50302cSBarry Smith   if (size == 1) {
12624c50302cSBarry Smith     B->ops.getrowij          = MatGetRowIJ_MPIBAIJ;
12634c50302cSBarry Smith     B->ops.restorerowij      = MatRestoreRowIJ_MPIBAIJ;
12644c50302cSBarry Smith     B->ops.lufactorsymbolic  = MatLUFactorSymbolic_MPIBAIJ;
12654c50302cSBarry Smith     B->ops.lufactornumeric   = MatLUFactorNumeric_MPIBAIJ;
12664c50302cSBarry Smith     B->ops.lufactor          = MatLUFactor_MPIBAIJ;
12674c50302cSBarry Smith     B->ops.solve             = MatSolve_MPIBAIJ;
12684c50302cSBarry Smith     B->ops.solveadd          = MatSolveAdd_MPIBAIJ;
12694c50302cSBarry Smith     B->ops.solvetrans        = MatSolveTrans_MPIBAIJ;
12704c50302cSBarry Smith     B->ops.solvetransadd     = MatSolveTransAdd_MPIBAIJ;
12714c50302cSBarry Smith     B->ops.ilufactorsymbolic = MatILUFactorSymbolic_MPIBAIJ;
12724c50302cSBarry Smith   }
12734c50302cSBarry Smith 
127479bdfe76SSatish Balay   B->destroy    = MatDestroy_MPIBAIJ;
127579bdfe76SSatish Balay   B->view       = MatView_MPIBAIJ;
127690f02eecSBarry Smith   B->mapping    = 0;
127779bdfe76SSatish Balay   B->factor     = 0;
127879bdfe76SSatish Balay   B->assembled  = PETSC_FALSE;
127979bdfe76SSatish Balay 
128079bdfe76SSatish Balay   b->insertmode = NOT_SET_VALUES;
128179bdfe76SSatish Balay   MPI_Comm_rank(comm,&b->rank);
128279bdfe76SSatish Balay   MPI_Comm_size(comm,&b->size);
128379bdfe76SSatish Balay 
128479bdfe76SSatish Balay   if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
1285e3372554SBarry Smith     SETERRQ(1,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
1286e3372554SBarry Smith   if ( M == PETSC_DECIDE && m == PETSC_DECIDE) SETERRQ(1,0,"either M or m should be specified");
1287e3372554SBarry Smith   if ( M == PETSC_DECIDE && n == PETSC_DECIDE)SETERRQ(1,0,"either N or n should be specified");
1288cee3aa6bSSatish Balay   if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE;
1289cee3aa6bSSatish Balay   if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE;
129079bdfe76SSatish Balay 
129179bdfe76SSatish Balay   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
129279bdfe76SSatish Balay     work[0] = m; work[1] = n;
129379bdfe76SSatish Balay     mbs = m/bs; nbs = n/bs;
129479bdfe76SSatish Balay     MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
129579bdfe76SSatish Balay     if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;}
129679bdfe76SSatish Balay     if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;}
129779bdfe76SSatish Balay   }
129879bdfe76SSatish Balay   if (m == PETSC_DECIDE) {
129979bdfe76SSatish Balay     Mbs = M/bs;
1300e3372554SBarry Smith     if (Mbs*bs != M) SETERRQ(1,0,"No of global rows must be divisible by blocksize");
130179bdfe76SSatish Balay     mbs = Mbs/b->size + ((Mbs % b->size) > b->rank);
130279bdfe76SSatish Balay     m   = mbs*bs;
130379bdfe76SSatish Balay   }
130479bdfe76SSatish Balay   if (n == PETSC_DECIDE) {
130579bdfe76SSatish Balay     Nbs = N/bs;
1306e3372554SBarry Smith     if (Nbs*bs != N) SETERRQ(1,0,"No of global cols must be divisible by blocksize");
130779bdfe76SSatish Balay     nbs = Nbs/b->size + ((Nbs % b->size) > b->rank);
130879bdfe76SSatish Balay     n   = nbs*bs;
130979bdfe76SSatish Balay   }
1310e3372554SBarry Smith   if (mbs*bs != m || nbs*bs != n) SETERRQ(1,0,"No of local rows, cols must be divisible by blocksize");
131179bdfe76SSatish Balay 
131279bdfe76SSatish Balay   b->m = m; B->m = m;
131379bdfe76SSatish Balay   b->n = n; B->n = n;
131479bdfe76SSatish Balay   b->N = N; B->N = N;
131579bdfe76SSatish Balay   b->M = M; B->M = M;
131679bdfe76SSatish Balay   b->bs  = bs;
131779bdfe76SSatish Balay   b->bs2 = bs*bs;
131879bdfe76SSatish Balay   b->mbs = mbs;
131979bdfe76SSatish Balay   b->nbs = nbs;
132079bdfe76SSatish Balay   b->Mbs = Mbs;
132179bdfe76SSatish Balay   b->Nbs = Nbs;
132279bdfe76SSatish Balay 
132379bdfe76SSatish Balay   /* build local table of row and column ownerships */
132479bdfe76SSatish Balay   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
132579bdfe76SSatish Balay   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIBAIJ));
13260ac07820SSatish Balay   b->cowners = b->rowners + b->size + 2;
132779bdfe76SSatish Balay   MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);
132879bdfe76SSatish Balay   b->rowners[0] = 0;
132979bdfe76SSatish Balay   for ( i=2; i<=b->size; i++ ) {
133079bdfe76SSatish Balay     b->rowners[i] += b->rowners[i-1];
133179bdfe76SSatish Balay   }
133279bdfe76SSatish Balay   b->rstart    = b->rowners[b->rank];
133379bdfe76SSatish Balay   b->rend      = b->rowners[b->rank+1];
13344fa0d573SSatish Balay   b->rstart_bs = b->rstart * bs;
13354fa0d573SSatish Balay   b->rend_bs   = b->rend * bs;
13364fa0d573SSatish Balay 
133779bdfe76SSatish Balay   MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);
133879bdfe76SSatish Balay   b->cowners[0] = 0;
133979bdfe76SSatish Balay   for ( i=2; i<=b->size; i++ ) {
134079bdfe76SSatish Balay     b->cowners[i] += b->cowners[i-1];
134179bdfe76SSatish Balay   }
134279bdfe76SSatish Balay   b->cstart    = b->cowners[b->rank];
134379bdfe76SSatish Balay   b->cend      = b->cowners[b->rank+1];
13444fa0d573SSatish Balay   b->cstart_bs = b->cstart * bs;
13454fa0d573SSatish Balay   b->cend_bs   = b->cend * bs;
134679bdfe76SSatish Balay 
134779bdfe76SSatish Balay   if (d_nz == PETSC_DEFAULT) d_nz = 5;
134879bdfe76SSatish Balay   ierr = MatCreateSeqBAIJ(MPI_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
134979bdfe76SSatish Balay   PLogObjectParent(B,b->A);
135079bdfe76SSatish Balay   if (o_nz == PETSC_DEFAULT) o_nz = 0;
135179bdfe76SSatish Balay   ierr = MatCreateSeqBAIJ(MPI_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
135279bdfe76SSatish Balay   PLogObjectParent(B,b->B);
135379bdfe76SSatish Balay 
135479bdfe76SSatish Balay   /* build cache for off array entries formed */
135579bdfe76SSatish Balay   ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
135690f02eecSBarry Smith   b->donotstash  = 0;
135779bdfe76SSatish Balay   b->colmap      = 0;
135879bdfe76SSatish Balay   b->garray      = 0;
135979bdfe76SSatish Balay   b->roworiented = 1;
136079bdfe76SSatish Balay 
136179bdfe76SSatish Balay   /* stuff used for matrix vector multiply */
136279bdfe76SSatish Balay   b->lvec      = 0;
136379bdfe76SSatish Balay   b->Mvctx     = 0;
136479bdfe76SSatish Balay 
136579bdfe76SSatish Balay   /* stuff for MatGetRow() */
136679bdfe76SSatish Balay   b->rowindices   = 0;
136779bdfe76SSatish Balay   b->rowvalues    = 0;
136879bdfe76SSatish Balay   b->getrowactive = PETSC_FALSE;
136979bdfe76SSatish Balay 
137079bdfe76SSatish Balay   *A = B;
137179bdfe76SSatish Balay   return 0;
137279bdfe76SSatish Balay }
1373026e39d0SSatish Balay 
1374026e39d0SSatish Balay #undef __FUNCTION__
1375026e39d0SSatish Balay #define __FUNCTION__ "MatConvertSameType_MPIBAIJ"
13760ac07820SSatish Balay static int MatConvertSameType_MPIBAIJ(Mat matin,Mat *newmat,int cpvalues)
13770ac07820SSatish Balay {
13780ac07820SSatish Balay   Mat        mat;
13790ac07820SSatish Balay   Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data;
13800ac07820SSatish Balay   int        ierr, len=0, flg;
13810ac07820SSatish Balay 
13820ac07820SSatish Balay   *newmat       = 0;
13830ac07820SSatish Balay   PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIBAIJ,matin->comm);
13840ac07820SSatish Balay   PLogObjectCreate(mat);
13850ac07820SSatish Balay   mat->data       = (void *) (a = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(a);
13860ac07820SSatish Balay   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
13870ac07820SSatish Balay   mat->destroy    = MatDestroy_MPIBAIJ;
13880ac07820SSatish Balay   mat->view       = MatView_MPIBAIJ;
13890ac07820SSatish Balay   mat->factor     = matin->factor;
13900ac07820SSatish Balay   mat->assembled  = PETSC_TRUE;
13910ac07820SSatish Balay 
13920ac07820SSatish Balay   a->m = mat->m   = oldmat->m;
13930ac07820SSatish Balay   a->n = mat->n   = oldmat->n;
13940ac07820SSatish Balay   a->M = mat->M   = oldmat->M;
13950ac07820SSatish Balay   a->N = mat->N   = oldmat->N;
13960ac07820SSatish Balay 
13970ac07820SSatish Balay   a->bs  = oldmat->bs;
13980ac07820SSatish Balay   a->bs2 = oldmat->bs2;
13990ac07820SSatish Balay   a->mbs = oldmat->mbs;
14000ac07820SSatish Balay   a->nbs = oldmat->nbs;
14010ac07820SSatish Balay   a->Mbs = oldmat->Mbs;
14020ac07820SSatish Balay   a->Nbs = oldmat->Nbs;
14030ac07820SSatish Balay 
14040ac07820SSatish Balay   a->rstart       = oldmat->rstart;
14050ac07820SSatish Balay   a->rend         = oldmat->rend;
14060ac07820SSatish Balay   a->cstart       = oldmat->cstart;
14070ac07820SSatish Balay   a->cend         = oldmat->cend;
14080ac07820SSatish Balay   a->size         = oldmat->size;
14090ac07820SSatish Balay   a->rank         = oldmat->rank;
14100ac07820SSatish Balay   a->insertmode   = NOT_SET_VALUES;
14110ac07820SSatish Balay   a->rowvalues    = 0;
14120ac07820SSatish Balay   a->getrowactive = PETSC_FALSE;
14130ac07820SSatish Balay 
14140ac07820SSatish Balay   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
14150ac07820SSatish Balay   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIBAIJ));
14160ac07820SSatish Balay   a->cowners = a->rowners + a->size + 2;
14170ac07820SSatish Balay   PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
14180ac07820SSatish Balay   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
14190ac07820SSatish Balay   if (oldmat->colmap) {
14200ac07820SSatish Balay     a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap);
14210ac07820SSatish Balay     PLogObjectMemory(mat,(a->Nbs)*sizeof(int));
14220ac07820SSatish Balay     PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));
14230ac07820SSatish Balay   } else a->colmap = 0;
14240ac07820SSatish Balay   if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) {
14250ac07820SSatish Balay     a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray);
14260ac07820SSatish Balay     PLogObjectMemory(mat,len*sizeof(int));
14270ac07820SSatish Balay     PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
14280ac07820SSatish Balay   } else a->garray = 0;
14290ac07820SSatish Balay 
14300ac07820SSatish Balay   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
14310ac07820SSatish Balay   PLogObjectParent(mat,a->lvec);
14320ac07820SSatish Balay   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
14330ac07820SSatish Balay   PLogObjectParent(mat,a->Mvctx);
14340ac07820SSatish Balay   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
14350ac07820SSatish Balay   PLogObjectParent(mat,a->A);
14360ac07820SSatish Balay   ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
14370ac07820SSatish Balay   PLogObjectParent(mat,a->B);
14380ac07820SSatish Balay   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
14390ac07820SSatish Balay   if (flg) {
14400ac07820SSatish Balay     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
14410ac07820SSatish Balay   }
14420ac07820SSatish Balay   *newmat = mat;
14430ac07820SSatish Balay   return 0;
14440ac07820SSatish Balay }
144557b952d6SSatish Balay 
144657b952d6SSatish Balay #include "sys.h"
144757b952d6SSatish Balay 
1448026e39d0SSatish Balay #undef __FUNCTION__
1449026e39d0SSatish Balay #define __FUNCTION__ "MatLoad_MPIBAIJ"
145057b952d6SSatish Balay int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat)
145157b952d6SSatish Balay {
145257b952d6SSatish Balay   Mat          A;
145357b952d6SSatish Balay   int          i, nz, ierr, j,rstart, rend, fd;
145457b952d6SSatish Balay   Scalar       *vals,*buf;
145557b952d6SSatish Balay   MPI_Comm     comm = ((PetscObject)viewer)->comm;
145657b952d6SSatish Balay   MPI_Status   status;
1457cee3aa6bSSatish Balay   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
145857b952d6SSatish Balay   int          *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf;
145957b952d6SSatish Balay   int          flg,tag = ((PetscObject)viewer)->tag,bs=1,bs2,Mbs,mbs,extra_rows;
146057b952d6SSatish Balay   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
146157b952d6SSatish Balay   int          dcount,kmax,k,nzcount,tmp;
146257b952d6SSatish Balay 
146357b952d6SSatish Balay 
146457b952d6SSatish Balay   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
146557b952d6SSatish Balay   bs2  = bs*bs;
146657b952d6SSatish Balay 
146757b952d6SSatish Balay   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
146857b952d6SSatish Balay   if (!rank) {
146957b952d6SSatish Balay     ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
147057b952d6SSatish Balay     ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr);
1471e3372554SBarry Smith     if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not matrix object");
147257b952d6SSatish Balay   }
147357b952d6SSatish Balay 
147457b952d6SSatish Balay   MPI_Bcast(header+1,3,MPI_INT,0,comm);
147557b952d6SSatish Balay   M = header[1]; N = header[2];
147657b952d6SSatish Balay 
1477e3372554SBarry Smith   if (M != N) SETERRQ(1,0,"Can only do square matrices");
147857b952d6SSatish Balay 
147957b952d6SSatish Balay   /*
148057b952d6SSatish Balay      This code adds extra rows to make sure the number of rows is
148157b952d6SSatish Balay      divisible by the blocksize
148257b952d6SSatish Balay   */
148357b952d6SSatish Balay   Mbs        = M/bs;
148457b952d6SSatish Balay   extra_rows = bs - M + bs*(Mbs);
148557b952d6SSatish Balay   if (extra_rows == bs) extra_rows = 0;
148657b952d6SSatish Balay   else                  Mbs++;
148757b952d6SSatish Balay   if (extra_rows &&!rank) {
1488b0267e0aSLois Curfman McInnes     PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
148957b952d6SSatish Balay   }
1490537820f0SBarry Smith 
149157b952d6SSatish Balay   /* determine ownership of all rows */
149257b952d6SSatish Balay   mbs = Mbs/size + ((Mbs % size) > rank);
149357b952d6SSatish Balay   m   = mbs * bs;
1494cee3aa6bSSatish Balay   rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners);
1495cee3aa6bSSatish Balay   browners = rowners + size + 1;
149657b952d6SSatish Balay   MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);
149757b952d6SSatish Balay   rowners[0] = 0;
1498cee3aa6bSSatish Balay   for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1];
1499cee3aa6bSSatish Balay   for ( i=0; i<=size;  i++ ) browners[i] = rowners[i]*bs;
150057b952d6SSatish Balay   rstart = rowners[rank];
150157b952d6SSatish Balay   rend   = rowners[rank+1];
150257b952d6SSatish Balay 
150357b952d6SSatish Balay   /* distribute row lengths to all processors */
150457b952d6SSatish Balay   locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens);
150557b952d6SSatish Balay   if (!rank) {
150657b952d6SSatish Balay     rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths);
150757b952d6SSatish Balay     ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
150857b952d6SSatish Balay     for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
150957b952d6SSatish Balay     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
1510cee3aa6bSSatish Balay     for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i];
1511cee3aa6bSSatish Balay     MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);
151257b952d6SSatish Balay     PetscFree(sndcounts);
151357b952d6SSatish Balay   }
151457b952d6SSatish Balay   else {
151557b952d6SSatish Balay     MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);
151657b952d6SSatish Balay   }
151757b952d6SSatish Balay 
151857b952d6SSatish Balay   if (!rank) {
151957b952d6SSatish Balay     /* calculate the number of nonzeros on each processor */
152057b952d6SSatish Balay     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
152157b952d6SSatish Balay     PetscMemzero(procsnz,size*sizeof(int));
152257b952d6SSatish Balay     for ( i=0; i<size; i++ ) {
152357b952d6SSatish Balay       for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) {
152457b952d6SSatish Balay         procsnz[i] += rowlengths[j];
152557b952d6SSatish Balay       }
152657b952d6SSatish Balay     }
152757b952d6SSatish Balay     PetscFree(rowlengths);
152857b952d6SSatish Balay 
152957b952d6SSatish Balay     /* determine max buffer needed and allocate it */
153057b952d6SSatish Balay     maxnz = 0;
153157b952d6SSatish Balay     for ( i=0; i<size; i++ ) {
153257b952d6SSatish Balay       maxnz = PetscMax(maxnz,procsnz[i]);
153357b952d6SSatish Balay     }
153457b952d6SSatish Balay     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
153557b952d6SSatish Balay 
153657b952d6SSatish Balay     /* read in my part of the matrix column indices  */
153757b952d6SSatish Balay     nz = procsnz[0];
153857b952d6SSatish Balay     ibuf = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
153957b952d6SSatish Balay     mycols = ibuf;
1540cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
154157b952d6SSatish Balay     ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr);
1542cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
1543cee3aa6bSSatish Balay 
154457b952d6SSatish Balay     /* read in every ones (except the last) and ship off */
154557b952d6SSatish Balay     for ( i=1; i<size-1; i++ ) {
154657b952d6SSatish Balay       nz = procsnz[i];
154757b952d6SSatish Balay       ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
154857b952d6SSatish Balay       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
154957b952d6SSatish Balay     }
155057b952d6SSatish Balay     /* read in the stuff for the last proc */
155157b952d6SSatish Balay     if ( size != 1 ) {
155257b952d6SSatish Balay       nz = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
155357b952d6SSatish Balay       ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
155457b952d6SSatish Balay       for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i;
1555cee3aa6bSSatish Balay       MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);
155657b952d6SSatish Balay     }
155757b952d6SSatish Balay     PetscFree(cols);
155857b952d6SSatish Balay   }
155957b952d6SSatish Balay   else {
156057b952d6SSatish Balay     /* determine buffer space needed for message */
156157b952d6SSatish Balay     nz = 0;
156257b952d6SSatish Balay     for ( i=0; i<m; i++ ) {
156357b952d6SSatish Balay       nz += locrowlens[i];
156457b952d6SSatish Balay     }
156557b952d6SSatish Balay     ibuf = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
156657b952d6SSatish Balay     mycols = ibuf;
156757b952d6SSatish Balay     /* receive message of column indices*/
156857b952d6SSatish Balay     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
156957b952d6SSatish Balay     MPI_Get_count(&status,MPI_INT,&maxnz);
1570e3372554SBarry Smith     if (maxnz != nz) SETERRQ(1,0,"something is wrong with file");
157157b952d6SSatish Balay   }
157257b952d6SSatish Balay 
157357b952d6SSatish Balay   /* loop over local rows, determining number of off diagonal entries */
1574cee3aa6bSSatish Balay   dlens  = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens);
1575cee3aa6bSSatish Balay   odlens = dlens + (rend-rstart);
157657b952d6SSatish Balay   mask   = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask);
1577cee3aa6bSSatish Balay   PetscMemzero(mask,3*Mbs*sizeof(int));
157857b952d6SSatish Balay   masked1 = mask    + Mbs;
157957b952d6SSatish Balay   masked2 = masked1 + Mbs;
158057b952d6SSatish Balay   rowcount = 0; nzcount = 0;
158157b952d6SSatish Balay   for ( i=0; i<mbs; i++ ) {
158257b952d6SSatish Balay     dcount  = 0;
158357b952d6SSatish Balay     odcount = 0;
158457b952d6SSatish Balay     for ( j=0; j<bs; j++ ) {
158557b952d6SSatish Balay       kmax = locrowlens[rowcount];
158657b952d6SSatish Balay       for ( k=0; k<kmax; k++ ) {
158757b952d6SSatish Balay         tmp = mycols[nzcount++]/bs;
158857b952d6SSatish Balay         if (!mask[tmp]) {
158957b952d6SSatish Balay           mask[tmp] = 1;
159057b952d6SSatish Balay           if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp;
159157b952d6SSatish Balay           else masked1[dcount++] = tmp;
159257b952d6SSatish Balay         }
159357b952d6SSatish Balay       }
159457b952d6SSatish Balay       rowcount++;
159557b952d6SSatish Balay     }
1596cee3aa6bSSatish Balay 
159757b952d6SSatish Balay     dlens[i]  = dcount;
159857b952d6SSatish Balay     odlens[i] = odcount;
1599cee3aa6bSSatish Balay 
160057b952d6SSatish Balay     /* zero out the mask elements we set */
160157b952d6SSatish Balay     for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0;
160257b952d6SSatish Balay     for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0;
160357b952d6SSatish Balay   }
1604cee3aa6bSSatish Balay 
160557b952d6SSatish Balay   /* create our matrix */
1606537820f0SBarry Smith   ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);
1607537820f0SBarry Smith          CHKERRQ(ierr);
160857b952d6SSatish Balay   A = *newmat;
16096d4a8577SBarry Smith   MatSetOption(A,MAT_COLUMNS_SORTED);
161057b952d6SSatish Balay 
161157b952d6SSatish Balay   if (!rank) {
161257b952d6SSatish Balay     buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf);
161357b952d6SSatish Balay     /* read in my part of the matrix numerical values  */
161457b952d6SSatish Balay     nz = procsnz[0];
161557b952d6SSatish Balay     vals = buf;
1616cee3aa6bSSatish Balay     mycols = ibuf;
1617cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
161857b952d6SSatish Balay     ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
1619cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
1620537820f0SBarry Smith 
162157b952d6SSatish Balay     /* insert into matrix */
162257b952d6SSatish Balay     jj      = rstart*bs;
162357b952d6SSatish Balay     for ( i=0; i<m; i++ ) {
162457b952d6SSatish Balay       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
162557b952d6SSatish Balay       mycols += locrowlens[i];
162657b952d6SSatish Balay       vals   += locrowlens[i];
162757b952d6SSatish Balay       jj++;
162857b952d6SSatish Balay     }
162957b952d6SSatish Balay     /* read in other processors (except the last one) and ship out */
163057b952d6SSatish Balay     for ( i=1; i<size-1; i++ ) {
163157b952d6SSatish Balay       nz = procsnz[i];
163257b952d6SSatish Balay       vals = buf;
163357b952d6SSatish Balay       ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
163457b952d6SSatish Balay       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
163557b952d6SSatish Balay     }
163657b952d6SSatish Balay     /* the last proc */
163757b952d6SSatish Balay     if ( size != 1 ){
163857b952d6SSatish Balay       nz = procsnz[i] - extra_rows;
1639cee3aa6bSSatish Balay       vals = buf;
164057b952d6SSatish Balay       ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
164157b952d6SSatish Balay       for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0;
1642cee3aa6bSSatish Balay       MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);
164357b952d6SSatish Balay     }
164457b952d6SSatish Balay     PetscFree(procsnz);
164557b952d6SSatish Balay   }
164657b952d6SSatish Balay   else {
164757b952d6SSatish Balay     /* receive numeric values */
164857b952d6SSatish Balay     buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf);
164957b952d6SSatish Balay 
165057b952d6SSatish Balay     /* receive message of values*/
165157b952d6SSatish Balay     vals = buf;
1652cee3aa6bSSatish Balay     mycols = ibuf;
165357b952d6SSatish Balay     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
165457b952d6SSatish Balay     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1655e3372554SBarry Smith     if (maxnz != nz) SETERRQ(1,0,"something is wrong with file");
165657b952d6SSatish Balay 
165757b952d6SSatish Balay     /* insert into matrix */
165857b952d6SSatish Balay     jj      = rstart*bs;
1659cee3aa6bSSatish Balay     for ( i=0; i<m; i++ ) {
166057b952d6SSatish Balay       ierr    = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
166157b952d6SSatish Balay       mycols += locrowlens[i];
166257b952d6SSatish Balay       vals   += locrowlens[i];
166357b952d6SSatish Balay       jj++;
166457b952d6SSatish Balay     }
166557b952d6SSatish Balay   }
166657b952d6SSatish Balay   PetscFree(locrowlens);
166757b952d6SSatish Balay   PetscFree(buf);
166857b952d6SSatish Balay   PetscFree(ibuf);
166957b952d6SSatish Balay   PetscFree(rowners);
167057b952d6SSatish Balay   PetscFree(dlens);
1671cee3aa6bSSatish Balay   PetscFree(mask);
16726d4a8577SBarry Smith   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
16736d4a8577SBarry Smith   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
167457b952d6SSatish Balay   return 0;
167557b952d6SSatish Balay }
167657b952d6SSatish Balay 
167757b952d6SSatish Balay 
1678