xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision bcc3fcf6e73fafd102781dc6e108e4695d768df7)
179bdfe76SSatish Balay #ifndef lint
2*bcc3fcf6SBarry Smith static char vcid[] = "$Id: mpibaij.c,v 1.28 1996/11/07 15:09:44 bsmith Exp bsmith $";
379bdfe76SSatish Balay #endif
479bdfe76SSatish Balay 
570f55243SBarry Smith #include "src/mat/impls/baij/mpi/mpibaij.h"
6c16cb8f2SBarry Smith #include "src/vec/vecimpl.h"
779bdfe76SSatish Balay 
857b952d6SSatish Balay #include "draw.h"
957b952d6SSatish Balay #include "pinclude/pviewer.h"
1057b952d6SSatish Balay 
1157b952d6SSatish Balay extern int MatSetUpMultiply_MPIBAIJ(Mat);
1257b952d6SSatish Balay extern int DisAssemble_MPIBAIJ(Mat);
13d212a18eSSatish Balay extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int);
14d212a18eSSatish Balay extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatGetSubMatrixCall,Mat **);
1593bc47c4SSatish Balay extern int MatLUFactorSymbolic_MPIBAIJ(Mat,IS,IS,double,Mat *);
1693bc47c4SSatish Balay extern int MatLUFactorNumeric_MPIBAIJ(Mat,Mat *);
1793bc47c4SSatish Balay extern int MatLUFactor_MPIBAIJ(Mat,IS,IS,double);
1893bc47c4SSatish Balay extern int MatSolve_MPIBAIJ(Mat,Vec,Vec);
1993bc47c4SSatish Balay extern int MatSolveAdd_MPIBAIJ(Mat,Vec,Vec,Vec);
2093bc47c4SSatish Balay extern int MatSolveTrans_MPIBAIJ(Mat,Vec,Vec);
2193bc47c4SSatish Balay extern int MatSolveTransAdd_MPIBAIJ(Mat,Vec,Vec,Vec);
2293bc47c4SSatish Balay extern int MatILUFactorSymbolic_MPIBAIJ(Mat,IS,IS,double,int,Mat *);
2357b952d6SSatish Balay 
243b2fbd54SBarry Smith 
25537820f0SBarry Smith /*
26537820f0SBarry Smith      Local utility routine that creates a mapping from the global column
2757b952d6SSatish Balay    number to the local number in the off-diagonal part of the local
2857b952d6SSatish Balay    storage of the matrix.  This is done in a non scable way since the
2957b952d6SSatish Balay    length of colmap equals the global matrix length.
3057b952d6SSatish Balay */
3157b952d6SSatish Balay static int CreateColmap_MPIBAIJ_Private(Mat mat)
3257b952d6SSatish Balay {
3357b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
3457b952d6SSatish Balay   Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data;
3557b952d6SSatish Balay   int         nbs = B->nbs,i;
3657b952d6SSatish Balay 
3757b952d6SSatish Balay   baij->colmap = (int *) PetscMalloc(baij->Nbs*sizeof(int));CHKPTRQ(baij->colmap);
3857b952d6SSatish Balay   PLogObjectMemory(mat,baij->Nbs*sizeof(int));
3957b952d6SSatish Balay   PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));
40905e6a2fSBarry Smith   for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i + 1;
4157b952d6SSatish Balay   return 0;
4257b952d6SSatish Balay }
4357b952d6SSatish Balay 
443b2fbd54SBarry Smith static int MatGetRowIJ_MPIBAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
453b2fbd54SBarry Smith                             PetscTruth *done)
4657b952d6SSatish Balay {
473b2fbd54SBarry Smith   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *) mat->data;
4857b952d6SSatish Balay   int         ierr;
493b2fbd54SBarry Smith   if (aij->size == 1) {
503b2fbd54SBarry Smith     ierr = MatGetRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
513b2fbd54SBarry Smith   } else SETERRQ(1,"MatGetRowIJ_MPIBAIJ:not supported in parallel");
523b2fbd54SBarry Smith   return 0;
533b2fbd54SBarry Smith }
543b2fbd54SBarry Smith 
553b2fbd54SBarry Smith static int MatRestoreRowIJ_MPIBAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
563b2fbd54SBarry Smith                                 PetscTruth *done)
573b2fbd54SBarry Smith {
583b2fbd54SBarry Smith   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *) mat->data;
593b2fbd54SBarry Smith   int        ierr;
603b2fbd54SBarry Smith   if (aij->size == 1) {
613b2fbd54SBarry Smith     ierr = MatRestoreRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
623b2fbd54SBarry Smith   } else SETERRQ(1,"MatRestoreRowIJ_MPIBAIJ:not supported in parallel");
6357b952d6SSatish Balay   return 0;
6457b952d6SSatish Balay }
6557b952d6SSatish Balay 
66639f9d9dSBarry Smith extern int MatSetValues_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
6757b952d6SSatish Balay static int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
6857b952d6SSatish Balay {
6957b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
7057b952d6SSatish Balay   Scalar      value;
7157b952d6SSatish Balay   int         ierr,i,j, rstart = baij->rstart, rend = baij->rend;
7257b952d6SSatish Balay   int         cstart = baij->cstart, cend = baij->cend,row,col;
7357b952d6SSatish Balay   int         roworiented = baij->roworiented,rstart_orig,rend_orig;
7457b952d6SSatish Balay   int         cstart_orig,cend_orig,bs=baij->bs;
7557b952d6SSatish Balay 
7657b952d6SSatish Balay   if (baij->insertmode != NOT_SET_VALUES && baij->insertmode != addv) {
7757b952d6SSatish Balay     SETERRQ(1,"MatSetValues_MPIBAIJ:Cannot mix inserts and adds");
7857b952d6SSatish Balay   }
7957b952d6SSatish Balay   baij->insertmode = addv;
8057b952d6SSatish Balay   rstart_orig = rstart*bs;
8157b952d6SSatish Balay   rend_orig   = rend*bs;
8257b952d6SSatish Balay   cstart_orig = cstart*bs;
8357b952d6SSatish Balay   cend_orig   = cend*bs;
8457b952d6SSatish Balay   for ( i=0; i<m; i++ ) {
85639f9d9dSBarry Smith #if defined(PETSC_BOPT_g)
8657b952d6SSatish Balay     if (im[i] < 0) SETERRQ(1,"MatSetValues_MPIBAIJ:Negative row");
8757b952d6SSatish Balay     if (im[i] >= baij->M) SETERRQ(1,"MatSetValues_MPIBAIJ:Row too large");
88639f9d9dSBarry Smith #endif
8957b952d6SSatish Balay     if (im[i] >= rstart_orig && im[i] < rend_orig) {
9057b952d6SSatish Balay       row = im[i] - rstart_orig;
9157b952d6SSatish Balay       for ( j=0; j<n; j++ ) {
9257b952d6SSatish Balay         if (in[j] >= cstart_orig && in[j] < cend_orig){
9357b952d6SSatish Balay           col = in[j] - cstart_orig;
9457b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
95639f9d9dSBarry Smith           ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
9657b952d6SSatish Balay         }
97639f9d9dSBarry Smith #if defined(PETSC_BOPT_g)
98639f9d9dSBarry Smith         else if (in[j] < 0) {SETERRQ(1,"MatSetValues_MPIBAIJ:Negative column");}
99639f9d9dSBarry Smith         else if (in[j] >= baij->N) {SETERRQ(1,"MatSetValues_MPIBAIJ:Col too large");}
100639f9d9dSBarry Smith #endif
10157b952d6SSatish Balay         else {
10257b952d6SSatish Balay           if (mat->was_assembled) {
103905e6a2fSBarry Smith             if (!baij->colmap) {
104905e6a2fSBarry Smith               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
105905e6a2fSBarry Smith             }
106905e6a2fSBarry Smith             col = baij->colmap[in[j]/bs] - 1 + in[j]%bs;
10757b952d6SSatish Balay             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
10857b952d6SSatish Balay               ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
10957b952d6SSatish Balay               col =  in[j];
11057b952d6SSatish Balay             }
11157b952d6SSatish Balay           }
11257b952d6SSatish Balay           else col = in[j];
11357b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
114639f9d9dSBarry Smith           ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
11557b952d6SSatish Balay         }
11657b952d6SSatish Balay       }
11757b952d6SSatish Balay     }
11857b952d6SSatish Balay     else {
11957b952d6SSatish Balay       if (roworiented) {
12057b952d6SSatish Balay         ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
12157b952d6SSatish Balay       }
12257b952d6SSatish Balay       else {
12357b952d6SSatish Balay         row = im[i];
12457b952d6SSatish Balay         for ( j=0; j<n; j++ ) {
12557b952d6SSatish Balay           ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
12657b952d6SSatish Balay         }
12757b952d6SSatish Balay       }
12857b952d6SSatish Balay     }
12957b952d6SSatish Balay   }
13057b952d6SSatish Balay   return 0;
13157b952d6SSatish Balay }
13257b952d6SSatish Balay 
133d6de1c52SSatish Balay static int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
134d6de1c52SSatish Balay {
135d6de1c52SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
136d6de1c52SSatish Balay   int        bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs;
137d6de1c52SSatish Balay   int        bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col;
138d6de1c52SSatish Balay 
139d6de1c52SSatish Balay   for ( i=0; i<m; i++ ) {
140d6de1c52SSatish Balay     if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIBAIJ:Negative row");
141d6de1c52SSatish Balay     if (idxm[i] >= baij->M) SETERRQ(1,"MatGetValues_MPIBAIJ:Row too large");
142d6de1c52SSatish Balay     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
143d6de1c52SSatish Balay       row = idxm[i] - bsrstart;
144d6de1c52SSatish Balay       for ( j=0; j<n; j++ ) {
145d6de1c52SSatish Balay         if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIBAIJ:Negative column");
146d6de1c52SSatish Balay         if (idxn[j] >= baij->N) SETERRQ(1,"MatGetValues_MPIBAIJ:Col too large");
147d6de1c52SSatish Balay         if (idxn[j] >= bscstart && idxn[j] < bscend){
148d6de1c52SSatish Balay           col = idxn[j] - bscstart;
149d6de1c52SSatish Balay           ierr = MatGetValues(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
150d6de1c52SSatish Balay         }
151d6de1c52SSatish Balay         else {
152905e6a2fSBarry Smith           if (!baij->colmap) {
153905e6a2fSBarry Smith             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
154905e6a2fSBarry Smith           }
155e60e1c95SSatish Balay           if((baij->colmap[idxn[j]/bs]-1 < 0) ||
156e60e1c95SSatish Balay              (baij->garray[baij->colmap[idxn[j]/bs]-1] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
157d9d09a02SSatish Balay           else {
158905e6a2fSBarry Smith             col  = (baij->colmap[idxn[j]/bs]-1)*bs + idxn[j]%bs;
159d6de1c52SSatish Balay             ierr = MatGetValues(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
160d6de1c52SSatish Balay           }
161d6de1c52SSatish Balay         }
162d6de1c52SSatish Balay       }
163d9d09a02SSatish Balay     }
164d6de1c52SSatish Balay     else {
165d6de1c52SSatish Balay       SETERRQ(1,"MatGetValues_MPIBAIJ:Only local values currently supported");
166d6de1c52SSatish Balay     }
167d6de1c52SSatish Balay   }
168d6de1c52SSatish Balay   return 0;
169d6de1c52SSatish Balay }
170d6de1c52SSatish Balay 
171d6de1c52SSatish Balay static int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm)
172d6de1c52SSatish Balay {
173d6de1c52SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
174d6de1c52SSatish Balay   Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data;
175acdf5bf4SSatish Balay   int        ierr, i,bs2=baij->bs2;
176d6de1c52SSatish Balay   double     sum = 0.0;
177d6de1c52SSatish Balay   Scalar     *v;
178d6de1c52SSatish Balay 
179d6de1c52SSatish Balay   if (baij->size == 1) {
180d6de1c52SSatish Balay     ierr =  MatNorm(baij->A,type,norm); CHKERRQ(ierr);
181d6de1c52SSatish Balay   } else {
182d6de1c52SSatish Balay     if (type == NORM_FROBENIUS) {
183d6de1c52SSatish Balay       v = amat->a;
184d6de1c52SSatish Balay       for (i=0; i<amat->nz*bs2; i++ ) {
185d6de1c52SSatish Balay #if defined(PETSC_COMPLEX)
186d6de1c52SSatish Balay         sum += real(conj(*v)*(*v)); v++;
187d6de1c52SSatish Balay #else
188d6de1c52SSatish Balay         sum += (*v)*(*v); v++;
189d6de1c52SSatish Balay #endif
190d6de1c52SSatish Balay       }
191d6de1c52SSatish Balay       v = bmat->a;
192d6de1c52SSatish Balay       for (i=0; i<bmat->nz*bs2; i++ ) {
193d6de1c52SSatish Balay #if defined(PETSC_COMPLEX)
194d6de1c52SSatish Balay         sum += real(conj(*v)*(*v)); v++;
195d6de1c52SSatish Balay #else
196d6de1c52SSatish Balay         sum += (*v)*(*v); v++;
197d6de1c52SSatish Balay #endif
198d6de1c52SSatish Balay       }
199d6de1c52SSatish Balay       MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
200d6de1c52SSatish Balay       *norm = sqrt(*norm);
201d6de1c52SSatish Balay     }
202acdf5bf4SSatish Balay     else
203acdf5bf4SSatish Balay       SETERRQ(1,"MatNorm_SeqBAIJ:No support for this norm yet");
204d6de1c52SSatish Balay   }
205d6de1c52SSatish Balay   return 0;
206d6de1c52SSatish Balay }
20757b952d6SSatish Balay 
20857b952d6SSatish Balay static int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
20957b952d6SSatish Balay {
21057b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
21157b952d6SSatish Balay   MPI_Comm    comm = mat->comm;
21257b952d6SSatish Balay   int         size = baij->size, *owners = baij->rowners,bs=baij->bs;
21357b952d6SSatish Balay   int         rank = baij->rank,tag = mat->tag, *owner,*starts,count,ierr;
21457b952d6SSatish Balay   MPI_Request *send_waits,*recv_waits;
21557b952d6SSatish Balay   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
21657b952d6SSatish Balay   InsertMode  addv;
21757b952d6SSatish Balay   Scalar      *rvalues,*svalues;
21857b952d6SSatish Balay 
21957b952d6SSatish Balay   /* make sure all processors are either in INSERTMODE or ADDMODE */
22057b952d6SSatish Balay   MPI_Allreduce(&baij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
22157b952d6SSatish Balay   if (addv == (ADD_VALUES|INSERT_VALUES)) {
22257b952d6SSatish Balay     SETERRQ(1,"MatAssemblyBegin_MPIBAIJ:Some processors inserted others added");
22357b952d6SSatish Balay   }
22457b952d6SSatish Balay   baij->insertmode = addv; /* in case this processor had no cache */
22557b952d6SSatish Balay 
22657b952d6SSatish Balay   /*  first count number of contributors to each processor */
22757b952d6SSatish Balay   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
22857b952d6SSatish Balay   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
22957b952d6SSatish Balay   owner = (int *) PetscMalloc( (baij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
23057b952d6SSatish Balay   for ( i=0; i<baij->stash.n; i++ ) {
23157b952d6SSatish Balay     idx = baij->stash.idx[i];
23257b952d6SSatish Balay     for ( j=0; j<size; j++ ) {
23357b952d6SSatish Balay       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
23457b952d6SSatish Balay         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
23557b952d6SSatish Balay       }
23657b952d6SSatish Balay     }
23757b952d6SSatish Balay   }
23857b952d6SSatish Balay   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
23957b952d6SSatish Balay 
24057b952d6SSatish Balay   /* inform other processors of number of messages and max length*/
24157b952d6SSatish Balay   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
24257b952d6SSatish Balay   MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);
24357b952d6SSatish Balay   nreceives = work[rank];
24457b952d6SSatish Balay   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
24557b952d6SSatish Balay   nmax = work[rank];
24657b952d6SSatish Balay   PetscFree(work);
24757b952d6SSatish Balay 
24857b952d6SSatish Balay   /* post receives:
24957b952d6SSatish Balay        1) each message will consist of ordered pairs
25057b952d6SSatish Balay      (global index,value) we store the global index as a double
25157b952d6SSatish Balay      to simplify the message passing.
25257b952d6SSatish Balay        2) since we don't know how long each individual message is we
25357b952d6SSatish Balay      allocate the largest needed buffer for each receive. Potentially
25457b952d6SSatish Balay      this is a lot of wasted space.
25557b952d6SSatish Balay 
25657b952d6SSatish Balay 
25757b952d6SSatish Balay        This could be done better.
25857b952d6SSatish Balay   */
25957b952d6SSatish Balay   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
26057b952d6SSatish Balay   CHKPTRQ(rvalues);
26157b952d6SSatish Balay   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
26257b952d6SSatish Balay   CHKPTRQ(recv_waits);
26357b952d6SSatish Balay   for ( i=0; i<nreceives; i++ ) {
26457b952d6SSatish Balay     MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
26557b952d6SSatish Balay               comm,recv_waits+i);
26657b952d6SSatish Balay   }
26757b952d6SSatish Balay 
26857b952d6SSatish Balay   /* do sends:
26957b952d6SSatish Balay       1) starts[i] gives the starting index in svalues for stuff going to
27057b952d6SSatish Balay          the ith processor
27157b952d6SSatish Balay   */
27257b952d6SSatish Balay   svalues = (Scalar *) PetscMalloc(3*(baij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
27357b952d6SSatish Balay   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
27457b952d6SSatish Balay   CHKPTRQ(send_waits);
27557b952d6SSatish Balay   starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
27657b952d6SSatish Balay   starts[0] = 0;
27757b952d6SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
27857b952d6SSatish Balay   for ( i=0; i<baij->stash.n; i++ ) {
27957b952d6SSatish Balay     svalues[3*starts[owner[i]]]       = (Scalar)  baij->stash.idx[i];
28057b952d6SSatish Balay     svalues[3*starts[owner[i]]+1]     = (Scalar)  baij->stash.idy[i];
28157b952d6SSatish Balay     svalues[3*(starts[owner[i]]++)+2] =  baij->stash.array[i];
28257b952d6SSatish Balay   }
28357b952d6SSatish Balay   PetscFree(owner);
28457b952d6SSatish Balay   starts[0] = 0;
28557b952d6SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
28657b952d6SSatish Balay   count = 0;
28757b952d6SSatish Balay   for ( i=0; i<size; i++ ) {
28857b952d6SSatish Balay     if (procs[i]) {
28957b952d6SSatish Balay       MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
29057b952d6SSatish Balay                 comm,send_waits+count++);
29157b952d6SSatish Balay     }
29257b952d6SSatish Balay   }
29357b952d6SSatish Balay   PetscFree(starts); PetscFree(nprocs);
29457b952d6SSatish Balay 
29557b952d6SSatish Balay   /* Free cache space */
29657b952d6SSatish Balay   PLogInfo(0,"[%d]MatAssemblyBegin_MPIBAIJ:Number of off processor values %d\n",rank,baij->stash.n);
29757b952d6SSatish Balay   ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr);
29857b952d6SSatish Balay 
29957b952d6SSatish Balay   baij->svalues    = svalues;    baij->rvalues    = rvalues;
30057b952d6SSatish Balay   baij->nsends     = nsends;     baij->nrecvs     = nreceives;
30157b952d6SSatish Balay   baij->send_waits = send_waits; baij->recv_waits = recv_waits;
30257b952d6SSatish Balay   baij->rmax       = nmax;
30357b952d6SSatish Balay 
30457b952d6SSatish Balay   return 0;
30557b952d6SSatish Balay }
30657b952d6SSatish Balay 
30757b952d6SSatish Balay 
30857b952d6SSatish Balay static int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
30957b952d6SSatish Balay {
31057b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
31157b952d6SSatish Balay   MPI_Status  *send_status,recv_status;
31257b952d6SSatish Balay   int         imdex,nrecvs = baij->nrecvs, count = nrecvs, i, n, ierr;
31357b952d6SSatish Balay   int         bs=baij->bs,row,col,other_disassembled;
31457b952d6SSatish Balay   Scalar      *values,val;
31557b952d6SSatish Balay   InsertMode  addv = baij->insertmode;
31657b952d6SSatish Balay 
31757b952d6SSatish Balay   /*  wait on receives */
31857b952d6SSatish Balay   while (count) {
31957b952d6SSatish Balay     MPI_Waitany(nrecvs,baij->recv_waits,&imdex,&recv_status);
32057b952d6SSatish Balay     /* unpack receives into our local space */
32157b952d6SSatish Balay     values = baij->rvalues + 3*imdex*baij->rmax;
32257b952d6SSatish Balay     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
32357b952d6SSatish Balay     n = n/3;
32457b952d6SSatish Balay     for ( i=0; i<n; i++ ) {
32557b952d6SSatish Balay       row = (int) PetscReal(values[3*i]) - baij->rstart*bs;
32657b952d6SSatish Balay       col = (int) PetscReal(values[3*i+1]);
32757b952d6SSatish Balay       val = values[3*i+2];
32857b952d6SSatish Balay       if (col >= baij->cstart*bs && col < baij->cend*bs) {
32957b952d6SSatish Balay         col -= baij->cstart*bs;
33057b952d6SSatish Balay         MatSetValues(baij->A,1,&row,1,&col,&val,addv);
33157b952d6SSatish Balay       }
33257b952d6SSatish Balay       else {
33357b952d6SSatish Balay         if (mat->was_assembled) {
334905e6a2fSBarry Smith           if (!baij->colmap) {
335905e6a2fSBarry Smith             ierr = CreateColmap_MPIBAIJ_Private(mat); CHKERRQ(ierr);
336905e6a2fSBarry Smith           }
337905e6a2fSBarry Smith           col = (baij->colmap[col/bs]-1)*bs + col%bs;
33857b952d6SSatish Balay           if (col < 0  && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
33957b952d6SSatish Balay             ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
34057b952d6SSatish Balay             col = (int) PetscReal(values[3*i+1]);
34157b952d6SSatish Balay           }
34257b952d6SSatish Balay         }
34357b952d6SSatish Balay         MatSetValues(baij->B,1,&row,1,&col,&val,addv);
34457b952d6SSatish Balay       }
34557b952d6SSatish Balay     }
34657b952d6SSatish Balay     count--;
34757b952d6SSatish Balay   }
34857b952d6SSatish Balay   PetscFree(baij->recv_waits); PetscFree(baij->rvalues);
34957b952d6SSatish Balay 
35057b952d6SSatish Balay   /* wait on sends */
35157b952d6SSatish Balay   if (baij->nsends) {
35257b952d6SSatish Balay     send_status = (MPI_Status *) PetscMalloc(baij->nsends*sizeof(MPI_Status));
35357b952d6SSatish Balay     CHKPTRQ(send_status);
35457b952d6SSatish Balay     MPI_Waitall(baij->nsends,baij->send_waits,send_status);
35557b952d6SSatish Balay     PetscFree(send_status);
35657b952d6SSatish Balay   }
35757b952d6SSatish Balay   PetscFree(baij->send_waits); PetscFree(baij->svalues);
35857b952d6SSatish Balay 
35957b952d6SSatish Balay   baij->insertmode = NOT_SET_VALUES;
36057b952d6SSatish Balay   ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr);
36157b952d6SSatish Balay   ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr);
36257b952d6SSatish Balay 
36357b952d6SSatish Balay   /* determine if any processor has disassembled, if so we must
36457b952d6SSatish Balay      also disassemble ourselfs, in order that we may reassemble. */
36557b952d6SSatish Balay   MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
36657b952d6SSatish Balay   if (mat->was_assembled && !other_disassembled) {
36757b952d6SSatish Balay     ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
36857b952d6SSatish Balay   }
36957b952d6SSatish Balay 
3706d4a8577SBarry Smith   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
37157b952d6SSatish Balay     ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr);
37257b952d6SSatish Balay   }
37357b952d6SSatish Balay   ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr);
37457b952d6SSatish Balay   ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr);
37557b952d6SSatish Balay 
37657b952d6SSatish Balay   if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;}
37757b952d6SSatish Balay   return 0;
37857b952d6SSatish Balay }
37957b952d6SSatish Balay 
38057b952d6SSatish Balay static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer)
38157b952d6SSatish Balay {
38257b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
38357b952d6SSatish Balay   int          ierr;
38457b952d6SSatish Balay 
38557b952d6SSatish Balay   if (baij->size == 1) {
38657b952d6SSatish Balay     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
38757b952d6SSatish Balay   }
38857b952d6SSatish Balay   else SETERRQ(1,"MatView_MPIBAIJ_Binary:Only uniprocessor output supported");
38957b952d6SSatish Balay   return 0;
39057b952d6SSatish Balay }
39157b952d6SSatish Balay 
39257b952d6SSatish Balay static int MatView_MPIBAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
39357b952d6SSatish Balay {
39457b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
395cee3aa6bSSatish Balay   int          ierr, format,rank,bs = baij->bs;
39657b952d6SSatish Balay   FILE         *fd;
39757b952d6SSatish Balay   ViewerType   vtype;
39857b952d6SSatish Balay 
39957b952d6SSatish Balay   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
40057b952d6SSatish Balay   if (vtype  == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
40157b952d6SSatish Balay     ierr = ViewerGetFormat(viewer,&format);
402639f9d9dSBarry Smith     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
4034e220ebcSLois Curfman McInnes       MatInfo info;
40457b952d6SSatish Balay       MPI_Comm_rank(mat->comm,&rank);
40557b952d6SSatish Balay       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
4064e220ebcSLois Curfman McInnes       ierr = MatGetInfo(mat,MAT_LOCAL,&info);
40757b952d6SSatish Balay       PetscSequentialPhaseBegin(mat->comm,1);
40857b952d6SSatish Balay       fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
4094e220ebcSLois Curfman McInnes               rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
4104e220ebcSLois Curfman McInnes               baij->bs,(int)info.memory);
4114e220ebcSLois Curfman McInnes       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);
4124e220ebcSLois Curfman McInnes       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
4134e220ebcSLois Curfman McInnes       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);
4144e220ebcSLois Curfman McInnes       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
41557b952d6SSatish Balay       fflush(fd);
41657b952d6SSatish Balay       PetscSequentialPhaseEnd(mat->comm,1);
41757b952d6SSatish Balay       ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr);
41857b952d6SSatish Balay       return 0;
41957b952d6SSatish Balay     }
420639f9d9dSBarry Smith     else if (format == VIEWER_FORMAT_ASCII_INFO) {
421*bcc3fcf6SBarry Smith       PetscPrintf(mat->comm,"  block size is %d\n",bs);
42257b952d6SSatish Balay       return 0;
42357b952d6SSatish Balay     }
42457b952d6SSatish Balay   }
42557b952d6SSatish Balay 
42657b952d6SSatish Balay   if (vtype == DRAW_VIEWER) {
42757b952d6SSatish Balay     Draw       draw;
42857b952d6SSatish Balay     PetscTruth isnull;
42957b952d6SSatish Balay     ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
43057b952d6SSatish Balay     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
43157b952d6SSatish Balay   }
43257b952d6SSatish Balay 
43357b952d6SSatish Balay   if (vtype == ASCII_FILE_VIEWER) {
43457b952d6SSatish Balay     ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
43557b952d6SSatish Balay     PetscSequentialPhaseBegin(mat->comm,1);
43657b952d6SSatish Balay     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
43757b952d6SSatish Balay            baij->rank,baij->m,baij->rstart*bs,baij->rend*bs,baij->n,
43857b952d6SSatish Balay             baij->cstart*bs,baij->cend*bs);
43957b952d6SSatish Balay     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
44057b952d6SSatish Balay     ierr = MatView(baij->B,viewer); CHKERRQ(ierr);
44157b952d6SSatish Balay     fflush(fd);
44257b952d6SSatish Balay     PetscSequentialPhaseEnd(mat->comm,1);
44357b952d6SSatish Balay   }
44457b952d6SSatish Balay   else {
44557b952d6SSatish Balay     int size = baij->size;
44657b952d6SSatish Balay     rank = baij->rank;
44757b952d6SSatish Balay     if (size == 1) {
44857b952d6SSatish Balay       ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
44957b952d6SSatish Balay     }
45057b952d6SSatish Balay     else {
45157b952d6SSatish Balay       /* assemble the entire matrix onto first processor. */
45257b952d6SSatish Balay       Mat         A;
45357b952d6SSatish Balay       Mat_SeqBAIJ *Aloc;
45457b952d6SSatish Balay       int         M = baij->M, N = baij->N,*ai,*aj,row,col,i,j,k,*rvals;
45557b952d6SSatish Balay       int         mbs=baij->mbs;
45657b952d6SSatish Balay       Scalar      *a;
45757b952d6SSatish Balay 
45857b952d6SSatish Balay       if (!rank) {
459cee3aa6bSSatish Balay         ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
46057b952d6SSatish Balay         CHKERRQ(ierr);
46157b952d6SSatish Balay       }
46257b952d6SSatish Balay       else {
463cee3aa6bSSatish Balay         ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
46457b952d6SSatish Balay         CHKERRQ(ierr);
46557b952d6SSatish Balay       }
46657b952d6SSatish Balay       PLogObjectParent(mat,A);
46757b952d6SSatish Balay 
46857b952d6SSatish Balay       /* copy over the A part */
46957b952d6SSatish Balay       Aloc = (Mat_SeqBAIJ*) baij->A->data;
47057b952d6SSatish Balay       ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
47157b952d6SSatish Balay       row = baij->rstart;
47257b952d6SSatish Balay       rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
47357b952d6SSatish Balay 
47457b952d6SSatish Balay       for ( i=0; i<mbs; i++ ) {
47557b952d6SSatish Balay         rvals[0] = bs*(baij->rstart + i);
47657b952d6SSatish Balay         for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
47757b952d6SSatish Balay         for ( j=ai[i]; j<ai[i+1]; j++ ) {
47857b952d6SSatish Balay           col = (baij->cstart+aj[j])*bs;
47957b952d6SSatish Balay           for (k=0; k<bs; k++ ) {
480cee3aa6bSSatish Balay             ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
481cee3aa6bSSatish Balay             col++; a += bs;
48257b952d6SSatish Balay           }
48357b952d6SSatish Balay         }
48457b952d6SSatish Balay       }
48557b952d6SSatish Balay       /* copy over the B part */
48657b952d6SSatish Balay       Aloc = (Mat_SeqBAIJ*) baij->B->data;
48757b952d6SSatish Balay       ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
48857b952d6SSatish Balay       row = baij->rstart*bs;
48957b952d6SSatish Balay       for ( i=0; i<mbs; i++ ) {
49057b952d6SSatish Balay         rvals[0] = bs*(baij->rstart + i);
49157b952d6SSatish Balay         for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
49257b952d6SSatish Balay         for ( j=ai[i]; j<ai[i+1]; j++ ) {
49357b952d6SSatish Balay           col = baij->garray[aj[j]]*bs;
49457b952d6SSatish Balay           for (k=0; k<bs; k++ ) {
495cee3aa6bSSatish Balay             ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
496cee3aa6bSSatish Balay             col++; a += bs;
49757b952d6SSatish Balay           }
49857b952d6SSatish Balay         }
49957b952d6SSatish Balay       }
50057b952d6SSatish Balay       PetscFree(rvals);
5016d4a8577SBarry Smith       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
5026d4a8577SBarry Smith       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
50357b952d6SSatish Balay       if (!rank) {
50457b952d6SSatish Balay         ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
50557b952d6SSatish Balay       }
50657b952d6SSatish Balay       ierr = MatDestroy(A); CHKERRQ(ierr);
50757b952d6SSatish Balay     }
50857b952d6SSatish Balay   }
50957b952d6SSatish Balay   return 0;
51057b952d6SSatish Balay }
51157b952d6SSatish Balay 
51257b952d6SSatish Balay 
51357b952d6SSatish Balay 
51457b952d6SSatish Balay static int MatView_MPIBAIJ(PetscObject obj,Viewer viewer)
51557b952d6SSatish Balay {
51657b952d6SSatish Balay   Mat         mat = (Mat) obj;
51757b952d6SSatish Balay   int         ierr;
51857b952d6SSatish Balay   ViewerType  vtype;
51957b952d6SSatish Balay 
52057b952d6SSatish Balay   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
52157b952d6SSatish Balay   if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
52257b952d6SSatish Balay       vtype == DRAW_VIEWER       || vtype == MATLAB_VIEWER) {
52357b952d6SSatish Balay     ierr = MatView_MPIBAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
52457b952d6SSatish Balay   }
52557b952d6SSatish Balay   else if (vtype == BINARY_FILE_VIEWER) {
52657b952d6SSatish Balay     return MatView_MPIBAIJ_Binary(mat,viewer);
52757b952d6SSatish Balay   }
52857b952d6SSatish Balay   return 0;
52957b952d6SSatish Balay }
53057b952d6SSatish Balay 
53179bdfe76SSatish Balay static int MatDestroy_MPIBAIJ(PetscObject obj)
53279bdfe76SSatish Balay {
53379bdfe76SSatish Balay   Mat         mat = (Mat) obj;
53479bdfe76SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
53579bdfe76SSatish Balay   int         ierr;
53679bdfe76SSatish Balay 
53779bdfe76SSatish Balay #if defined(PETSC_LOG)
53879bdfe76SSatish Balay   PLogObjectState(obj,"Rows=%d, Cols=%d",baij->M,baij->N);
53979bdfe76SSatish Balay #endif
54079bdfe76SSatish Balay 
54179bdfe76SSatish Balay   PetscFree(baij->rowners);
54279bdfe76SSatish Balay   ierr = MatDestroy(baij->A); CHKERRQ(ierr);
54379bdfe76SSatish Balay   ierr = MatDestroy(baij->B); CHKERRQ(ierr);
54479bdfe76SSatish Balay   if (baij->colmap) PetscFree(baij->colmap);
54579bdfe76SSatish Balay   if (baij->garray) PetscFree(baij->garray);
54679bdfe76SSatish Balay   if (baij->lvec)   VecDestroy(baij->lvec);
54779bdfe76SSatish Balay   if (baij->Mvctx)  VecScatterDestroy(baij->Mvctx);
54879bdfe76SSatish Balay   if (baij->rowvalues) PetscFree(baij->rowvalues);
54979bdfe76SSatish Balay   PetscFree(baij);
55079bdfe76SSatish Balay   PLogObjectDestroy(mat);
55179bdfe76SSatish Balay   PetscHeaderDestroy(mat);
55279bdfe76SSatish Balay   return 0;
55379bdfe76SSatish Balay }
55479bdfe76SSatish Balay 
555cee3aa6bSSatish Balay static int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
556cee3aa6bSSatish Balay {
557cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
55847b4a8eaSLois Curfman McInnes   int         ierr, nt;
559cee3aa6bSSatish Balay 
560c16cb8f2SBarry Smith   VecGetLocalSize_Fast(xx,nt);
56147b4a8eaSLois Curfman McInnes   if (nt != a->n) {
5620ac07820SSatish Balay     SETERRQ(1,"MatMult_MPIBAIJ:Incompatible parition of A and xx");
56347b4a8eaSLois Curfman McInnes   }
564c16cb8f2SBarry Smith   VecGetLocalSize_Fast(yy,nt);
56547b4a8eaSLois Curfman McInnes   if (nt != a->m) {
5660ac07820SSatish Balay     SETERRQ(1,"MatMult_MPIBAIJ:Incompatible parition of A and yy");
56747b4a8eaSLois Curfman McInnes   }
568cee3aa6bSSatish Balay   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
569cee3aa6bSSatish Balay   ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr);
570cee3aa6bSSatish Balay   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
571cee3aa6bSSatish Balay   ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
572*bcc3fcf6SBarry Smith   ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
573cee3aa6bSSatish Balay   return 0;
574cee3aa6bSSatish Balay }
575cee3aa6bSSatish Balay 
576cee3aa6bSSatish Balay static int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
577cee3aa6bSSatish Balay {
578cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
579cee3aa6bSSatish Balay   int        ierr;
580cee3aa6bSSatish Balay   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
581cee3aa6bSSatish Balay   ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
582cee3aa6bSSatish Balay   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
583cee3aa6bSSatish Balay   ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
584cee3aa6bSSatish Balay   return 0;
585cee3aa6bSSatish Balay }
586cee3aa6bSSatish Balay 
587cee3aa6bSSatish Balay static int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy)
588cee3aa6bSSatish Balay {
589cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
590cee3aa6bSSatish Balay   int        ierr;
591cee3aa6bSSatish Balay 
592cee3aa6bSSatish Balay   /* do nondiagonal part */
593cee3aa6bSSatish Balay   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
594cee3aa6bSSatish Balay   /* send it on its way */
595537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
596cee3aa6bSSatish Balay   /* do local part */
597cee3aa6bSSatish Balay   ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr);
598cee3aa6bSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
599cee3aa6bSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
600cee3aa6bSSatish Balay   /* but is not perhaps always true. */
601639f9d9dSBarry Smith   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
602cee3aa6bSSatish Balay   return 0;
603cee3aa6bSSatish Balay }
604cee3aa6bSSatish Balay 
605cee3aa6bSSatish Balay static int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
606cee3aa6bSSatish Balay {
607cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
608cee3aa6bSSatish Balay   int        ierr;
609cee3aa6bSSatish Balay 
610cee3aa6bSSatish Balay   /* do nondiagonal part */
611cee3aa6bSSatish Balay   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
612cee3aa6bSSatish Balay   /* send it on its way */
613537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
614cee3aa6bSSatish Balay   /* do local part */
615cee3aa6bSSatish Balay   ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
616cee3aa6bSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
617cee3aa6bSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
618cee3aa6bSSatish Balay   /* but is not perhaps always true. */
619537820f0SBarry Smith   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
620cee3aa6bSSatish Balay   return 0;
621cee3aa6bSSatish Balay }
622cee3aa6bSSatish Balay 
623cee3aa6bSSatish Balay /*
624cee3aa6bSSatish Balay   This only works correctly for square matrices where the subblock A->A is the
625cee3aa6bSSatish Balay    diagonal block
626cee3aa6bSSatish Balay */
627cee3aa6bSSatish Balay static int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
628cee3aa6bSSatish Balay {
629cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
630cee3aa6bSSatish Balay   if (a->M != a->N)
631cee3aa6bSSatish Balay     SETERRQ(1,"MatGetDiagonal_MPIBAIJ:Supports only square matrix where A->A is diag block");
632cee3aa6bSSatish Balay   return MatGetDiagonal(a->A,v);
633cee3aa6bSSatish Balay }
634cee3aa6bSSatish Balay 
635cee3aa6bSSatish Balay static int MatScale_MPIBAIJ(Scalar *aa,Mat A)
636cee3aa6bSSatish Balay {
637cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
638cee3aa6bSSatish Balay   int        ierr;
639cee3aa6bSSatish Balay   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
640cee3aa6bSSatish Balay   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
641cee3aa6bSSatish Balay   return 0;
642cee3aa6bSSatish Balay }
64357b952d6SSatish Balay static int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n)
64457b952d6SSatish Balay {
64557b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
64657b952d6SSatish Balay   *m = mat->M; *n = mat->N;
64757b952d6SSatish Balay   return 0;
64857b952d6SSatish Balay }
64957b952d6SSatish Balay 
65057b952d6SSatish Balay static int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n)
65157b952d6SSatish Balay {
65257b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
65357b952d6SSatish Balay   *m = mat->m; *n = mat->N;
65457b952d6SSatish Balay   return 0;
65557b952d6SSatish Balay }
65657b952d6SSatish Balay 
65757b952d6SSatish Balay static int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n)
65857b952d6SSatish Balay {
65957b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
66057b952d6SSatish Balay   *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs;
66157b952d6SSatish Balay   return 0;
66257b952d6SSatish Balay }
66357b952d6SSatish Balay 
664acdf5bf4SSatish Balay extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
665acdf5bf4SSatish Balay extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
666acdf5bf4SSatish Balay 
667acdf5bf4SSatish Balay int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
668acdf5bf4SSatish Balay {
669acdf5bf4SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
670acdf5bf4SSatish Balay   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
671acdf5bf4SSatish Balay   int        bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB;
672d9d09a02SSatish Balay   int        nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs;
673d9d09a02SSatish Balay   int        *cmap, *idx_p,cstart = mat->cstart;
674acdf5bf4SSatish Balay 
675acdf5bf4SSatish Balay   if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,"MatGetRow_MPIBAIJ:Already active");
676acdf5bf4SSatish Balay   mat->getrowactive = PETSC_TRUE;
677acdf5bf4SSatish Balay 
678acdf5bf4SSatish Balay   if (!mat->rowvalues && (idx || v)) {
679acdf5bf4SSatish Balay     /*
680acdf5bf4SSatish Balay         allocate enough space to hold information from the longest row.
681acdf5bf4SSatish Balay     */
682acdf5bf4SSatish Balay     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data;
683bd16c2feSSatish Balay     int     max = 1,mbs = mat->mbs,tmp;
684bd16c2feSSatish Balay     for ( i=0; i<mbs; i++ ) {
685acdf5bf4SSatish Balay       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
686acdf5bf4SSatish Balay       if (max < tmp) { max = tmp; }
687acdf5bf4SSatish Balay     }
688acdf5bf4SSatish Balay     mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar)));
689acdf5bf4SSatish Balay     CHKPTRQ(mat->rowvalues);
690acdf5bf4SSatish Balay     mat->rowindices = (int *) (mat->rowvalues + max*bs2);
691acdf5bf4SSatish Balay   }
692acdf5bf4SSatish Balay 
693acdf5bf4SSatish Balay 
694d9d09a02SSatish Balay   if (row < brstart || row >= brend) SETERRQ(1,"MatGetRow_MPIBAIJ:Only local rows")
695d9d09a02SSatish Balay   lrow = row - brstart;
696acdf5bf4SSatish Balay 
697acdf5bf4SSatish Balay   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
698acdf5bf4SSatish Balay   if (!v)   {pvA = 0; pvB = 0;}
699acdf5bf4SSatish Balay   if (!idx) {pcA = 0; if (!v) pcB = 0;}
700acdf5bf4SSatish Balay   ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
701acdf5bf4SSatish Balay   ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
702acdf5bf4SSatish Balay   nztot = nzA + nzB;
703acdf5bf4SSatish Balay 
704acdf5bf4SSatish Balay   cmap  = mat->garray;
705acdf5bf4SSatish Balay   if (v  || idx) {
706acdf5bf4SSatish Balay     if (nztot) {
707acdf5bf4SSatish Balay       /* Sort by increasing column numbers, assuming A and B already sorted */
708acdf5bf4SSatish Balay       int imark = -1;
709acdf5bf4SSatish Balay       if (v) {
710acdf5bf4SSatish Balay         *v = v_p = mat->rowvalues;
711acdf5bf4SSatish Balay         for ( i=0; i<nzB; i++ ) {
712d9d09a02SSatish Balay           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
713acdf5bf4SSatish Balay           else break;
714acdf5bf4SSatish Balay         }
715acdf5bf4SSatish Balay         imark = i;
716acdf5bf4SSatish Balay         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
717acdf5bf4SSatish Balay         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
718acdf5bf4SSatish Balay       }
719acdf5bf4SSatish Balay       if (idx) {
720acdf5bf4SSatish Balay         *idx = idx_p = mat->rowindices;
721acdf5bf4SSatish Balay         if (imark > -1) {
722acdf5bf4SSatish Balay           for ( i=0; i<imark; i++ ) {
723bd16c2feSSatish Balay             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
724acdf5bf4SSatish Balay           }
725acdf5bf4SSatish Balay         } else {
726acdf5bf4SSatish Balay           for ( i=0; i<nzB; i++ ) {
727d9d09a02SSatish Balay             if (cmap[cworkB[i]/bs] < cstart)
728d9d09a02SSatish Balay               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
729acdf5bf4SSatish Balay             else break;
730acdf5bf4SSatish Balay           }
731acdf5bf4SSatish Balay           imark = i;
732acdf5bf4SSatish Balay         }
733d9d09a02SSatish Balay         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart*bs + cworkA[i];
734d9d09a02SSatish Balay         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
735acdf5bf4SSatish Balay       }
736acdf5bf4SSatish Balay     }
737d212a18eSSatish Balay     else {
738d212a18eSSatish Balay       if (idx) *idx = 0;
739d212a18eSSatish Balay       if (v)   *v   = 0;
740d212a18eSSatish Balay     }
741acdf5bf4SSatish Balay   }
742acdf5bf4SSatish Balay   *nz = nztot;
743acdf5bf4SSatish Balay   ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
744acdf5bf4SSatish Balay   ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
745acdf5bf4SSatish Balay   return 0;
746acdf5bf4SSatish Balay }
747acdf5bf4SSatish Balay 
748acdf5bf4SSatish Balay int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
749acdf5bf4SSatish Balay {
750acdf5bf4SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
751acdf5bf4SSatish Balay   if (baij->getrowactive == PETSC_FALSE) {
752acdf5bf4SSatish Balay     SETERRQ(1,"MatRestoreRow_MPIBAIJ:MatGetRow not called");
753acdf5bf4SSatish Balay   }
754acdf5bf4SSatish Balay   baij->getrowactive = PETSC_FALSE;
755acdf5bf4SSatish Balay   return 0;
756acdf5bf4SSatish Balay }
757acdf5bf4SSatish Balay 
7585a838052SSatish Balay static int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
7595a838052SSatish Balay {
7605a838052SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
7615a838052SSatish Balay   *bs = baij->bs;
7625a838052SSatish Balay   return 0;
7635a838052SSatish Balay }
7645a838052SSatish Balay 
76558667388SSatish Balay static int MatZeroEntries_MPIBAIJ(Mat A)
76658667388SSatish Balay {
76758667388SSatish Balay   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data;
76858667388SSatish Balay   int         ierr;
76958667388SSatish Balay   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
77058667388SSatish Balay   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
77158667388SSatish Balay   return 0;
77258667388SSatish Balay }
7730ac07820SSatish Balay 
7744e220ebcSLois Curfman McInnes static int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
7750ac07820SSatish Balay {
7764e220ebcSLois Curfman McInnes   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data;
7774e220ebcSLois Curfman McInnes   Mat         A = a->A, B = a->B;
7787d57db60SLois Curfman McInnes   int         ierr;
7797d57db60SLois Curfman McInnes   double      isend[5], irecv[5];
7800ac07820SSatish Balay 
7814e220ebcSLois Curfman McInnes   info->rows_global    = (double)a->M;
7824e220ebcSLois Curfman McInnes   info->columns_global = (double)a->N;
7834e220ebcSLois Curfman McInnes   info->rows_local     = (double)a->m;
7844e220ebcSLois Curfman McInnes   info->columns_local  = (double)a->N;
7854e220ebcSLois Curfman McInnes   info->block_size     = (double)a->bs;
7864e220ebcSLois Curfman McInnes   ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
7874e220ebcSLois Curfman McInnes   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->memory;
7884e220ebcSLois Curfman McInnes   ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
7894e220ebcSLois Curfman McInnes   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->memory;
7900ac07820SSatish Balay   if (flag == MAT_LOCAL) {
7914e220ebcSLois Curfman McInnes     info->nz_used      = isend[0];
7924e220ebcSLois Curfman McInnes     info->nz_allocated = isend[1];
7934e220ebcSLois Curfman McInnes     info->nz_unneeded  = isend[2];
7944e220ebcSLois Curfman McInnes     info->memory       = isend[3];
7954e220ebcSLois Curfman McInnes     info->mallocs      = isend[4];
7960ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_MAX) {
7970ac07820SSatish Balay     MPI_Allreduce(isend,irecv,3,MPI_INT,MPI_MAX,matin->comm);
7984e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
7994e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
8004e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
8014e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
8024e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
8030ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_SUM) {
8040ac07820SSatish Balay     MPI_Allreduce(isend,irecv,3,MPI_INT,MPI_SUM,matin->comm);
8054e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
8064e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
8074e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
8084e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
8094e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
8100ac07820SSatish Balay   }
8114e220ebcSLois Curfman McInnes   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
8124e220ebcSLois Curfman McInnes   info->fill_ratio_needed = 0;
8134e220ebcSLois Curfman McInnes   info->factor_mallocs    = 0;
8140ac07820SSatish Balay   return 0;
8150ac07820SSatish Balay }
8160ac07820SSatish Balay 
81758667388SSatish Balay static int MatSetOption_MPIBAIJ(Mat A,MatOption op)
81858667388SSatish Balay {
81958667388SSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
82058667388SSatish Balay 
82158667388SSatish Balay   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
82258667388SSatish Balay       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
82358667388SSatish Balay       op == MAT_COLUMNS_SORTED ||
82458667388SSatish Balay       op == MAT_ROW_ORIENTED) {
82558667388SSatish Balay         MatSetOption(a->A,op);
82658667388SSatish Balay         MatSetOption(a->B,op);
82758667388SSatish Balay   }
82858667388SSatish Balay   else if (op == MAT_ROWS_SORTED ||
82958667388SSatish Balay            op == MAT_SYMMETRIC ||
83058667388SSatish Balay            op == MAT_STRUCTURALLY_SYMMETRIC ||
83158667388SSatish Balay            op == MAT_YES_NEW_DIAGONALS)
83258667388SSatish Balay     PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
83358667388SSatish Balay   else if (op == MAT_COLUMN_ORIENTED) {
83458667388SSatish Balay     a->roworiented = 0;
83558667388SSatish Balay     MatSetOption(a->A,op);
83658667388SSatish Balay     MatSetOption(a->B,op);
83758667388SSatish Balay   }
83858667388SSatish Balay   else if (op == MAT_NO_NEW_DIAGONALS)
8390ac07820SSatish Balay     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIBAIJ:MAT_NO_NEW_DIAGONALS");}
84058667388SSatish Balay   else
8410ac07820SSatish Balay     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIBAIJ:unknown option");}
84258667388SSatish Balay   return 0;
84358667388SSatish Balay }
84458667388SSatish Balay 
8450ac07820SSatish Balay static int MatTranspose_MPIBAIJ(Mat A,Mat *matout)
8460ac07820SSatish Balay {
8470ac07820SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data;
8480ac07820SSatish Balay   Mat_SeqBAIJ *Aloc;
8490ac07820SSatish Balay   Mat        B;
8500ac07820SSatish Balay   int        ierr,M=baij->M,N=baij->N,*ai,*aj,row,i,*rvals,j,k,col;
8510ac07820SSatish Balay   int        bs=baij->bs,mbs=baij->mbs;
8520ac07820SSatish Balay   Scalar     *a;
8530ac07820SSatish Balay 
8540ac07820SSatish Balay   if (matout == PETSC_NULL && M != N)
8550ac07820SSatish Balay     SETERRQ(1,"MatTranspose_MPIBAIJ:Square matrix only for in-place");
8560ac07820SSatish Balay   ierr = MatCreateMPIBAIJ(A->comm,baij->bs,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);
8570ac07820SSatish Balay   CHKERRQ(ierr);
8580ac07820SSatish Balay 
8590ac07820SSatish Balay   /* copy over the A part */
8600ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*) baij->A->data;
8610ac07820SSatish Balay   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
8620ac07820SSatish Balay   row = baij->rstart;
8630ac07820SSatish Balay   rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
8640ac07820SSatish Balay 
8650ac07820SSatish Balay   for ( i=0; i<mbs; i++ ) {
8660ac07820SSatish Balay     rvals[0] = bs*(baij->rstart + i);
8670ac07820SSatish Balay     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
8680ac07820SSatish Balay     for ( j=ai[i]; j<ai[i+1]; j++ ) {
8690ac07820SSatish Balay       col = (baij->cstart+aj[j])*bs;
8700ac07820SSatish Balay       for (k=0; k<bs; k++ ) {
8710ac07820SSatish Balay         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
8720ac07820SSatish Balay         col++; a += bs;
8730ac07820SSatish Balay       }
8740ac07820SSatish Balay     }
8750ac07820SSatish Balay   }
8760ac07820SSatish Balay   /* copy over the B part */
8770ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*) baij->B->data;
8780ac07820SSatish Balay   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
8790ac07820SSatish Balay   row = baij->rstart*bs;
8800ac07820SSatish Balay   for ( i=0; i<mbs; i++ ) {
8810ac07820SSatish Balay     rvals[0] = bs*(baij->rstart + i);
8820ac07820SSatish Balay     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
8830ac07820SSatish Balay     for ( j=ai[i]; j<ai[i+1]; j++ ) {
8840ac07820SSatish Balay       col = baij->garray[aj[j]]*bs;
8850ac07820SSatish Balay       for (k=0; k<bs; k++ ) {
8860ac07820SSatish Balay         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
8870ac07820SSatish Balay         col++; a += bs;
8880ac07820SSatish Balay       }
8890ac07820SSatish Balay     }
8900ac07820SSatish Balay   }
8910ac07820SSatish Balay   PetscFree(rvals);
8920ac07820SSatish Balay   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
8930ac07820SSatish Balay   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
8940ac07820SSatish Balay 
8950ac07820SSatish Balay   if (matout != PETSC_NULL) {
8960ac07820SSatish Balay     *matout = B;
8970ac07820SSatish Balay   } else {
8980ac07820SSatish Balay     /* This isn't really an in-place transpose .... but free data structures from baij */
8990ac07820SSatish Balay     PetscFree(baij->rowners);
9000ac07820SSatish Balay     ierr = MatDestroy(baij->A); CHKERRQ(ierr);
9010ac07820SSatish Balay     ierr = MatDestroy(baij->B); CHKERRQ(ierr);
9020ac07820SSatish Balay     if (baij->colmap) PetscFree(baij->colmap);
9030ac07820SSatish Balay     if (baij->garray) PetscFree(baij->garray);
9040ac07820SSatish Balay     if (baij->lvec) VecDestroy(baij->lvec);
9050ac07820SSatish Balay     if (baij->Mvctx) VecScatterDestroy(baij->Mvctx);
9060ac07820SSatish Balay     PetscFree(baij);
9070ac07820SSatish Balay     PetscMemcpy(A,B,sizeof(struct _Mat));
9080ac07820SSatish Balay     PetscHeaderDestroy(B);
9090ac07820SSatish Balay   }
9100ac07820SSatish Balay   return 0;
9110ac07820SSatish Balay }
9120e95ebc0SSatish Balay 
9130e95ebc0SSatish Balay int MatDiagonalScale_MPIBAIJ(Mat A,Vec ll,Vec rr)
9140e95ebc0SSatish Balay {
9150e95ebc0SSatish Balay   Mat a = ((Mat_MPIBAIJ *) A->data)->A;
9160e95ebc0SSatish Balay   Mat b = ((Mat_MPIBAIJ *) A->data)->B;
9170e95ebc0SSatish Balay   int ierr,s1,s2,s3;
9180e95ebc0SSatish Balay 
9190e95ebc0SSatish Balay   if (ll)  {
9200e95ebc0SSatish Balay     ierr = VecGetLocalSize(ll,&s1); CHKERRQ(ierr);
9210e95ebc0SSatish Balay     ierr = MatGetLocalSize(A,&s2,&s3); CHKERRQ(ierr);
9220e95ebc0SSatish Balay     if (s1!=s2) SETERRQ(1,"MatDiagonalScale_MPIBAIJ: non-conforming local sizes");
9230e95ebc0SSatish Balay     ierr = MatDiagonalScale(a,ll,0); CHKERRQ(ierr);
9240e95ebc0SSatish Balay     ierr = MatDiagonalScale(b,ll,0); CHKERRQ(ierr);
9250e95ebc0SSatish Balay   }
9260e95ebc0SSatish Balay   if (rr) SETERRQ(1,"MatDiagonalScale_MPIBAIJ:not supported for right vector");
9270e95ebc0SSatish Balay   return 0;
9280e95ebc0SSatish Balay }
9290e95ebc0SSatish Balay 
9300ac07820SSatish Balay /* the code does not do the diagonal entries correctly unless the
9310ac07820SSatish Balay    matrix is square and the column and row owerships are identical.
9320ac07820SSatish Balay    This is a BUG. The only way to fix it seems to be to access
9330ac07820SSatish Balay    baij->A and baij->B directly and not through the MatZeroRows()
9340ac07820SSatish Balay    routine.
9350ac07820SSatish Balay */
9360ac07820SSatish Balay static int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag)
9370ac07820SSatish Balay {
9380ac07820SSatish Balay   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ *) A->data;
9390ac07820SSatish Balay   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
9400ac07820SSatish Balay   int            *procs,*nprocs,j,found,idx,nsends,*work;
9410ac07820SSatish Balay   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
9420ac07820SSatish Balay   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
9430ac07820SSatish Balay   int            *lens,imdex,*lrows,*values,bs=l->bs;
9440ac07820SSatish Balay   MPI_Comm       comm = A->comm;
9450ac07820SSatish Balay   MPI_Request    *send_waits,*recv_waits;
9460ac07820SSatish Balay   MPI_Status     recv_status,*send_status;
9470ac07820SSatish Balay   IS             istmp;
9480ac07820SSatish Balay 
9490ac07820SSatish Balay   ierr = ISGetSize(is,&N); CHKERRQ(ierr);
9500ac07820SSatish Balay   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
9510ac07820SSatish Balay 
9520ac07820SSatish Balay   /*  first count number of contributors to each processor */
9530ac07820SSatish Balay   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
9540ac07820SSatish Balay   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
9550ac07820SSatish Balay   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
9560ac07820SSatish Balay   for ( i=0; i<N; i++ ) {
9570ac07820SSatish Balay     idx = rows[i];
9580ac07820SSatish Balay     found = 0;
9590ac07820SSatish Balay     for ( j=0; j<size; j++ ) {
9600ac07820SSatish Balay       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
9610ac07820SSatish Balay         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
9620ac07820SSatish Balay       }
9630ac07820SSatish Balay     }
9640ac07820SSatish Balay     if (!found) SETERRQ(1,"MatZeroRows_MPIBAIJ:Index out of range");
9650ac07820SSatish Balay   }
9660ac07820SSatish Balay   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
9670ac07820SSatish Balay 
9680ac07820SSatish Balay   /* inform other processors of number of messages and max length*/
9690ac07820SSatish Balay   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
9700ac07820SSatish Balay   MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
9710ac07820SSatish Balay   nrecvs = work[rank];
9720ac07820SSatish Balay   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
9730ac07820SSatish Balay   nmax = work[rank];
9740ac07820SSatish Balay   PetscFree(work);
9750ac07820SSatish Balay 
9760ac07820SSatish Balay   /* post receives:   */
9770ac07820SSatish Balay   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
9780ac07820SSatish Balay   CHKPTRQ(rvalues);
9790ac07820SSatish Balay   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
9800ac07820SSatish Balay   CHKPTRQ(recv_waits);
9810ac07820SSatish Balay   for ( i=0; i<nrecvs; i++ ) {
9820ac07820SSatish Balay     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
9830ac07820SSatish Balay   }
9840ac07820SSatish Balay 
9850ac07820SSatish Balay   /* do sends:
9860ac07820SSatish Balay       1) starts[i] gives the starting index in svalues for stuff going to
9870ac07820SSatish Balay          the ith processor
9880ac07820SSatish Balay   */
9890ac07820SSatish Balay   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
9900ac07820SSatish Balay   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
9910ac07820SSatish Balay   CHKPTRQ(send_waits);
9920ac07820SSatish Balay   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
9930ac07820SSatish Balay   starts[0] = 0;
9940ac07820SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
9950ac07820SSatish Balay   for ( i=0; i<N; i++ ) {
9960ac07820SSatish Balay     svalues[starts[owner[i]]++] = rows[i];
9970ac07820SSatish Balay   }
9980ac07820SSatish Balay   ISRestoreIndices(is,&rows);
9990ac07820SSatish Balay 
10000ac07820SSatish Balay   starts[0] = 0;
10010ac07820SSatish Balay   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
10020ac07820SSatish Balay   count = 0;
10030ac07820SSatish Balay   for ( i=0; i<size; i++ ) {
10040ac07820SSatish Balay     if (procs[i]) {
10050ac07820SSatish Balay       MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
10060ac07820SSatish Balay     }
10070ac07820SSatish Balay   }
10080ac07820SSatish Balay   PetscFree(starts);
10090ac07820SSatish Balay 
10100ac07820SSatish Balay   base = owners[rank]*bs;
10110ac07820SSatish Balay 
10120ac07820SSatish Balay   /*  wait on receives */
10130ac07820SSatish Balay   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
10140ac07820SSatish Balay   source = lens + nrecvs;
10150ac07820SSatish Balay   count  = nrecvs; slen = 0;
10160ac07820SSatish Balay   while (count) {
10170ac07820SSatish Balay     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
10180ac07820SSatish Balay     /* unpack receives into our local space */
10190ac07820SSatish Balay     MPI_Get_count(&recv_status,MPI_INT,&n);
10200ac07820SSatish Balay     source[imdex]  = recv_status.MPI_SOURCE;
10210ac07820SSatish Balay     lens[imdex]  = n;
10220ac07820SSatish Balay     slen += n;
10230ac07820SSatish Balay     count--;
10240ac07820SSatish Balay   }
10250ac07820SSatish Balay   PetscFree(recv_waits);
10260ac07820SSatish Balay 
10270ac07820SSatish Balay   /* move the data into the send scatter */
10280ac07820SSatish Balay   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
10290ac07820SSatish Balay   count = 0;
10300ac07820SSatish Balay   for ( i=0; i<nrecvs; i++ ) {
10310ac07820SSatish Balay     values = rvalues + i*nmax;
10320ac07820SSatish Balay     for ( j=0; j<lens[i]; j++ ) {
10330ac07820SSatish Balay       lrows[count++] = values[j] - base;
10340ac07820SSatish Balay     }
10350ac07820SSatish Balay   }
10360ac07820SSatish Balay   PetscFree(rvalues); PetscFree(lens);
10370ac07820SSatish Balay   PetscFree(owner); PetscFree(nprocs);
10380ac07820SSatish Balay 
10390ac07820SSatish Balay   /* actually zap the local rows */
1040537820f0SBarry Smith   ierr = ISCreateGeneral(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
10410ac07820SSatish Balay   PLogObjectParent(A,istmp);
10420ac07820SSatish Balay   PetscFree(lrows);
10430ac07820SSatish Balay   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
10440ac07820SSatish Balay   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
10450ac07820SSatish Balay   ierr = ISDestroy(istmp); CHKERRQ(ierr);
10460ac07820SSatish Balay 
10470ac07820SSatish Balay   /* wait on sends */
10480ac07820SSatish Balay   if (nsends) {
10490ac07820SSatish Balay     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
10500ac07820SSatish Balay     CHKPTRQ(send_status);
10510ac07820SSatish Balay     MPI_Waitall(nsends,send_waits,send_status);
10520ac07820SSatish Balay     PetscFree(send_status);
10530ac07820SSatish Balay   }
10540ac07820SSatish Balay   PetscFree(send_waits); PetscFree(svalues);
10550ac07820SSatish Balay 
10560ac07820SSatish Balay   return 0;
10570ac07820SSatish Balay }
1058ba4ca20aSSatish Balay extern int MatPrintHelp_SeqBAIJ(Mat);
1059ba4ca20aSSatish Balay static int MatPrintHelp_MPIBAIJ(Mat A)
1060ba4ca20aSSatish Balay {
1061ba4ca20aSSatish Balay   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
1062ba4ca20aSSatish Balay 
1063ba4ca20aSSatish Balay   if (!a->rank) return MatPrintHelp_SeqBAIJ(a->A);
1064ba4ca20aSSatish Balay   else return 0;
1065ba4ca20aSSatish Balay }
10660ac07820SSatish Balay 
10670ac07820SSatish Balay static int MatConvertSameType_MPIBAIJ(Mat,Mat *,int);
10680ac07820SSatish Balay 
106979bdfe76SSatish Balay /* -------------------------------------------------------------------*/
107079bdfe76SSatish Balay static struct _MatOps MatOps = {
1071bd16c2feSSatish Balay   MatSetValues_MPIBAIJ,MatGetRow_MPIBAIJ,MatRestoreRow_MPIBAIJ,MatMult_MPIBAIJ,
10724c50302cSBarry Smith   MatMultAdd_MPIBAIJ,MatMultTrans_MPIBAIJ,MatMultTransAdd_MPIBAIJ,0,
10734c50302cSBarry Smith   0,0,0,0,
10740ac07820SSatish Balay   0,0,MatTranspose_MPIBAIJ,MatGetInfo_MPIBAIJ,
10750e95ebc0SSatish Balay   0,MatGetDiagonal_MPIBAIJ,MatDiagonalScale_MPIBAIJ,MatNorm_MPIBAIJ,
107658667388SSatish Balay   MatAssemblyBegin_MPIBAIJ,MatAssemblyEnd_MPIBAIJ,0,MatSetOption_MPIBAIJ,
10774c50302cSBarry Smith   MatZeroEntries_MPIBAIJ,MatZeroRows_MPIBAIJ,0,
10784c50302cSBarry Smith   0,0,0,MatGetSize_MPIBAIJ,
10794c50302cSBarry Smith   MatGetLocalSize_MPIBAIJ,MatGetOwnershipRange_MPIBAIJ,0,0,
1080905e6a2fSBarry Smith   0,0,0,MatConvertSameType_MPIBAIJ,0,0,
1081d212a18eSSatish Balay   0,0,0,MatGetSubMatrices_MPIBAIJ,
1082ba4ca20aSSatish Balay   MatIncreaseOverlap_MPIBAIJ,MatGetValues_MPIBAIJ,0,MatPrintHelp_MPIBAIJ,
10834c50302cSBarry Smith   MatScale_MPIBAIJ,0,0,0,MatGetBlockSize_MPIBAIJ};
108479bdfe76SSatish Balay 
108579bdfe76SSatish Balay 
108679bdfe76SSatish Balay /*@C
108779bdfe76SSatish Balay    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
108879bdfe76SSatish Balay    (block compressed row).  For good matrix assembly performance
108979bdfe76SSatish Balay    the user should preallocate the matrix storage by setting the parameters
109079bdfe76SSatish Balay    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
109179bdfe76SSatish Balay    performance can be increased by more than a factor of 50.
109279bdfe76SSatish Balay 
109379bdfe76SSatish Balay    Input Parameters:
109479bdfe76SSatish Balay .  comm - MPI communicator
109579bdfe76SSatish Balay .  bs   - size of blockk
109679bdfe76SSatish Balay .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
109792e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
109892e8d321SLois Curfman McInnes            y vector for the matrix-vector product y = Ax.
109992e8d321SLois Curfman McInnes .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
110092e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
110192e8d321SLois Curfman McInnes            x vector for the matrix-vector product y = Ax.
110279bdfe76SSatish Balay .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
110392e8d321SLois Curfman McInnes .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
110479bdfe76SSatish Balay .  d_nz  - number of block nonzeros per block row in diagonal portion of local
110579bdfe76SSatish Balay            submatrix  (same for all local rows)
110692e8d321SLois Curfman McInnes .  d_nzz - array containing the number of block nonzeros in the various block rows
110792e8d321SLois Curfman McInnes            of the in diagonal portion of the local (possibly different for each block
110892e8d321SLois Curfman McInnes            row) or PETSC_NULL.  You must leave room for the diagonal entry even if
110992e8d321SLois Curfman McInnes            it is zero.
111092e8d321SLois Curfman McInnes .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
111179bdfe76SSatish Balay            submatrix (same for all local rows).
111292e8d321SLois Curfman McInnes .  o_nzz - array containing the number of nonzeros in the various block rows of the
111392e8d321SLois Curfman McInnes            off-diagonal portion of the local submatrix (possibly different for
111492e8d321SLois Curfman McInnes            each block row) or PETSC_NULL.
111579bdfe76SSatish Balay 
111679bdfe76SSatish Balay    Output Parameter:
111779bdfe76SSatish Balay .  A - the matrix
111879bdfe76SSatish Balay 
111979bdfe76SSatish Balay    Notes:
112079bdfe76SSatish Balay    The user MUST specify either the local or global matrix dimensions
112179bdfe76SSatish Balay    (possibly both).
112279bdfe76SSatish Balay 
112379bdfe76SSatish Balay    Storage Information:
112479bdfe76SSatish Balay    For a square global matrix we define each processor's diagonal portion
112579bdfe76SSatish Balay    to be its local rows and the corresponding columns (a square submatrix);
112679bdfe76SSatish Balay    each processor's off-diagonal portion encompasses the remainder of the
112779bdfe76SSatish Balay    local matrix (a rectangular submatrix).
112879bdfe76SSatish Balay 
112979bdfe76SSatish Balay    The user can specify preallocated storage for the diagonal part of
113079bdfe76SSatish Balay    the local submatrix with either d_nz or d_nnz (not both).  Set
113179bdfe76SSatish Balay    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
113279bdfe76SSatish Balay    memory allocation.  Likewise, specify preallocated storage for the
113379bdfe76SSatish Balay    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
113479bdfe76SSatish Balay 
113579bdfe76SSatish Balay    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
113679bdfe76SSatish Balay    the figure below we depict these three local rows and all columns (0-11).
113779bdfe76SSatish Balay 
113879bdfe76SSatish Balay $          0 1 2 3 4 5 6 7 8 9 10 11
113979bdfe76SSatish Balay $         -------------------
114079bdfe76SSatish Balay $  row 3  |  o o o d d d o o o o o o
114179bdfe76SSatish Balay $  row 4  |  o o o d d d o o o o o o
114279bdfe76SSatish Balay $  row 5  |  o o o d d d o o o o o o
114379bdfe76SSatish Balay $         -------------------
114479bdfe76SSatish Balay $
114579bdfe76SSatish Balay 
114679bdfe76SSatish Balay    Thus, any entries in the d locations are stored in the d (diagonal)
114779bdfe76SSatish Balay    submatrix, and any entries in the o locations are stored in the
114879bdfe76SSatish Balay    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
114957b952d6SSatish Balay    stored simply in the MATSEQBAIJ format for compressed row storage.
115079bdfe76SSatish Balay 
115179bdfe76SSatish Balay    Now d_nz should indicate the number of nonzeros per row in the d matrix,
115279bdfe76SSatish Balay    and o_nz should indicate the number of nonzeros per row in the o matrix.
115379bdfe76SSatish Balay    In general, for PDE problems in which most nonzeros are near the diagonal,
115492e8d321SLois Curfman McInnes    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
115592e8d321SLois Curfman McInnes    or you will get TERRIBLE performance; see the users' manual chapter on
115692e8d321SLois Curfman McInnes    matrices and the file $(PETSC_DIR)/Performance.
115779bdfe76SSatish Balay 
115892e8d321SLois Curfman McInnes .keywords: matrix, block, aij, compressed row, sparse, parallel
115979bdfe76SSatish Balay 
116079bdfe76SSatish Balay .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues()
116179bdfe76SSatish Balay @*/
116279bdfe76SSatish Balay int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,
116379bdfe76SSatish Balay                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
116479bdfe76SSatish Balay {
116579bdfe76SSatish Balay   Mat          B;
116679bdfe76SSatish Balay   Mat_MPIBAIJ  *b;
11674c50302cSBarry Smith   int          ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size;
116879bdfe76SSatish Balay 
116979bdfe76SSatish Balay   if (bs < 1) SETERRQ(1,"MatCreateMPIBAIJ: invalid block size specified");
117079bdfe76SSatish Balay   *A = 0;
117179bdfe76SSatish Balay   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIBAIJ,comm);
117279bdfe76SSatish Balay   PLogObjectCreate(B);
117379bdfe76SSatish Balay   B->data       = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b);
117479bdfe76SSatish Balay   PetscMemzero(b,sizeof(Mat_MPIBAIJ));
117579bdfe76SSatish Balay   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
11764c50302cSBarry Smith   MPI_Comm_size(comm,&size);
11774c50302cSBarry Smith   if (size == 1) {
11784c50302cSBarry Smith     B->ops.getrowij          = MatGetRowIJ_MPIBAIJ;
11794c50302cSBarry Smith     B->ops.restorerowij      = MatRestoreRowIJ_MPIBAIJ;
11804c50302cSBarry Smith     B->ops.lufactorsymbolic  = MatLUFactorSymbolic_MPIBAIJ;
11814c50302cSBarry Smith     B->ops.lufactornumeric   = MatLUFactorNumeric_MPIBAIJ;
11824c50302cSBarry Smith     B->ops.lufactor          = MatLUFactor_MPIBAIJ;
11834c50302cSBarry Smith     B->ops.solve             = MatSolve_MPIBAIJ;
11844c50302cSBarry Smith     B->ops.solveadd          = MatSolveAdd_MPIBAIJ;
11854c50302cSBarry Smith     B->ops.solvetrans        = MatSolveTrans_MPIBAIJ;
11864c50302cSBarry Smith     B->ops.solvetransadd     = MatSolveTransAdd_MPIBAIJ;
11874c50302cSBarry Smith     B->ops.ilufactorsymbolic = MatILUFactorSymbolic_MPIBAIJ;
11884c50302cSBarry Smith   }
11894c50302cSBarry Smith 
119079bdfe76SSatish Balay   B->destroy    = MatDestroy_MPIBAIJ;
119179bdfe76SSatish Balay   B->view       = MatView_MPIBAIJ;
119257b952d6SSatish Balay 
119379bdfe76SSatish Balay   B->factor     = 0;
119479bdfe76SSatish Balay   B->assembled  = PETSC_FALSE;
119579bdfe76SSatish Balay 
119679bdfe76SSatish Balay   b->insertmode = NOT_SET_VALUES;
119779bdfe76SSatish Balay   MPI_Comm_rank(comm,&b->rank);
119879bdfe76SSatish Balay   MPI_Comm_size(comm,&b->size);
119979bdfe76SSatish Balay 
120079bdfe76SSatish Balay   if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
120157b952d6SSatish Balay     SETERRQ(1,"MatCreateMPIBAIJ:Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
1202cee3aa6bSSatish Balay   if ( M == PETSC_DECIDE && m == PETSC_DECIDE) SETERRQ(1,"MatCreateMPIBAIJ: either M or m should be specified");
1203cee3aa6bSSatish Balay   if ( M == PETSC_DECIDE && n == PETSC_DECIDE)SETERRQ(1,"MatCreateMPIBAIJ: either N or n should be specified");
1204cee3aa6bSSatish Balay   if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE;
1205cee3aa6bSSatish Balay   if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE;
120679bdfe76SSatish Balay 
120779bdfe76SSatish Balay   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
120879bdfe76SSatish Balay     work[0] = m; work[1] = n;
120979bdfe76SSatish Balay     mbs = m/bs; nbs = n/bs;
121079bdfe76SSatish Balay     MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
121179bdfe76SSatish Balay     if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;}
121279bdfe76SSatish Balay     if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;}
121379bdfe76SSatish Balay   }
121479bdfe76SSatish Balay   if (m == PETSC_DECIDE) {
121579bdfe76SSatish Balay     Mbs = M/bs;
121679bdfe76SSatish Balay     if (Mbs*bs != M) SETERRQ(1,"MatCreateMPIBAIJ: No of global rows must be divisible by blocksize");
121779bdfe76SSatish Balay     mbs = Mbs/b->size + ((Mbs % b->size) > b->rank);
121879bdfe76SSatish Balay     m   = mbs*bs;
121979bdfe76SSatish Balay   }
122079bdfe76SSatish Balay   if (n == PETSC_DECIDE) {
122179bdfe76SSatish Balay     Nbs = N/bs;
122279bdfe76SSatish Balay     if (Nbs*bs != N) SETERRQ(1,"MatCreateMPIBAIJ: No of global cols must be divisible by blocksize");
122379bdfe76SSatish Balay     nbs = Nbs/b->size + ((Nbs % b->size) > b->rank);
122479bdfe76SSatish Balay     n   = nbs*bs;
122579bdfe76SSatish Balay   }
1226cee3aa6bSSatish Balay   if (mbs*bs != m || nbs*bs != n) SETERRQ(1,"MatCreateMPIBAIJ: No of local rows, cols must be divisible by blocksize");
122779bdfe76SSatish Balay 
122879bdfe76SSatish Balay   b->m = m; B->m = m;
122979bdfe76SSatish Balay   b->n = n; B->n = n;
123079bdfe76SSatish Balay   b->N = N; B->N = N;
123179bdfe76SSatish Balay   b->M = M; B->M = M;
123279bdfe76SSatish Balay   b->bs  = bs;
123379bdfe76SSatish Balay   b->bs2 = bs*bs;
123479bdfe76SSatish Balay   b->mbs = mbs;
123579bdfe76SSatish Balay   b->nbs = nbs;
123679bdfe76SSatish Balay   b->Mbs = Mbs;
123779bdfe76SSatish Balay   b->Nbs = Nbs;
123879bdfe76SSatish Balay 
123979bdfe76SSatish Balay   /* build local table of row and column ownerships */
124079bdfe76SSatish Balay   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
124179bdfe76SSatish Balay   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIBAIJ));
12420ac07820SSatish Balay   b->cowners = b->rowners + b->size + 2;
124379bdfe76SSatish Balay   MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);
124479bdfe76SSatish Balay   b->rowners[0] = 0;
124579bdfe76SSatish Balay   for ( i=2; i<=b->size; i++ ) {
124679bdfe76SSatish Balay     b->rowners[i] += b->rowners[i-1];
124779bdfe76SSatish Balay   }
124879bdfe76SSatish Balay   b->rstart = b->rowners[b->rank];
124979bdfe76SSatish Balay   b->rend   = b->rowners[b->rank+1];
125079bdfe76SSatish Balay   MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);
125179bdfe76SSatish Balay   b->cowners[0] = 0;
125279bdfe76SSatish Balay   for ( i=2; i<=b->size; i++ ) {
125379bdfe76SSatish Balay     b->cowners[i] += b->cowners[i-1];
125479bdfe76SSatish Balay   }
125579bdfe76SSatish Balay   b->cstart = b->cowners[b->rank];
125679bdfe76SSatish Balay   b->cend   = b->cowners[b->rank+1];
125779bdfe76SSatish Balay 
125879bdfe76SSatish Balay   if (d_nz == PETSC_DEFAULT) d_nz = 5;
125979bdfe76SSatish Balay   ierr = MatCreateSeqBAIJ(MPI_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
126079bdfe76SSatish Balay   PLogObjectParent(B,b->A);
126179bdfe76SSatish Balay   if (o_nz == PETSC_DEFAULT) o_nz = 0;
126279bdfe76SSatish Balay   ierr = MatCreateSeqBAIJ(MPI_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
126379bdfe76SSatish Balay   PLogObjectParent(B,b->B);
126479bdfe76SSatish Balay 
126579bdfe76SSatish Balay   /* build cache for off array entries formed */
126679bdfe76SSatish Balay   ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
126779bdfe76SSatish Balay   b->colmap      = 0;
126879bdfe76SSatish Balay   b->garray      = 0;
126979bdfe76SSatish Balay   b->roworiented = 1;
127079bdfe76SSatish Balay 
127179bdfe76SSatish Balay   /* stuff used for matrix vector multiply */
127279bdfe76SSatish Balay   b->lvec      = 0;
127379bdfe76SSatish Balay   b->Mvctx     = 0;
127479bdfe76SSatish Balay 
127579bdfe76SSatish Balay   /* stuff for MatGetRow() */
127679bdfe76SSatish Balay   b->rowindices   = 0;
127779bdfe76SSatish Balay   b->rowvalues    = 0;
127879bdfe76SSatish Balay   b->getrowactive = PETSC_FALSE;
127979bdfe76SSatish Balay 
128079bdfe76SSatish Balay   *A = B;
128179bdfe76SSatish Balay   return 0;
128279bdfe76SSatish Balay }
12830ac07820SSatish Balay static int MatConvertSameType_MPIBAIJ(Mat matin,Mat *newmat,int cpvalues)
12840ac07820SSatish Balay {
12850ac07820SSatish Balay   Mat        mat;
12860ac07820SSatish Balay   Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data;
12870ac07820SSatish Balay   int        ierr, len=0, flg;
12880ac07820SSatish Balay 
12890ac07820SSatish Balay   *newmat       = 0;
12900ac07820SSatish Balay   PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIBAIJ,matin->comm);
12910ac07820SSatish Balay   PLogObjectCreate(mat);
12920ac07820SSatish Balay   mat->data       = (void *) (a = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(a);
12930ac07820SSatish Balay   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
12940ac07820SSatish Balay   mat->destroy    = MatDestroy_MPIBAIJ;
12950ac07820SSatish Balay   mat->view       = MatView_MPIBAIJ;
12960ac07820SSatish Balay   mat->factor     = matin->factor;
12970ac07820SSatish Balay   mat->assembled  = PETSC_TRUE;
12980ac07820SSatish Balay 
12990ac07820SSatish Balay   a->m = mat->m   = oldmat->m;
13000ac07820SSatish Balay   a->n = mat->n   = oldmat->n;
13010ac07820SSatish Balay   a->M = mat->M   = oldmat->M;
13020ac07820SSatish Balay   a->N = mat->N   = oldmat->N;
13030ac07820SSatish Balay 
13040ac07820SSatish Balay   a->bs  = oldmat->bs;
13050ac07820SSatish Balay   a->bs2 = oldmat->bs2;
13060ac07820SSatish Balay   a->mbs = oldmat->mbs;
13070ac07820SSatish Balay   a->nbs = oldmat->nbs;
13080ac07820SSatish Balay   a->Mbs = oldmat->Mbs;
13090ac07820SSatish Balay   a->Nbs = oldmat->Nbs;
13100ac07820SSatish Balay 
13110ac07820SSatish Balay   a->rstart       = oldmat->rstart;
13120ac07820SSatish Balay   a->rend         = oldmat->rend;
13130ac07820SSatish Balay   a->cstart       = oldmat->cstart;
13140ac07820SSatish Balay   a->cend         = oldmat->cend;
13150ac07820SSatish Balay   a->size         = oldmat->size;
13160ac07820SSatish Balay   a->rank         = oldmat->rank;
13170ac07820SSatish Balay   a->insertmode   = NOT_SET_VALUES;
13180ac07820SSatish Balay   a->rowvalues    = 0;
13190ac07820SSatish Balay   a->getrowactive = PETSC_FALSE;
13200ac07820SSatish Balay 
13210ac07820SSatish Balay   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
13220ac07820SSatish Balay   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIBAIJ));
13230ac07820SSatish Balay   a->cowners = a->rowners + a->size + 2;
13240ac07820SSatish Balay   PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
13250ac07820SSatish Balay   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
13260ac07820SSatish Balay   if (oldmat->colmap) {
13270ac07820SSatish Balay     a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap);
13280ac07820SSatish Balay     PLogObjectMemory(mat,(a->Nbs)*sizeof(int));
13290ac07820SSatish Balay     PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));
13300ac07820SSatish Balay   } else a->colmap = 0;
13310ac07820SSatish Balay   if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) {
13320ac07820SSatish Balay     a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray);
13330ac07820SSatish Balay     PLogObjectMemory(mat,len*sizeof(int));
13340ac07820SSatish Balay     PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
13350ac07820SSatish Balay   } else a->garray = 0;
13360ac07820SSatish Balay 
13370ac07820SSatish Balay   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
13380ac07820SSatish Balay   PLogObjectParent(mat,a->lvec);
13390ac07820SSatish Balay   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
13400ac07820SSatish Balay   PLogObjectParent(mat,a->Mvctx);
13410ac07820SSatish Balay   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
13420ac07820SSatish Balay   PLogObjectParent(mat,a->A);
13430ac07820SSatish Balay   ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
13440ac07820SSatish Balay   PLogObjectParent(mat,a->B);
13450ac07820SSatish Balay   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
13460ac07820SSatish Balay   if (flg) {
13470ac07820SSatish Balay     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
13480ac07820SSatish Balay   }
13490ac07820SSatish Balay   *newmat = mat;
13500ac07820SSatish Balay   return 0;
13510ac07820SSatish Balay }
135257b952d6SSatish Balay 
135357b952d6SSatish Balay #include "sys.h"
135457b952d6SSatish Balay 
135557b952d6SSatish Balay int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat)
135657b952d6SSatish Balay {
135757b952d6SSatish Balay   Mat          A;
135857b952d6SSatish Balay   int          i, nz, ierr, j,rstart, rend, fd;
135957b952d6SSatish Balay   Scalar       *vals,*buf;
136057b952d6SSatish Balay   MPI_Comm     comm = ((PetscObject)viewer)->comm;
136157b952d6SSatish Balay   MPI_Status   status;
1362cee3aa6bSSatish Balay   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
136357b952d6SSatish Balay   int          *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf;
136457b952d6SSatish Balay   int          flg,tag = ((PetscObject)viewer)->tag,bs=1,bs2,Mbs,mbs,extra_rows;
136557b952d6SSatish Balay   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
136657b952d6SSatish Balay   int          dcount,kmax,k,nzcount,tmp;
136757b952d6SSatish Balay 
136857b952d6SSatish Balay 
136957b952d6SSatish Balay   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
137057b952d6SSatish Balay   bs2  = bs*bs;
137157b952d6SSatish Balay 
137257b952d6SSatish Balay   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
137357b952d6SSatish Balay   if (!rank) {
137457b952d6SSatish Balay     ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
137557b952d6SSatish Balay     ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr);
1376cee3aa6bSSatish Balay     if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIBAIJ:not matrix object");
137757b952d6SSatish Balay   }
137857b952d6SSatish Balay 
137957b952d6SSatish Balay   MPI_Bcast(header+1,3,MPI_INT,0,comm);
138057b952d6SSatish Balay   M = header[1]; N = header[2];
138157b952d6SSatish Balay 
138257b952d6SSatish Balay   if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices");
138357b952d6SSatish Balay 
138457b952d6SSatish Balay   /*
138557b952d6SSatish Balay      This code adds extra rows to make sure the number of rows is
138657b952d6SSatish Balay      divisible by the blocksize
138757b952d6SSatish Balay   */
138857b952d6SSatish Balay   Mbs        = M/bs;
138957b952d6SSatish Balay   extra_rows = bs - M + bs*(Mbs);
139057b952d6SSatish Balay   if (extra_rows == bs) extra_rows = 0;
139157b952d6SSatish Balay   else                  Mbs++;
139257b952d6SSatish Balay   if (extra_rows &&!rank) {
1393537820f0SBarry Smith     PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n");
139457b952d6SSatish Balay   }
1395537820f0SBarry Smith 
139657b952d6SSatish Balay   /* determine ownership of all rows */
139757b952d6SSatish Balay   mbs = Mbs/size + ((Mbs % size) > rank);
139857b952d6SSatish Balay   m   = mbs * bs;
1399cee3aa6bSSatish Balay   rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners);
1400cee3aa6bSSatish Balay   browners = rowners + size + 1;
140157b952d6SSatish Balay   MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);
140257b952d6SSatish Balay   rowners[0] = 0;
1403cee3aa6bSSatish Balay   for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1];
1404cee3aa6bSSatish Balay   for ( i=0; i<=size;  i++ ) browners[i] = rowners[i]*bs;
140557b952d6SSatish Balay   rstart = rowners[rank];
140657b952d6SSatish Balay   rend   = rowners[rank+1];
140757b952d6SSatish Balay 
140857b952d6SSatish Balay   /* distribute row lengths to all processors */
140957b952d6SSatish Balay   locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens);
141057b952d6SSatish Balay   if (!rank) {
141157b952d6SSatish Balay     rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths);
141257b952d6SSatish Balay     ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
141357b952d6SSatish Balay     for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
141457b952d6SSatish Balay     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
1415cee3aa6bSSatish Balay     for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i];
1416cee3aa6bSSatish Balay     MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);
141757b952d6SSatish Balay     PetscFree(sndcounts);
141857b952d6SSatish Balay   }
141957b952d6SSatish Balay   else {
142057b952d6SSatish Balay     MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);
142157b952d6SSatish Balay   }
142257b952d6SSatish Balay 
142357b952d6SSatish Balay   if (!rank) {
142457b952d6SSatish Balay     /* calculate the number of nonzeros on each processor */
142557b952d6SSatish Balay     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
142657b952d6SSatish Balay     PetscMemzero(procsnz,size*sizeof(int));
142757b952d6SSatish Balay     for ( i=0; i<size; i++ ) {
142857b952d6SSatish Balay       for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) {
142957b952d6SSatish Balay         procsnz[i] += rowlengths[j];
143057b952d6SSatish Balay       }
143157b952d6SSatish Balay     }
143257b952d6SSatish Balay     PetscFree(rowlengths);
143357b952d6SSatish Balay 
143457b952d6SSatish Balay     /* determine max buffer needed and allocate it */
143557b952d6SSatish Balay     maxnz = 0;
143657b952d6SSatish Balay     for ( i=0; i<size; i++ ) {
143757b952d6SSatish Balay       maxnz = PetscMax(maxnz,procsnz[i]);
143857b952d6SSatish Balay     }
143957b952d6SSatish Balay     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
144057b952d6SSatish Balay 
144157b952d6SSatish Balay     /* read in my part of the matrix column indices  */
144257b952d6SSatish Balay     nz = procsnz[0];
144357b952d6SSatish Balay     ibuf = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
144457b952d6SSatish Balay     mycols = ibuf;
1445cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
144657b952d6SSatish Balay     ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr);
1447cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
1448cee3aa6bSSatish Balay 
144957b952d6SSatish Balay     /* read in every ones (except the last) and ship off */
145057b952d6SSatish Balay     for ( i=1; i<size-1; i++ ) {
145157b952d6SSatish Balay       nz = procsnz[i];
145257b952d6SSatish Balay       ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
145357b952d6SSatish Balay       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
145457b952d6SSatish Balay     }
145557b952d6SSatish Balay     /* read in the stuff for the last proc */
145657b952d6SSatish Balay     if ( size != 1 ) {
145757b952d6SSatish Balay       nz = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
145857b952d6SSatish Balay       ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
145957b952d6SSatish Balay       for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i;
1460cee3aa6bSSatish Balay       MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);
146157b952d6SSatish Balay     }
146257b952d6SSatish Balay     PetscFree(cols);
146357b952d6SSatish Balay   }
146457b952d6SSatish Balay   else {
146557b952d6SSatish Balay     /* determine buffer space needed for message */
146657b952d6SSatish Balay     nz = 0;
146757b952d6SSatish Balay     for ( i=0; i<m; i++ ) {
146857b952d6SSatish Balay       nz += locrowlens[i];
146957b952d6SSatish Balay     }
147057b952d6SSatish Balay     ibuf = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
147157b952d6SSatish Balay     mycols = ibuf;
147257b952d6SSatish Balay     /* receive message of column indices*/
147357b952d6SSatish Balay     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
147457b952d6SSatish Balay     MPI_Get_count(&status,MPI_INT,&maxnz);
1475cee3aa6bSSatish Balay     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIBAIJ:something is wrong with file");
147657b952d6SSatish Balay   }
147757b952d6SSatish Balay 
147857b952d6SSatish Balay   /* loop over local rows, determining number of off diagonal entries */
1479cee3aa6bSSatish Balay   dlens  = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens);
1480cee3aa6bSSatish Balay   odlens = dlens + (rend-rstart);
148157b952d6SSatish Balay   mask   = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask);
1482cee3aa6bSSatish Balay   PetscMemzero(mask,3*Mbs*sizeof(int));
148357b952d6SSatish Balay   masked1 = mask    + Mbs;
148457b952d6SSatish Balay   masked2 = masked1 + Mbs;
148557b952d6SSatish Balay   rowcount = 0; nzcount = 0;
148657b952d6SSatish Balay   for ( i=0; i<mbs; i++ ) {
148757b952d6SSatish Balay     dcount  = 0;
148857b952d6SSatish Balay     odcount = 0;
148957b952d6SSatish Balay     for ( j=0; j<bs; j++ ) {
149057b952d6SSatish Balay       kmax = locrowlens[rowcount];
149157b952d6SSatish Balay       for ( k=0; k<kmax; k++ ) {
149257b952d6SSatish Balay         tmp = mycols[nzcount++]/bs;
149357b952d6SSatish Balay         if (!mask[tmp]) {
149457b952d6SSatish Balay           mask[tmp] = 1;
149557b952d6SSatish Balay           if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp;
149657b952d6SSatish Balay           else masked1[dcount++] = tmp;
149757b952d6SSatish Balay         }
149857b952d6SSatish Balay       }
149957b952d6SSatish Balay       rowcount++;
150057b952d6SSatish Balay     }
1501cee3aa6bSSatish Balay 
150257b952d6SSatish Balay     dlens[i]  = dcount;
150357b952d6SSatish Balay     odlens[i] = odcount;
1504cee3aa6bSSatish Balay 
150557b952d6SSatish Balay     /* zero out the mask elements we set */
150657b952d6SSatish Balay     for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0;
150757b952d6SSatish Balay     for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0;
150857b952d6SSatish Balay   }
1509cee3aa6bSSatish Balay 
151057b952d6SSatish Balay   /* create our matrix */
1511537820f0SBarry Smith   ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);
1512537820f0SBarry Smith          CHKERRQ(ierr);
151357b952d6SSatish Balay   A = *newmat;
15146d4a8577SBarry Smith   MatSetOption(A,MAT_COLUMNS_SORTED);
151557b952d6SSatish Balay 
151657b952d6SSatish Balay   if (!rank) {
151757b952d6SSatish Balay     buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf);
151857b952d6SSatish Balay     /* read in my part of the matrix numerical values  */
151957b952d6SSatish Balay     nz = procsnz[0];
152057b952d6SSatish Balay     vals = buf;
1521cee3aa6bSSatish Balay     mycols = ibuf;
1522cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
152357b952d6SSatish Balay     ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
1524cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
1525537820f0SBarry Smith 
152657b952d6SSatish Balay     /* insert into matrix */
152757b952d6SSatish Balay     jj      = rstart*bs;
152857b952d6SSatish Balay     for ( i=0; i<m; i++ ) {
152957b952d6SSatish Balay       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
153057b952d6SSatish Balay       mycols += locrowlens[i];
153157b952d6SSatish Balay       vals   += locrowlens[i];
153257b952d6SSatish Balay       jj++;
153357b952d6SSatish Balay     }
153457b952d6SSatish Balay     /* read in other processors (except the last one) and ship out */
153557b952d6SSatish Balay     for ( i=1; i<size-1; i++ ) {
153657b952d6SSatish Balay       nz = procsnz[i];
153757b952d6SSatish Balay       vals = buf;
153857b952d6SSatish Balay       ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
153957b952d6SSatish Balay       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
154057b952d6SSatish Balay     }
154157b952d6SSatish Balay     /* the last proc */
154257b952d6SSatish Balay     if ( size != 1 ){
154357b952d6SSatish Balay       nz = procsnz[i] - extra_rows;
1544cee3aa6bSSatish Balay       vals = buf;
154557b952d6SSatish Balay       ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
154657b952d6SSatish Balay       for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0;
1547cee3aa6bSSatish Balay       MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);
154857b952d6SSatish Balay     }
154957b952d6SSatish Balay     PetscFree(procsnz);
155057b952d6SSatish Balay   }
155157b952d6SSatish Balay   else {
155257b952d6SSatish Balay     /* receive numeric values */
155357b952d6SSatish Balay     buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf);
155457b952d6SSatish Balay 
155557b952d6SSatish Balay     /* receive message of values*/
155657b952d6SSatish Balay     vals = buf;
1557cee3aa6bSSatish Balay     mycols = ibuf;
155857b952d6SSatish Balay     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
155957b952d6SSatish Balay     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1560cee3aa6bSSatish Balay     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIBAIJ:something is wrong with file");
156157b952d6SSatish Balay 
156257b952d6SSatish Balay     /* insert into matrix */
156357b952d6SSatish Balay     jj      = rstart*bs;
1564cee3aa6bSSatish Balay     for ( i=0; i<m; i++ ) {
156557b952d6SSatish Balay       ierr    = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
156657b952d6SSatish Balay       mycols += locrowlens[i];
156757b952d6SSatish Balay       vals   += locrowlens[i];
156857b952d6SSatish Balay       jj++;
156957b952d6SSatish Balay     }
157057b952d6SSatish Balay   }
157157b952d6SSatish Balay   PetscFree(locrowlens);
157257b952d6SSatish Balay   PetscFree(buf);
157357b952d6SSatish Balay   PetscFree(ibuf);
157457b952d6SSatish Balay   PetscFree(rowners);
157557b952d6SSatish Balay   PetscFree(dlens);
1576cee3aa6bSSatish Balay   PetscFree(mask);
15776d4a8577SBarry Smith   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
15786d4a8577SBarry Smith   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
157957b952d6SSatish Balay   return 0;
158057b952d6SSatish Balay }
158157b952d6SSatish Balay 
158257b952d6SSatish Balay 
1583