xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 0e95ebc02a59a3a6fb004fab8a77214641f0608d)
179bdfe76SSatish Balay #ifndef lint
2*0e95ebc0SSatish Balay static char vcid[] = "$Id: mpibaij.c,v 1.25 1996/09/14 03:08:37 bsmith Exp balay $";
379bdfe76SSatish Balay #endif
479bdfe76SSatish Balay 
570f55243SBarry Smith #include "src/mat/impls/baij/mpi/mpibaij.h"
6c16cb8f2SBarry Smith #include "src/vec/vecimpl.h"
779bdfe76SSatish Balay 
857b952d6SSatish Balay #include "draw.h"
957b952d6SSatish Balay #include "pinclude/pviewer.h"
1057b952d6SSatish Balay 
1157b952d6SSatish Balay extern int MatSetUpMultiply_MPIBAIJ(Mat);
1257b952d6SSatish Balay extern int DisAssemble_MPIBAIJ(Mat);
13d212a18eSSatish Balay extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int);
14d212a18eSSatish Balay extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatGetSubMatrixCall,Mat **);
1593bc47c4SSatish Balay extern int MatLUFactorSymbolic_MPIBAIJ(Mat,IS,IS,double,Mat *);
1693bc47c4SSatish Balay extern int MatLUFactorNumeric_MPIBAIJ(Mat,Mat *);
1793bc47c4SSatish Balay extern int MatLUFactor_MPIBAIJ(Mat,IS,IS,double);
1893bc47c4SSatish Balay extern int MatSolve_MPIBAIJ(Mat,Vec,Vec);
1993bc47c4SSatish Balay extern int MatSolveAdd_MPIBAIJ(Mat,Vec,Vec,Vec);
2093bc47c4SSatish Balay extern int MatSolveTrans_MPIBAIJ(Mat,Vec,Vec);
2193bc47c4SSatish Balay extern int MatSolveTransAdd_MPIBAIJ(Mat,Vec,Vec,Vec);
2293bc47c4SSatish Balay extern int MatILUFactorSymbolic_MPIBAIJ(Mat,IS,IS,double,int,Mat *);
2357b952d6SSatish Balay 
243b2fbd54SBarry Smith 
25537820f0SBarry Smith /*
26537820f0SBarry Smith      Local utility routine that creates a mapping from the global column
2757b952d6SSatish Balay    number to the local number in the off-diagonal part of the local
2857b952d6SSatish Balay    storage of the matrix.  This is done in a non scable way since the
2957b952d6SSatish Balay    length of colmap equals the global matrix length.
3057b952d6SSatish Balay */
3157b952d6SSatish Balay static int CreateColmap_MPIBAIJ_Private(Mat mat)
3257b952d6SSatish Balay {
3357b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
3457b952d6SSatish Balay   Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data;
3557b952d6SSatish Balay   int         nbs = B->nbs,i;
3657b952d6SSatish Balay 
3757b952d6SSatish Balay   baij->colmap = (int *) PetscMalloc(baij->Nbs*sizeof(int));CHKPTRQ(baij->colmap);
3857b952d6SSatish Balay   PLogObjectMemory(mat,baij->Nbs*sizeof(int));
3957b952d6SSatish Balay   PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));
40905e6a2fSBarry Smith   for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i + 1;
4157b952d6SSatish Balay   return 0;
4257b952d6SSatish Balay }
4357b952d6SSatish Balay 
443b2fbd54SBarry Smith static int MatGetRowIJ_MPIBAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
453b2fbd54SBarry Smith                             PetscTruth *done)
4657b952d6SSatish Balay {
473b2fbd54SBarry Smith   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *) mat->data;
4857b952d6SSatish Balay   int         ierr;
493b2fbd54SBarry Smith   if (aij->size == 1) {
503b2fbd54SBarry Smith     ierr = MatGetRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
513b2fbd54SBarry Smith   } else SETERRQ(1,"MatGetRowIJ_MPIBAIJ:not supported in parallel");
523b2fbd54SBarry Smith   return 0;
533b2fbd54SBarry Smith }
543b2fbd54SBarry Smith 
553b2fbd54SBarry Smith static int MatRestoreRowIJ_MPIBAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
563b2fbd54SBarry Smith                                 PetscTruth *done)
573b2fbd54SBarry Smith {
583b2fbd54SBarry Smith   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *) mat->data;
593b2fbd54SBarry Smith   int        ierr;
603b2fbd54SBarry Smith   if (aij->size == 1) {
613b2fbd54SBarry Smith     ierr = MatRestoreRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
623b2fbd54SBarry Smith   } else SETERRQ(1,"MatRestoreRowIJ_MPIBAIJ:not supported in parallel");
6357b952d6SSatish Balay   return 0;
6457b952d6SSatish Balay }
6557b952d6SSatish Balay 
6657b952d6SSatish Balay static int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
6757b952d6SSatish Balay {
6857b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
6957b952d6SSatish Balay   Scalar      value;
7057b952d6SSatish Balay   int         ierr,i,j, rstart = baij->rstart, rend = baij->rend;
7157b952d6SSatish Balay   int         cstart = baij->cstart, cend = baij->cend,row,col;
7257b952d6SSatish Balay   int         roworiented = baij->roworiented,rstart_orig,rend_orig;
7357b952d6SSatish Balay   int         cstart_orig,cend_orig,bs=baij->bs;
7457b952d6SSatish Balay 
7557b952d6SSatish Balay   if (baij->insertmode != NOT_SET_VALUES && baij->insertmode != addv) {
7657b952d6SSatish Balay     SETERRQ(1,"MatSetValues_MPIBAIJ:Cannot mix inserts and adds");
7757b952d6SSatish Balay   }
7857b952d6SSatish Balay   baij->insertmode = addv;
7957b952d6SSatish Balay   rstart_orig = rstart*bs;
8057b952d6SSatish Balay   rend_orig   = rend*bs;
8157b952d6SSatish Balay   cstart_orig = cstart*bs;
8257b952d6SSatish Balay   cend_orig   = cend*bs;
8357b952d6SSatish Balay   for ( i=0; i<m; i++ ) {
8457b952d6SSatish Balay     if (im[i] < 0) SETERRQ(1,"MatSetValues_MPIBAIJ:Negative row");
8557b952d6SSatish Balay     if (im[i] >= baij->M) SETERRQ(1,"MatSetValues_MPIBAIJ:Row too large");
8657b952d6SSatish Balay     if (im[i] >= rstart_orig && im[i] < rend_orig) {
8757b952d6SSatish Balay       row = im[i] - rstart_orig;
8857b952d6SSatish Balay       for ( j=0; j<n; j++ ) {
8957b952d6SSatish Balay         if (in[j] < 0) SETERRQ(1,"MatSetValues_MPIBAIJ:Negative column");
9057b952d6SSatish Balay         if (in[j] >= baij->N) SETERRQ(1,"MatSetValues_MPIBAIJ:Col too large");
9157b952d6SSatish Balay         if (in[j] >= cstart_orig && in[j] < cend_orig){
9257b952d6SSatish Balay           col = in[j] - cstart_orig;
9357b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
9457b952d6SSatish Balay           ierr = MatSetValues(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
9557b952d6SSatish Balay         }
9657b952d6SSatish Balay         else {
9757b952d6SSatish Balay           if (mat->was_assembled) {
98905e6a2fSBarry Smith             if (!baij->colmap) {
99905e6a2fSBarry Smith               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
100905e6a2fSBarry Smith             }
101905e6a2fSBarry Smith             col = baij->colmap[in[j]/bs] - 1 + in[j]%bs;
10257b952d6SSatish Balay             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
10357b952d6SSatish Balay               ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
10457b952d6SSatish Balay               col =  in[j];
10557b952d6SSatish Balay             }
10657b952d6SSatish Balay           }
10757b952d6SSatish Balay           else col = in[j];
10857b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
10957b952d6SSatish Balay           ierr = MatSetValues(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
11057b952d6SSatish Balay         }
11157b952d6SSatish Balay       }
11257b952d6SSatish Balay     }
11357b952d6SSatish Balay     else {
11457b952d6SSatish Balay       if (roworiented) {
11557b952d6SSatish Balay         ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
11657b952d6SSatish Balay       }
11757b952d6SSatish Balay       else {
11857b952d6SSatish Balay         row = im[i];
11957b952d6SSatish Balay         for ( j=0; j<n; j++ ) {
12057b952d6SSatish Balay           ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
12157b952d6SSatish Balay         }
12257b952d6SSatish Balay       }
12357b952d6SSatish Balay     }
12457b952d6SSatish Balay   }
12557b952d6SSatish Balay   return 0;
12657b952d6SSatish Balay }
12757b952d6SSatish Balay 
128d6de1c52SSatish Balay static int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
129d6de1c52SSatish Balay {
130d6de1c52SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
131d6de1c52SSatish Balay   int        bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs;
132d6de1c52SSatish Balay   int        bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col;
133d6de1c52SSatish Balay 
134d6de1c52SSatish Balay   for ( i=0; i<m; i++ ) {
135d6de1c52SSatish Balay     if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIBAIJ:Negative row");
136d6de1c52SSatish Balay     if (idxm[i] >= baij->M) SETERRQ(1,"MatGetValues_MPIBAIJ:Row too large");
137d6de1c52SSatish Balay     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
138d6de1c52SSatish Balay       row = idxm[i] - bsrstart;
139d6de1c52SSatish Balay       for ( j=0; j<n; j++ ) {
140d6de1c52SSatish Balay         if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIBAIJ:Negative column");
141d6de1c52SSatish Balay         if (idxn[j] >= baij->N) SETERRQ(1,"MatGetValues_MPIBAIJ:Col too large");
142d6de1c52SSatish Balay         if (idxn[j] >= bscstart && idxn[j] < bscend){
143d6de1c52SSatish Balay           col = idxn[j] - bscstart;
144d6de1c52SSatish Balay           ierr = MatGetValues(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
145d6de1c52SSatish Balay         }
146d6de1c52SSatish Balay         else {
147905e6a2fSBarry Smith           if (!baij->colmap) {
148905e6a2fSBarry Smith             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
149905e6a2fSBarry Smith           }
150e60e1c95SSatish Balay           if((baij->colmap[idxn[j]/bs]-1 < 0) ||
151e60e1c95SSatish Balay              (baij->garray[baij->colmap[idxn[j]/bs]-1] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
152d9d09a02SSatish Balay           else {
153905e6a2fSBarry Smith             col  = (baij->colmap[idxn[j]/bs]-1)*bs + idxn[j]%bs;
154d6de1c52SSatish Balay             ierr = MatGetValues(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
155d6de1c52SSatish Balay           }
156d6de1c52SSatish Balay         }
157d6de1c52SSatish Balay       }
158d9d09a02SSatish Balay     }
159d6de1c52SSatish Balay     else {
160d6de1c52SSatish Balay       SETERRQ(1,"MatGetValues_MPIBAIJ:Only local values currently supported");
161d6de1c52SSatish Balay     }
162d6de1c52SSatish Balay   }
163d6de1c52SSatish Balay   return 0;
164d6de1c52SSatish Balay }
165d6de1c52SSatish Balay 
166d6de1c52SSatish Balay static int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm)
167d6de1c52SSatish Balay {
168d6de1c52SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
169d6de1c52SSatish Balay   Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data;
170acdf5bf4SSatish Balay   int        ierr, i,bs2=baij->bs2;
171d6de1c52SSatish Balay   double     sum = 0.0;
172d6de1c52SSatish Balay   Scalar     *v;
173d6de1c52SSatish Balay 
174d6de1c52SSatish Balay   if (baij->size == 1) {
175d6de1c52SSatish Balay     ierr =  MatNorm(baij->A,type,norm); CHKERRQ(ierr);
176d6de1c52SSatish Balay   } else {
177d6de1c52SSatish Balay     if (type == NORM_FROBENIUS) {
178d6de1c52SSatish Balay       v = amat->a;
179d6de1c52SSatish Balay       for (i=0; i<amat->nz*bs2; i++ ) {
180d6de1c52SSatish Balay #if defined(PETSC_COMPLEX)
181d6de1c52SSatish Balay         sum += real(conj(*v)*(*v)); v++;
182d6de1c52SSatish Balay #else
183d6de1c52SSatish Balay         sum += (*v)*(*v); v++;
184d6de1c52SSatish Balay #endif
185d6de1c52SSatish Balay       }
186d6de1c52SSatish Balay       v = bmat->a;
187d6de1c52SSatish Balay       for (i=0; i<bmat->nz*bs2; i++ ) {
188d6de1c52SSatish Balay #if defined(PETSC_COMPLEX)
189d6de1c52SSatish Balay         sum += real(conj(*v)*(*v)); v++;
190d6de1c52SSatish Balay #else
191d6de1c52SSatish Balay         sum += (*v)*(*v); v++;
192d6de1c52SSatish Balay #endif
193d6de1c52SSatish Balay       }
194d6de1c52SSatish Balay       MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
195d6de1c52SSatish Balay       *norm = sqrt(*norm);
196d6de1c52SSatish Balay     }
197acdf5bf4SSatish Balay     else
198acdf5bf4SSatish Balay       SETERRQ(1,"MatNorm_SeqBAIJ:No support for this norm yet");
199d6de1c52SSatish Balay   }
200d6de1c52SSatish Balay   return 0;
201d6de1c52SSatish Balay }
20257b952d6SSatish Balay 
20357b952d6SSatish Balay static int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
20457b952d6SSatish Balay {
20557b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
20657b952d6SSatish Balay   MPI_Comm    comm = mat->comm;
20757b952d6SSatish Balay   int         size = baij->size, *owners = baij->rowners,bs=baij->bs;
20857b952d6SSatish Balay   int         rank = baij->rank,tag = mat->tag, *owner,*starts,count,ierr;
20957b952d6SSatish Balay   MPI_Request *send_waits,*recv_waits;
21057b952d6SSatish Balay   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
21157b952d6SSatish Balay   InsertMode  addv;
21257b952d6SSatish Balay   Scalar      *rvalues,*svalues;
21357b952d6SSatish Balay 
21457b952d6SSatish Balay   /* make sure all processors are either in INSERTMODE or ADDMODE */
21557b952d6SSatish Balay   MPI_Allreduce(&baij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
21657b952d6SSatish Balay   if (addv == (ADD_VALUES|INSERT_VALUES)) {
21757b952d6SSatish Balay     SETERRQ(1,"MatAssemblyBegin_MPIBAIJ:Some processors inserted others added");
21857b952d6SSatish Balay   }
21957b952d6SSatish Balay   baij->insertmode = addv; /* in case this processor had no cache */
22057b952d6SSatish Balay 
22157b952d6SSatish Balay   /*  first count number of contributors to each processor */
22257b952d6SSatish Balay   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
22357b952d6SSatish Balay   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
22457b952d6SSatish Balay   owner = (int *) PetscMalloc( (baij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
22557b952d6SSatish Balay   for ( i=0; i<baij->stash.n; i++ ) {
22657b952d6SSatish Balay     idx = baij->stash.idx[i];
22757b952d6SSatish Balay     for ( j=0; j<size; j++ ) {
22857b952d6SSatish Balay       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
22957b952d6SSatish Balay         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
23057b952d6SSatish Balay       }
23157b952d6SSatish Balay     }
23257b952d6SSatish Balay   }
23357b952d6SSatish Balay   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
23457b952d6SSatish Balay 
23557b952d6SSatish Balay   /* inform other processors of number of messages and max length*/
23657b952d6SSatish Balay   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
23757b952d6SSatish Balay   MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);
23857b952d6SSatish Balay   nreceives = work[rank];
23957b952d6SSatish Balay   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
24057b952d6SSatish Balay   nmax = work[rank];
24157b952d6SSatish Balay   PetscFree(work);
24257b952d6SSatish Balay 
24357b952d6SSatish Balay   /* post receives:
24457b952d6SSatish Balay        1) each message will consist of ordered pairs
24557b952d6SSatish Balay      (global index,value) we store the global index as a double
24657b952d6SSatish Balay      to simplify the message passing.
24757b952d6SSatish Balay        2) since we don't know how long each individual message is we
24857b952d6SSatish Balay      allocate the largest needed buffer for each receive. Potentially
24957b952d6SSatish Balay      this is a lot of wasted space.
25057b952d6SSatish Balay 
25157b952d6SSatish Balay 
25257b952d6SSatish Balay        This could be done better.
25357b952d6SSatish Balay   */
25457b952d6SSatish Balay   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
25557b952d6SSatish Balay   CHKPTRQ(rvalues);
25657b952d6SSatish Balay   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
25757b952d6SSatish Balay   CHKPTRQ(recv_waits);
25857b952d6SSatish Balay   for ( i=0; i<nreceives; i++ ) {
25957b952d6SSatish Balay     MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
26057b952d6SSatish Balay               comm,recv_waits+i);
26157b952d6SSatish Balay   }
26257b952d6SSatish Balay 
26357b952d6SSatish Balay   /* do sends:
26457b952d6SSatish Balay       1) starts[i] gives the starting index in svalues for stuff going to
26557b952d6SSatish Balay          the ith processor
26657b952d6SSatish Balay   */
26757b952d6SSatish Balay   svalues = (Scalar *) PetscMalloc(3*(baij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
26857b952d6SSatish Balay   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
26957b952d6SSatish Balay   CHKPTRQ(send_waits);
27057b952d6SSatish Balay   starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
27157b952d6SSatish Balay   starts[0] = 0;
27257b952d6SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
27357b952d6SSatish Balay   for ( i=0; i<baij->stash.n; i++ ) {
27457b952d6SSatish Balay     svalues[3*starts[owner[i]]]       = (Scalar)  baij->stash.idx[i];
27557b952d6SSatish Balay     svalues[3*starts[owner[i]]+1]     = (Scalar)  baij->stash.idy[i];
27657b952d6SSatish Balay     svalues[3*(starts[owner[i]]++)+2] =  baij->stash.array[i];
27757b952d6SSatish Balay   }
27857b952d6SSatish Balay   PetscFree(owner);
27957b952d6SSatish Balay   starts[0] = 0;
28057b952d6SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
28157b952d6SSatish Balay   count = 0;
28257b952d6SSatish Balay   for ( i=0; i<size; i++ ) {
28357b952d6SSatish Balay     if (procs[i]) {
28457b952d6SSatish Balay       MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
28557b952d6SSatish Balay                 comm,send_waits+count++);
28657b952d6SSatish Balay     }
28757b952d6SSatish Balay   }
28857b952d6SSatish Balay   PetscFree(starts); PetscFree(nprocs);
28957b952d6SSatish Balay 
29057b952d6SSatish Balay   /* Free cache space */
29157b952d6SSatish Balay   PLogInfo(0,"[%d]MatAssemblyBegin_MPIBAIJ:Number of off processor values %d\n",rank,baij->stash.n);
29257b952d6SSatish Balay   ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr);
29357b952d6SSatish Balay 
29457b952d6SSatish Balay   baij->svalues    = svalues;    baij->rvalues    = rvalues;
29557b952d6SSatish Balay   baij->nsends     = nsends;     baij->nrecvs     = nreceives;
29657b952d6SSatish Balay   baij->send_waits = send_waits; baij->recv_waits = recv_waits;
29757b952d6SSatish Balay   baij->rmax       = nmax;
29857b952d6SSatish Balay 
29957b952d6SSatish Balay   return 0;
30057b952d6SSatish Balay }
30157b952d6SSatish Balay 
30257b952d6SSatish Balay 
30357b952d6SSatish Balay static int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
30457b952d6SSatish Balay {
30557b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
30657b952d6SSatish Balay   MPI_Status  *send_status,recv_status;
30757b952d6SSatish Balay   int         imdex,nrecvs = baij->nrecvs, count = nrecvs, i, n, ierr;
30857b952d6SSatish Balay   int         bs=baij->bs,row,col,other_disassembled;
30957b952d6SSatish Balay   Scalar      *values,val;
31057b952d6SSatish Balay   InsertMode  addv = baij->insertmode;
31157b952d6SSatish Balay 
31257b952d6SSatish Balay   /*  wait on receives */
31357b952d6SSatish Balay   while (count) {
31457b952d6SSatish Balay     MPI_Waitany(nrecvs,baij->recv_waits,&imdex,&recv_status);
31557b952d6SSatish Balay     /* unpack receives into our local space */
31657b952d6SSatish Balay     values = baij->rvalues + 3*imdex*baij->rmax;
31757b952d6SSatish Balay     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
31857b952d6SSatish Balay     n = n/3;
31957b952d6SSatish Balay     for ( i=0; i<n; i++ ) {
32057b952d6SSatish Balay       row = (int) PetscReal(values[3*i]) - baij->rstart*bs;
32157b952d6SSatish Balay       col = (int) PetscReal(values[3*i+1]);
32257b952d6SSatish Balay       val = values[3*i+2];
32357b952d6SSatish Balay       if (col >= baij->cstart*bs && col < baij->cend*bs) {
32457b952d6SSatish Balay         col -= baij->cstart*bs;
32557b952d6SSatish Balay         MatSetValues(baij->A,1,&row,1,&col,&val,addv);
32657b952d6SSatish Balay       }
32757b952d6SSatish Balay       else {
32857b952d6SSatish Balay         if (mat->was_assembled) {
329905e6a2fSBarry Smith           if (!baij->colmap) {
330905e6a2fSBarry Smith             ierr = CreateColmap_MPIBAIJ_Private(mat); CHKERRQ(ierr);
331905e6a2fSBarry Smith           }
332905e6a2fSBarry Smith           col = (baij->colmap[col/bs]-1)*bs + col%bs;
33357b952d6SSatish Balay           if (col < 0  && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
33457b952d6SSatish Balay             ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
33557b952d6SSatish Balay             col = (int) PetscReal(values[3*i+1]);
33657b952d6SSatish Balay           }
33757b952d6SSatish Balay         }
33857b952d6SSatish Balay         MatSetValues(baij->B,1,&row,1,&col,&val,addv);
33957b952d6SSatish Balay       }
34057b952d6SSatish Balay     }
34157b952d6SSatish Balay     count--;
34257b952d6SSatish Balay   }
34357b952d6SSatish Balay   PetscFree(baij->recv_waits); PetscFree(baij->rvalues);
34457b952d6SSatish Balay 
34557b952d6SSatish Balay   /* wait on sends */
34657b952d6SSatish Balay   if (baij->nsends) {
34757b952d6SSatish Balay     send_status = (MPI_Status *) PetscMalloc(baij->nsends*sizeof(MPI_Status));
34857b952d6SSatish Balay     CHKPTRQ(send_status);
34957b952d6SSatish Balay     MPI_Waitall(baij->nsends,baij->send_waits,send_status);
35057b952d6SSatish Balay     PetscFree(send_status);
35157b952d6SSatish Balay   }
35257b952d6SSatish Balay   PetscFree(baij->send_waits); PetscFree(baij->svalues);
35357b952d6SSatish Balay 
35457b952d6SSatish Balay   baij->insertmode = NOT_SET_VALUES;
35557b952d6SSatish Balay   ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr);
35657b952d6SSatish Balay   ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr);
35757b952d6SSatish Balay 
35857b952d6SSatish Balay   /* determine if any processor has disassembled, if so we must
35957b952d6SSatish Balay      also disassemble ourselfs, in order that we may reassemble. */
36057b952d6SSatish Balay   MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
36157b952d6SSatish Balay   if (mat->was_assembled && !other_disassembled) {
36257b952d6SSatish Balay     ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
36357b952d6SSatish Balay   }
36457b952d6SSatish Balay 
3656d4a8577SBarry Smith   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
36657b952d6SSatish Balay     ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr);
36757b952d6SSatish Balay   }
36857b952d6SSatish Balay   ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr);
36957b952d6SSatish Balay   ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr);
37057b952d6SSatish Balay 
37157b952d6SSatish Balay   if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;}
37257b952d6SSatish Balay   return 0;
37357b952d6SSatish Balay }
37457b952d6SSatish Balay 
37557b952d6SSatish Balay static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer)
37657b952d6SSatish Balay {
37757b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
37857b952d6SSatish Balay   int          ierr;
37957b952d6SSatish Balay 
38057b952d6SSatish Balay   if (baij->size == 1) {
38157b952d6SSatish Balay     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
38257b952d6SSatish Balay   }
38357b952d6SSatish Balay   else SETERRQ(1,"MatView_MPIBAIJ_Binary:Only uniprocessor output supported");
38457b952d6SSatish Balay   return 0;
38557b952d6SSatish Balay }
38657b952d6SSatish Balay 
38757b952d6SSatish Balay static int MatView_MPIBAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
38857b952d6SSatish Balay {
38957b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
390cee3aa6bSSatish Balay   int          ierr, format,rank,bs=baij->bs;
39157b952d6SSatish Balay   FILE         *fd;
39257b952d6SSatish Balay   ViewerType   vtype;
39357b952d6SSatish Balay 
39457b952d6SSatish Balay   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
39557b952d6SSatish Balay   if (vtype  == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
39657b952d6SSatish Balay     ierr = ViewerGetFormat(viewer,&format);
39757b952d6SSatish Balay     if (format == ASCII_FORMAT_INFO_DETAILED) {
3984e220ebcSLois Curfman McInnes       MatInfo info;
39957b952d6SSatish Balay       MPI_Comm_rank(mat->comm,&rank);
40057b952d6SSatish Balay       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
4014e220ebcSLois Curfman McInnes       ierr = MatGetInfo(mat,MAT_LOCAL,&info);
40257b952d6SSatish Balay       PetscSequentialPhaseBegin(mat->comm,1);
40357b952d6SSatish Balay       fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
4044e220ebcSLois Curfman McInnes               rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
4054e220ebcSLois Curfman McInnes               baij->bs,(int)info.memory);
4064e220ebcSLois Curfman McInnes       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);
4074e220ebcSLois Curfman McInnes       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
4084e220ebcSLois Curfman McInnes       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);
4094e220ebcSLois Curfman McInnes       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
41057b952d6SSatish Balay       fflush(fd);
41157b952d6SSatish Balay       PetscSequentialPhaseEnd(mat->comm,1);
41257b952d6SSatish Balay       ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr);
41357b952d6SSatish Balay       return 0;
41457b952d6SSatish Balay     }
41557b952d6SSatish Balay     else if (format == ASCII_FORMAT_INFO) {
41657b952d6SSatish Balay       return 0;
41757b952d6SSatish Balay     }
41857b952d6SSatish Balay   }
41957b952d6SSatish Balay 
42057b952d6SSatish Balay   if (vtype == DRAW_VIEWER) {
42157b952d6SSatish Balay     Draw       draw;
42257b952d6SSatish Balay     PetscTruth isnull;
42357b952d6SSatish Balay     ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
42457b952d6SSatish Balay     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
42557b952d6SSatish Balay   }
42657b952d6SSatish Balay 
42757b952d6SSatish Balay   if (vtype == ASCII_FILE_VIEWER) {
42857b952d6SSatish Balay     ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
42957b952d6SSatish Balay     PetscSequentialPhaseBegin(mat->comm,1);
43057b952d6SSatish Balay     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
43157b952d6SSatish Balay            baij->rank,baij->m,baij->rstart*bs,baij->rend*bs,baij->n,
43257b952d6SSatish Balay             baij->cstart*bs,baij->cend*bs);
43357b952d6SSatish Balay     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
43457b952d6SSatish Balay     ierr = MatView(baij->B,viewer); CHKERRQ(ierr);
43557b952d6SSatish Balay     fflush(fd);
43657b952d6SSatish Balay     PetscSequentialPhaseEnd(mat->comm,1);
43757b952d6SSatish Balay   }
43857b952d6SSatish Balay   else {
43957b952d6SSatish Balay     int size = baij->size;
44057b952d6SSatish Balay     rank = baij->rank;
44157b952d6SSatish Balay     if (size == 1) {
44257b952d6SSatish Balay       ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
44357b952d6SSatish Balay     }
44457b952d6SSatish Balay     else {
44557b952d6SSatish Balay       /* assemble the entire matrix onto first processor. */
44657b952d6SSatish Balay       Mat         A;
44757b952d6SSatish Balay       Mat_SeqBAIJ *Aloc;
44857b952d6SSatish Balay       int         M = baij->M, N = baij->N,*ai,*aj,row,col,i,j,k,*rvals;
44957b952d6SSatish Balay       int         mbs=baij->mbs;
45057b952d6SSatish Balay       Scalar      *a;
45157b952d6SSatish Balay 
45257b952d6SSatish Balay       if (!rank) {
453cee3aa6bSSatish Balay         ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
45457b952d6SSatish Balay         CHKERRQ(ierr);
45557b952d6SSatish Balay       }
45657b952d6SSatish Balay       else {
457cee3aa6bSSatish Balay         ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
45857b952d6SSatish Balay         CHKERRQ(ierr);
45957b952d6SSatish Balay       }
46057b952d6SSatish Balay       PLogObjectParent(mat,A);
46157b952d6SSatish Balay 
46257b952d6SSatish Balay       /* copy over the A part */
46357b952d6SSatish Balay       Aloc = (Mat_SeqBAIJ*) baij->A->data;
46457b952d6SSatish Balay       ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
46557b952d6SSatish Balay       row = baij->rstart;
46657b952d6SSatish Balay       rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
46757b952d6SSatish Balay 
46857b952d6SSatish Balay       for ( i=0; i<mbs; i++ ) {
46957b952d6SSatish Balay         rvals[0] = bs*(baij->rstart + i);
47057b952d6SSatish Balay         for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
47157b952d6SSatish Balay         for ( j=ai[i]; j<ai[i+1]; j++ ) {
47257b952d6SSatish Balay           col = (baij->cstart+aj[j])*bs;
47357b952d6SSatish Balay           for (k=0; k<bs; k++ ) {
474cee3aa6bSSatish Balay             ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
475cee3aa6bSSatish Balay             col++; a += bs;
47657b952d6SSatish Balay           }
47757b952d6SSatish Balay         }
47857b952d6SSatish Balay       }
47957b952d6SSatish Balay       /* copy over the B part */
48057b952d6SSatish Balay       Aloc = (Mat_SeqBAIJ*) baij->B->data;
48157b952d6SSatish Balay       ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
48257b952d6SSatish Balay       row = baij->rstart*bs;
48357b952d6SSatish Balay       for ( i=0; i<mbs; i++ ) {
48457b952d6SSatish Balay         rvals[0] = bs*(baij->rstart + i);
48557b952d6SSatish Balay         for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
48657b952d6SSatish Balay         for ( j=ai[i]; j<ai[i+1]; j++ ) {
48757b952d6SSatish Balay           col = baij->garray[aj[j]]*bs;
48857b952d6SSatish Balay           for (k=0; k<bs; k++ ) {
489cee3aa6bSSatish Balay             ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
490cee3aa6bSSatish Balay             col++; a += bs;
49157b952d6SSatish Balay           }
49257b952d6SSatish Balay         }
49357b952d6SSatish Balay       }
49457b952d6SSatish Balay       PetscFree(rvals);
4956d4a8577SBarry Smith       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
4966d4a8577SBarry Smith       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
49757b952d6SSatish Balay       if (!rank) {
49857b952d6SSatish Balay         ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
49957b952d6SSatish Balay       }
50057b952d6SSatish Balay       ierr = MatDestroy(A); CHKERRQ(ierr);
50157b952d6SSatish Balay     }
50257b952d6SSatish Balay   }
50357b952d6SSatish Balay   return 0;
50457b952d6SSatish Balay }
50557b952d6SSatish Balay 
50657b952d6SSatish Balay 
50757b952d6SSatish Balay 
50857b952d6SSatish Balay static int MatView_MPIBAIJ(PetscObject obj,Viewer viewer)
50957b952d6SSatish Balay {
51057b952d6SSatish Balay   Mat         mat = (Mat) obj;
51157b952d6SSatish Balay   int         ierr;
51257b952d6SSatish Balay   ViewerType  vtype;
51357b952d6SSatish Balay 
51457b952d6SSatish Balay   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
51557b952d6SSatish Balay   if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
51657b952d6SSatish Balay       vtype == DRAW_VIEWER       || vtype == MATLAB_VIEWER) {
51757b952d6SSatish Balay     ierr = MatView_MPIBAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
51857b952d6SSatish Balay   }
51957b952d6SSatish Balay   else if (vtype == BINARY_FILE_VIEWER) {
52057b952d6SSatish Balay     return MatView_MPIBAIJ_Binary(mat,viewer);
52157b952d6SSatish Balay   }
52257b952d6SSatish Balay   return 0;
52357b952d6SSatish Balay }
52457b952d6SSatish Balay 
52579bdfe76SSatish Balay static int MatDestroy_MPIBAIJ(PetscObject obj)
52679bdfe76SSatish Balay {
52779bdfe76SSatish Balay   Mat         mat = (Mat) obj;
52879bdfe76SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
52979bdfe76SSatish Balay   int         ierr;
53079bdfe76SSatish Balay 
53179bdfe76SSatish Balay #if defined(PETSC_LOG)
53279bdfe76SSatish Balay   PLogObjectState(obj,"Rows=%d, Cols=%d",baij->M,baij->N);
53379bdfe76SSatish Balay #endif
53479bdfe76SSatish Balay 
53579bdfe76SSatish Balay   PetscFree(baij->rowners);
53679bdfe76SSatish Balay   ierr = MatDestroy(baij->A); CHKERRQ(ierr);
53779bdfe76SSatish Balay   ierr = MatDestroy(baij->B); CHKERRQ(ierr);
53879bdfe76SSatish Balay   if (baij->colmap) PetscFree(baij->colmap);
53979bdfe76SSatish Balay   if (baij->garray) PetscFree(baij->garray);
54079bdfe76SSatish Balay   if (baij->lvec)   VecDestroy(baij->lvec);
54179bdfe76SSatish Balay   if (baij->Mvctx)  VecScatterDestroy(baij->Mvctx);
54279bdfe76SSatish Balay   if (baij->rowvalues) PetscFree(baij->rowvalues);
54379bdfe76SSatish Balay   PetscFree(baij);
54479bdfe76SSatish Balay   PLogObjectDestroy(mat);
54579bdfe76SSatish Balay   PetscHeaderDestroy(mat);
54679bdfe76SSatish Balay   return 0;
54779bdfe76SSatish Balay }
54879bdfe76SSatish Balay 
549cee3aa6bSSatish Balay static int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
550cee3aa6bSSatish Balay {
551cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
55247b4a8eaSLois Curfman McInnes   int         ierr, nt;
553cee3aa6bSSatish Balay 
554c16cb8f2SBarry Smith   VecGetLocalSize_Fast(xx,nt);
55547b4a8eaSLois Curfman McInnes   if (nt != a->n) {
5560ac07820SSatish Balay     SETERRQ(1,"MatMult_MPIBAIJ:Incompatible parition of A and xx");
55747b4a8eaSLois Curfman McInnes   }
558c16cb8f2SBarry Smith   VecGetLocalSize_Fast(yy,nt);
55947b4a8eaSLois Curfman McInnes   if (nt != a->m) {
5600ac07820SSatish Balay     SETERRQ(1,"MatMult_MPIBAIJ:Incompatible parition of A and yy");
56147b4a8eaSLois Curfman McInnes   }
562cee3aa6bSSatish Balay   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
563cee3aa6bSSatish Balay   ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr);
564cee3aa6bSSatish Balay   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
565cee3aa6bSSatish Balay   ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
566cee3aa6bSSatish Balay   return 0;
567cee3aa6bSSatish Balay }
568cee3aa6bSSatish Balay 
569cee3aa6bSSatish Balay static int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
570cee3aa6bSSatish Balay {
571cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
572cee3aa6bSSatish Balay   int        ierr;
573cee3aa6bSSatish Balay   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
574cee3aa6bSSatish Balay   ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
575cee3aa6bSSatish Balay   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
576cee3aa6bSSatish Balay   ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
577cee3aa6bSSatish Balay   return 0;
578cee3aa6bSSatish Balay }
579cee3aa6bSSatish Balay 
580cee3aa6bSSatish Balay static int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy)
581cee3aa6bSSatish Balay {
582cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
583cee3aa6bSSatish Balay   int        ierr;
584cee3aa6bSSatish Balay 
585cee3aa6bSSatish Balay   /* do nondiagonal part */
586cee3aa6bSSatish Balay   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
587cee3aa6bSSatish Balay   /* send it on its way */
588537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
589cee3aa6bSSatish Balay   /* do local part */
590cee3aa6bSSatish Balay   ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr);
591cee3aa6bSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
592cee3aa6bSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
593cee3aa6bSSatish Balay   /* but is not perhaps always true. */
594cee3aa6bSSatish Balay   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,
595cee3aa6bSSatish Balay                   (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx);CHKERRQ(ierr);
596cee3aa6bSSatish Balay   return 0;
597cee3aa6bSSatish Balay }
598cee3aa6bSSatish Balay 
599cee3aa6bSSatish Balay static int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
600cee3aa6bSSatish Balay {
601cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
602cee3aa6bSSatish Balay   int        ierr;
603cee3aa6bSSatish Balay 
604cee3aa6bSSatish Balay   /* do nondiagonal part */
605cee3aa6bSSatish Balay   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
606cee3aa6bSSatish Balay   /* send it on its way */
607537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
608cee3aa6bSSatish Balay   /* do local part */
609cee3aa6bSSatish Balay   ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
610cee3aa6bSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
611cee3aa6bSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
612cee3aa6bSSatish Balay   /* but is not perhaps always true. */
613537820f0SBarry Smith   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
614cee3aa6bSSatish Balay   return 0;
615cee3aa6bSSatish Balay }
616cee3aa6bSSatish Balay 
617cee3aa6bSSatish Balay /*
618cee3aa6bSSatish Balay   This only works correctly for square matrices where the subblock A->A is the
619cee3aa6bSSatish Balay    diagonal block
620cee3aa6bSSatish Balay */
621cee3aa6bSSatish Balay static int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
622cee3aa6bSSatish Balay {
623cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
624cee3aa6bSSatish Balay   if (a->M != a->N)
625cee3aa6bSSatish Balay     SETERRQ(1,"MatGetDiagonal_MPIBAIJ:Supports only square matrix where A->A is diag block");
626cee3aa6bSSatish Balay   return MatGetDiagonal(a->A,v);
627cee3aa6bSSatish Balay }
628cee3aa6bSSatish Balay 
629cee3aa6bSSatish Balay static int MatScale_MPIBAIJ(Scalar *aa,Mat A)
630cee3aa6bSSatish Balay {
631cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
632cee3aa6bSSatish Balay   int        ierr;
633cee3aa6bSSatish Balay   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
634cee3aa6bSSatish Balay   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
635cee3aa6bSSatish Balay   return 0;
636cee3aa6bSSatish Balay }
63757b952d6SSatish Balay static int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n)
63857b952d6SSatish Balay {
63957b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
64057b952d6SSatish Balay   *m = mat->M; *n = mat->N;
64157b952d6SSatish Balay   return 0;
64257b952d6SSatish Balay }
64357b952d6SSatish Balay 
64457b952d6SSatish Balay static int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n)
64557b952d6SSatish Balay {
64657b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
64757b952d6SSatish Balay   *m = mat->m; *n = mat->N;
64857b952d6SSatish Balay   return 0;
64957b952d6SSatish Balay }
65057b952d6SSatish Balay 
65157b952d6SSatish Balay static int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n)
65257b952d6SSatish Balay {
65357b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
65457b952d6SSatish Balay   *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs;
65557b952d6SSatish Balay   return 0;
65657b952d6SSatish Balay }
65757b952d6SSatish Balay 
658acdf5bf4SSatish Balay extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
659acdf5bf4SSatish Balay extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
660acdf5bf4SSatish Balay 
661acdf5bf4SSatish Balay int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
662acdf5bf4SSatish Balay {
663acdf5bf4SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
664acdf5bf4SSatish Balay   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
665acdf5bf4SSatish Balay   int        bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB;
666d9d09a02SSatish Balay   int        nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs;
667d9d09a02SSatish Balay   int        *cmap, *idx_p,cstart = mat->cstart;
668acdf5bf4SSatish Balay 
669acdf5bf4SSatish Balay   if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,"MatGetRow_MPIBAIJ:Already active");
670acdf5bf4SSatish Balay   mat->getrowactive = PETSC_TRUE;
671acdf5bf4SSatish Balay 
672acdf5bf4SSatish Balay   if (!mat->rowvalues && (idx || v)) {
673acdf5bf4SSatish Balay     /*
674acdf5bf4SSatish Balay         allocate enough space to hold information from the longest row.
675acdf5bf4SSatish Balay     */
676acdf5bf4SSatish Balay     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data;
677bd16c2feSSatish Balay     int     max = 1,mbs = mat->mbs,tmp;
678bd16c2feSSatish Balay     for ( i=0; i<mbs; i++ ) {
679acdf5bf4SSatish Balay       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
680acdf5bf4SSatish Balay       if (max < tmp) { max = tmp; }
681acdf5bf4SSatish Balay     }
682acdf5bf4SSatish Balay     mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar)));
683acdf5bf4SSatish Balay     CHKPTRQ(mat->rowvalues);
684acdf5bf4SSatish Balay     mat->rowindices = (int *) (mat->rowvalues + max*bs2);
685acdf5bf4SSatish Balay   }
686acdf5bf4SSatish Balay 
687acdf5bf4SSatish Balay 
688d9d09a02SSatish Balay   if (row < brstart || row >= brend) SETERRQ(1,"MatGetRow_MPIBAIJ:Only local rows")
689d9d09a02SSatish Balay   lrow = row - brstart;
690acdf5bf4SSatish Balay 
691acdf5bf4SSatish Balay   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
692acdf5bf4SSatish Balay   if (!v)   {pvA = 0; pvB = 0;}
693acdf5bf4SSatish Balay   if (!idx) {pcA = 0; if (!v) pcB = 0;}
694acdf5bf4SSatish Balay   ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
695acdf5bf4SSatish Balay   ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
696acdf5bf4SSatish Balay   nztot = nzA + nzB;
697acdf5bf4SSatish Balay 
698acdf5bf4SSatish Balay   cmap  = mat->garray;
699acdf5bf4SSatish Balay   if (v  || idx) {
700acdf5bf4SSatish Balay     if (nztot) {
701acdf5bf4SSatish Balay       /* Sort by increasing column numbers, assuming A and B already sorted */
702acdf5bf4SSatish Balay       int imark = -1;
703acdf5bf4SSatish Balay       if (v) {
704acdf5bf4SSatish Balay         *v = v_p = mat->rowvalues;
705acdf5bf4SSatish Balay         for ( i=0; i<nzB; i++ ) {
706d9d09a02SSatish Balay           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
707acdf5bf4SSatish Balay           else break;
708acdf5bf4SSatish Balay         }
709acdf5bf4SSatish Balay         imark = i;
710acdf5bf4SSatish Balay         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
711acdf5bf4SSatish Balay         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
712acdf5bf4SSatish Balay       }
713acdf5bf4SSatish Balay       if (idx) {
714acdf5bf4SSatish Balay         *idx = idx_p = mat->rowindices;
715acdf5bf4SSatish Balay         if (imark > -1) {
716acdf5bf4SSatish Balay           for ( i=0; i<imark; i++ ) {
717bd16c2feSSatish Balay             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
718acdf5bf4SSatish Balay           }
719acdf5bf4SSatish Balay         } else {
720acdf5bf4SSatish Balay           for ( i=0; i<nzB; i++ ) {
721d9d09a02SSatish Balay             if (cmap[cworkB[i]/bs] < cstart)
722d9d09a02SSatish Balay               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
723acdf5bf4SSatish Balay             else break;
724acdf5bf4SSatish Balay           }
725acdf5bf4SSatish Balay           imark = i;
726acdf5bf4SSatish Balay         }
727d9d09a02SSatish Balay         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart*bs + cworkA[i];
728d9d09a02SSatish Balay         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
729acdf5bf4SSatish Balay       }
730acdf5bf4SSatish Balay     }
731d212a18eSSatish Balay     else {
732d212a18eSSatish Balay       if (idx) *idx = 0;
733d212a18eSSatish Balay       if (v)   *v   = 0;
734d212a18eSSatish Balay     }
735acdf5bf4SSatish Balay   }
736acdf5bf4SSatish Balay   *nz = nztot;
737acdf5bf4SSatish Balay   ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
738acdf5bf4SSatish Balay   ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
739acdf5bf4SSatish Balay   return 0;
740acdf5bf4SSatish Balay }
741acdf5bf4SSatish Balay 
742acdf5bf4SSatish Balay int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
743acdf5bf4SSatish Balay {
744acdf5bf4SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
745acdf5bf4SSatish Balay   if (baij->getrowactive == PETSC_FALSE) {
746acdf5bf4SSatish Balay     SETERRQ(1,"MatRestoreRow_MPIBAIJ:MatGetRow not called");
747acdf5bf4SSatish Balay   }
748acdf5bf4SSatish Balay   baij->getrowactive = PETSC_FALSE;
749acdf5bf4SSatish Balay   return 0;
750acdf5bf4SSatish Balay }
751acdf5bf4SSatish Balay 
7525a838052SSatish Balay static int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
7535a838052SSatish Balay {
7545a838052SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
7555a838052SSatish Balay   *bs = baij->bs;
7565a838052SSatish Balay   return 0;
7575a838052SSatish Balay }
7585a838052SSatish Balay 
75958667388SSatish Balay static int MatZeroEntries_MPIBAIJ(Mat A)
76058667388SSatish Balay {
76158667388SSatish Balay   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data;
76258667388SSatish Balay   int         ierr;
76358667388SSatish Balay   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
76458667388SSatish Balay   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
76558667388SSatish Balay   return 0;
76658667388SSatish Balay }
7670ac07820SSatish Balay 
7684e220ebcSLois Curfman McInnes static int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
7690ac07820SSatish Balay {
7704e220ebcSLois Curfman McInnes   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data;
7714e220ebcSLois Curfman McInnes   Mat         A = a->A, B = a->B;
7727d57db60SLois Curfman McInnes   int         ierr;
7737d57db60SLois Curfman McInnes   double      isend[5], irecv[5];
7740ac07820SSatish Balay 
7754e220ebcSLois Curfman McInnes   info->rows_global    = (double)a->M;
7764e220ebcSLois Curfman McInnes   info->columns_global = (double)a->N;
7774e220ebcSLois Curfman McInnes   info->rows_local     = (double)a->m;
7784e220ebcSLois Curfman McInnes   info->columns_local  = (double)a->N;
7794e220ebcSLois Curfman McInnes   info->block_size     = (double)a->bs;
7804e220ebcSLois Curfman McInnes   ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
7814e220ebcSLois Curfman McInnes   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->memory;
7824e220ebcSLois Curfman McInnes   ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
7834e220ebcSLois Curfman McInnes   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->memory;
7840ac07820SSatish Balay   if (flag == MAT_LOCAL) {
7854e220ebcSLois Curfman McInnes     info->nz_used      = isend[0];
7864e220ebcSLois Curfman McInnes     info->nz_allocated = isend[1];
7874e220ebcSLois Curfman McInnes     info->nz_unneeded  = isend[2];
7884e220ebcSLois Curfman McInnes     info->memory       = isend[3];
7894e220ebcSLois Curfman McInnes     info->mallocs      = isend[4];
7900ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_MAX) {
7910ac07820SSatish Balay     MPI_Allreduce(isend,irecv,3,MPI_INT,MPI_MAX,matin->comm);
7924e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
7934e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
7944e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
7954e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
7964e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
7970ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_SUM) {
7980ac07820SSatish Balay     MPI_Allreduce(isend,irecv,3,MPI_INT,MPI_SUM,matin->comm);
7994e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
8004e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
8014e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
8024e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
8034e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
8040ac07820SSatish Balay   }
8054e220ebcSLois Curfman McInnes   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
8064e220ebcSLois Curfman McInnes   info->fill_ratio_needed = 0;
8074e220ebcSLois Curfman McInnes   info->factor_mallocs    = 0;
8080ac07820SSatish Balay   return 0;
8090ac07820SSatish Balay }
8100ac07820SSatish Balay 
81158667388SSatish Balay static int MatSetOption_MPIBAIJ(Mat A,MatOption op)
81258667388SSatish Balay {
81358667388SSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
81458667388SSatish Balay 
81558667388SSatish Balay   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
81658667388SSatish Balay       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
81758667388SSatish Balay       op == MAT_COLUMNS_SORTED ||
81858667388SSatish Balay       op == MAT_ROW_ORIENTED) {
81958667388SSatish Balay         MatSetOption(a->A,op);
82058667388SSatish Balay         MatSetOption(a->B,op);
82158667388SSatish Balay   }
82258667388SSatish Balay   else if (op == MAT_ROWS_SORTED ||
82358667388SSatish Balay            op == MAT_SYMMETRIC ||
82458667388SSatish Balay            op == MAT_STRUCTURALLY_SYMMETRIC ||
82558667388SSatish Balay            op == MAT_YES_NEW_DIAGONALS)
82658667388SSatish Balay     PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
82758667388SSatish Balay   else if (op == MAT_COLUMN_ORIENTED) {
82858667388SSatish Balay     a->roworiented = 0;
82958667388SSatish Balay     MatSetOption(a->A,op);
83058667388SSatish Balay     MatSetOption(a->B,op);
83158667388SSatish Balay   }
83258667388SSatish Balay   else if (op == MAT_NO_NEW_DIAGONALS)
8330ac07820SSatish Balay     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIBAIJ:MAT_NO_NEW_DIAGONALS");}
83458667388SSatish Balay   else
8350ac07820SSatish Balay     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIBAIJ:unknown option");}
83658667388SSatish Balay   return 0;
83758667388SSatish Balay }
83858667388SSatish Balay 
8390ac07820SSatish Balay static int MatTranspose_MPIBAIJ(Mat A,Mat *matout)
8400ac07820SSatish Balay {
8410ac07820SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data;
8420ac07820SSatish Balay   Mat_SeqBAIJ *Aloc;
8430ac07820SSatish Balay   Mat        B;
8440ac07820SSatish Balay   int        ierr,M=baij->M,N=baij->N,*ai,*aj,row,i,*rvals,j,k,col;
8450ac07820SSatish Balay   int        bs=baij->bs,mbs=baij->mbs;
8460ac07820SSatish Balay   Scalar     *a;
8470ac07820SSatish Balay 
8480ac07820SSatish Balay   if (matout == PETSC_NULL && M != N)
8490ac07820SSatish Balay     SETERRQ(1,"MatTranspose_MPIBAIJ:Square matrix only for in-place");
8500ac07820SSatish Balay   ierr = MatCreateMPIBAIJ(A->comm,baij->bs,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);
8510ac07820SSatish Balay   CHKERRQ(ierr);
8520ac07820SSatish Balay 
8530ac07820SSatish Balay   /* copy over the A part */
8540ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*) baij->A->data;
8550ac07820SSatish Balay   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
8560ac07820SSatish Balay   row = baij->rstart;
8570ac07820SSatish Balay   rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
8580ac07820SSatish Balay 
8590ac07820SSatish Balay   for ( i=0; i<mbs; i++ ) {
8600ac07820SSatish Balay     rvals[0] = bs*(baij->rstart + i);
8610ac07820SSatish Balay     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
8620ac07820SSatish Balay     for ( j=ai[i]; j<ai[i+1]; j++ ) {
8630ac07820SSatish Balay       col = (baij->cstart+aj[j])*bs;
8640ac07820SSatish Balay       for (k=0; k<bs; k++ ) {
8650ac07820SSatish Balay         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
8660ac07820SSatish Balay         col++; a += bs;
8670ac07820SSatish Balay       }
8680ac07820SSatish Balay     }
8690ac07820SSatish Balay   }
8700ac07820SSatish Balay   /* copy over the B part */
8710ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*) baij->B->data;
8720ac07820SSatish Balay   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
8730ac07820SSatish Balay   row = baij->rstart*bs;
8740ac07820SSatish Balay   for ( i=0; i<mbs; i++ ) {
8750ac07820SSatish Balay     rvals[0] = bs*(baij->rstart + i);
8760ac07820SSatish Balay     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
8770ac07820SSatish Balay     for ( j=ai[i]; j<ai[i+1]; j++ ) {
8780ac07820SSatish Balay       col = baij->garray[aj[j]]*bs;
8790ac07820SSatish Balay       for (k=0; k<bs; k++ ) {
8800ac07820SSatish Balay         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
8810ac07820SSatish Balay         col++; a += bs;
8820ac07820SSatish Balay       }
8830ac07820SSatish Balay     }
8840ac07820SSatish Balay   }
8850ac07820SSatish Balay   PetscFree(rvals);
8860ac07820SSatish Balay   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
8870ac07820SSatish Balay   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
8880ac07820SSatish Balay 
8890ac07820SSatish Balay   if (matout != PETSC_NULL) {
8900ac07820SSatish Balay     *matout = B;
8910ac07820SSatish Balay   } else {
8920ac07820SSatish Balay     /* This isn't really an in-place transpose .... but free data structures from baij */
8930ac07820SSatish Balay     PetscFree(baij->rowners);
8940ac07820SSatish Balay     ierr = MatDestroy(baij->A); CHKERRQ(ierr);
8950ac07820SSatish Balay     ierr = MatDestroy(baij->B); CHKERRQ(ierr);
8960ac07820SSatish Balay     if (baij->colmap) PetscFree(baij->colmap);
8970ac07820SSatish Balay     if (baij->garray) PetscFree(baij->garray);
8980ac07820SSatish Balay     if (baij->lvec) VecDestroy(baij->lvec);
8990ac07820SSatish Balay     if (baij->Mvctx) VecScatterDestroy(baij->Mvctx);
9000ac07820SSatish Balay     PetscFree(baij);
9010ac07820SSatish Balay     PetscMemcpy(A,B,sizeof(struct _Mat));
9020ac07820SSatish Balay     PetscHeaderDestroy(B);
9030ac07820SSatish Balay   }
9040ac07820SSatish Balay   return 0;
9050ac07820SSatish Balay }
906*0e95ebc0SSatish Balay 
907*0e95ebc0SSatish Balay int MatDiagonalScale_MPIBAIJ(Mat A,Vec ll,Vec rr)
908*0e95ebc0SSatish Balay {
909*0e95ebc0SSatish Balay   Mat a = ((Mat_MPIBAIJ *) A->data)->A;
910*0e95ebc0SSatish Balay   Mat b = ((Mat_MPIBAIJ *) A->data)->B;
911*0e95ebc0SSatish Balay   int ierr,s1,s2,s3;
912*0e95ebc0SSatish Balay 
913*0e95ebc0SSatish Balay   if (ll)  {
914*0e95ebc0SSatish Balay     ierr = VecGetLocalSize(ll,&s1); CHKERRQ(ierr);
915*0e95ebc0SSatish Balay     ierr = MatGetLocalSize(A,&s2,&s3); CHKERRQ(ierr);
916*0e95ebc0SSatish Balay     if (s1!=s2) SETERRQ(1,"MatDiagonalScale_MPIBAIJ: non-conforming local sizes");
917*0e95ebc0SSatish Balay     ierr = MatDiagonalScale(a,ll,0); CHKERRQ(ierr);
918*0e95ebc0SSatish Balay     ierr = MatDiagonalScale(b,ll,0); CHKERRQ(ierr);
919*0e95ebc0SSatish Balay   }
920*0e95ebc0SSatish Balay   if (rr) SETERRQ(1,"MatDiagonalScale_MPIBAIJ:not supported for right vector");
921*0e95ebc0SSatish Balay   return 0;
922*0e95ebc0SSatish Balay }
923*0e95ebc0SSatish Balay 
9240ac07820SSatish Balay /* the code does not do the diagonal entries correctly unless the
9250ac07820SSatish Balay    matrix is square and the column and row owerships are identical.
9260ac07820SSatish Balay    This is a BUG. The only way to fix it seems to be to access
9270ac07820SSatish Balay    baij->A and baij->B directly and not through the MatZeroRows()
9280ac07820SSatish Balay    routine.
9290ac07820SSatish Balay */
9300ac07820SSatish Balay static int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag)
9310ac07820SSatish Balay {
9320ac07820SSatish Balay   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ *) A->data;
9330ac07820SSatish Balay   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
9340ac07820SSatish Balay   int            *procs,*nprocs,j,found,idx,nsends,*work;
9350ac07820SSatish Balay   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
9360ac07820SSatish Balay   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
9370ac07820SSatish Balay   int            *lens,imdex,*lrows,*values,bs=l->bs;
9380ac07820SSatish Balay   MPI_Comm       comm = A->comm;
9390ac07820SSatish Balay   MPI_Request    *send_waits,*recv_waits;
9400ac07820SSatish Balay   MPI_Status     recv_status,*send_status;
9410ac07820SSatish Balay   IS             istmp;
9420ac07820SSatish Balay 
9430ac07820SSatish Balay   ierr = ISGetSize(is,&N); CHKERRQ(ierr);
9440ac07820SSatish Balay   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
9450ac07820SSatish Balay 
9460ac07820SSatish Balay   /*  first count number of contributors to each processor */
9470ac07820SSatish Balay   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
9480ac07820SSatish Balay   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
9490ac07820SSatish Balay   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
9500ac07820SSatish Balay   for ( i=0; i<N; i++ ) {
9510ac07820SSatish Balay     idx = rows[i];
9520ac07820SSatish Balay     found = 0;
9530ac07820SSatish Balay     for ( j=0; j<size; j++ ) {
9540ac07820SSatish Balay       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
9550ac07820SSatish Balay         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
9560ac07820SSatish Balay       }
9570ac07820SSatish Balay     }
9580ac07820SSatish Balay     if (!found) SETERRQ(1,"MatZeroRows_MPIBAIJ:Index out of range");
9590ac07820SSatish Balay   }
9600ac07820SSatish Balay   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
9610ac07820SSatish Balay 
9620ac07820SSatish Balay   /* inform other processors of number of messages and max length*/
9630ac07820SSatish Balay   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
9640ac07820SSatish Balay   MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
9650ac07820SSatish Balay   nrecvs = work[rank];
9660ac07820SSatish Balay   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
9670ac07820SSatish Balay   nmax = work[rank];
9680ac07820SSatish Balay   PetscFree(work);
9690ac07820SSatish Balay 
9700ac07820SSatish Balay   /* post receives:   */
9710ac07820SSatish Balay   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
9720ac07820SSatish Balay   CHKPTRQ(rvalues);
9730ac07820SSatish Balay   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
9740ac07820SSatish Balay   CHKPTRQ(recv_waits);
9750ac07820SSatish Balay   for ( i=0; i<nrecvs; i++ ) {
9760ac07820SSatish Balay     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
9770ac07820SSatish Balay   }
9780ac07820SSatish Balay 
9790ac07820SSatish Balay   /* do sends:
9800ac07820SSatish Balay       1) starts[i] gives the starting index in svalues for stuff going to
9810ac07820SSatish Balay          the ith processor
9820ac07820SSatish Balay   */
9830ac07820SSatish Balay   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
9840ac07820SSatish Balay   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
9850ac07820SSatish Balay   CHKPTRQ(send_waits);
9860ac07820SSatish Balay   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
9870ac07820SSatish Balay   starts[0] = 0;
9880ac07820SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
9890ac07820SSatish Balay   for ( i=0; i<N; i++ ) {
9900ac07820SSatish Balay     svalues[starts[owner[i]]++] = rows[i];
9910ac07820SSatish Balay   }
9920ac07820SSatish Balay   ISRestoreIndices(is,&rows);
9930ac07820SSatish Balay 
9940ac07820SSatish Balay   starts[0] = 0;
9950ac07820SSatish Balay   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
9960ac07820SSatish Balay   count = 0;
9970ac07820SSatish Balay   for ( i=0; i<size; i++ ) {
9980ac07820SSatish Balay     if (procs[i]) {
9990ac07820SSatish Balay       MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
10000ac07820SSatish Balay     }
10010ac07820SSatish Balay   }
10020ac07820SSatish Balay   PetscFree(starts);
10030ac07820SSatish Balay 
10040ac07820SSatish Balay   base = owners[rank]*bs;
10050ac07820SSatish Balay 
10060ac07820SSatish Balay   /*  wait on receives */
10070ac07820SSatish Balay   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
10080ac07820SSatish Balay   source = lens + nrecvs;
10090ac07820SSatish Balay   count  = nrecvs; slen = 0;
10100ac07820SSatish Balay   while (count) {
10110ac07820SSatish Balay     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
10120ac07820SSatish Balay     /* unpack receives into our local space */
10130ac07820SSatish Balay     MPI_Get_count(&recv_status,MPI_INT,&n);
10140ac07820SSatish Balay     source[imdex]  = recv_status.MPI_SOURCE;
10150ac07820SSatish Balay     lens[imdex]  = n;
10160ac07820SSatish Balay     slen += n;
10170ac07820SSatish Balay     count--;
10180ac07820SSatish Balay   }
10190ac07820SSatish Balay   PetscFree(recv_waits);
10200ac07820SSatish Balay 
10210ac07820SSatish Balay   /* move the data into the send scatter */
10220ac07820SSatish Balay   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
10230ac07820SSatish Balay   count = 0;
10240ac07820SSatish Balay   for ( i=0; i<nrecvs; i++ ) {
10250ac07820SSatish Balay     values = rvalues + i*nmax;
10260ac07820SSatish Balay     for ( j=0; j<lens[i]; j++ ) {
10270ac07820SSatish Balay       lrows[count++] = values[j] - base;
10280ac07820SSatish Balay     }
10290ac07820SSatish Balay   }
10300ac07820SSatish Balay   PetscFree(rvalues); PetscFree(lens);
10310ac07820SSatish Balay   PetscFree(owner); PetscFree(nprocs);
10320ac07820SSatish Balay 
10330ac07820SSatish Balay   /* actually zap the local rows */
1034537820f0SBarry Smith   ierr = ISCreateGeneral(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
10350ac07820SSatish Balay   PLogObjectParent(A,istmp);
10360ac07820SSatish Balay   PetscFree(lrows);
10370ac07820SSatish Balay   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
10380ac07820SSatish Balay   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
10390ac07820SSatish Balay   ierr = ISDestroy(istmp); CHKERRQ(ierr);
10400ac07820SSatish Balay 
10410ac07820SSatish Balay   /* wait on sends */
10420ac07820SSatish Balay   if (nsends) {
10430ac07820SSatish Balay     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
10440ac07820SSatish Balay     CHKPTRQ(send_status);
10450ac07820SSatish Balay     MPI_Waitall(nsends,send_waits,send_status);
10460ac07820SSatish Balay     PetscFree(send_status);
10470ac07820SSatish Balay   }
10480ac07820SSatish Balay   PetscFree(send_waits); PetscFree(svalues);
10490ac07820SSatish Balay 
10500ac07820SSatish Balay   return 0;
10510ac07820SSatish Balay }
1052ba4ca20aSSatish Balay extern int MatPrintHelp_SeqBAIJ(Mat);
1053ba4ca20aSSatish Balay static int MatPrintHelp_MPIBAIJ(Mat A)
1054ba4ca20aSSatish Balay {
1055ba4ca20aSSatish Balay   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
1056ba4ca20aSSatish Balay 
1057ba4ca20aSSatish Balay   if (!a->rank) return MatPrintHelp_SeqBAIJ(a->A);
1058ba4ca20aSSatish Balay   else return 0;
1059ba4ca20aSSatish Balay }
10600ac07820SSatish Balay 
10610ac07820SSatish Balay static int MatConvertSameType_MPIBAIJ(Mat,Mat *,int);
10620ac07820SSatish Balay 
106379bdfe76SSatish Balay /* -------------------------------------------------------------------*/
106479bdfe76SSatish Balay static struct _MatOps MatOps = {
1065bd16c2feSSatish Balay   MatSetValues_MPIBAIJ,MatGetRow_MPIBAIJ,MatRestoreRow_MPIBAIJ,MatMult_MPIBAIJ,
106693bc47c4SSatish Balay   MatMultAdd_MPIBAIJ,MatMultTrans_MPIBAIJ,MatMultTransAdd_MPIBAIJ,MatSolve_MPIBAIJ,
106793bc47c4SSatish Balay   MatSolveAdd_MPIBAIJ,MatSolveTrans_MPIBAIJ,MatSolveTransAdd_MPIBAIJ,MatLUFactor_MPIBAIJ,
10680ac07820SSatish Balay   0,0,MatTranspose_MPIBAIJ,MatGetInfo_MPIBAIJ,
1069*0e95ebc0SSatish Balay   0,MatGetDiagonal_MPIBAIJ,MatDiagonalScale_MPIBAIJ,MatNorm_MPIBAIJ,
107058667388SSatish Balay   MatAssemblyBegin_MPIBAIJ,MatAssemblyEnd_MPIBAIJ,0,MatSetOption_MPIBAIJ,
10713b2fbd54SBarry Smith   MatZeroEntries_MPIBAIJ,MatZeroRows_MPIBAIJ,MatLUFactorSymbolic_MPIBAIJ,
107293bc47c4SSatish Balay   MatLUFactorNumeric_MPIBAIJ,0,0,MatGetSize_MPIBAIJ,
107393bc47c4SSatish Balay   MatGetLocalSize_MPIBAIJ,MatGetOwnershipRange_MPIBAIJ,MatILUFactorSymbolic_MPIBAIJ,0,
1074905e6a2fSBarry Smith   0,0,0,MatConvertSameType_MPIBAIJ,0,0,
1075d212a18eSSatish Balay   0,0,0,MatGetSubMatrices_MPIBAIJ,
1076ba4ca20aSSatish Balay   MatIncreaseOverlap_MPIBAIJ,MatGetValues_MPIBAIJ,0,MatPrintHelp_MPIBAIJ,
10773b2fbd54SBarry Smith   MatScale_MPIBAIJ,0,0,0,MatGetBlockSize_MPIBAIJ,
10783b2fbd54SBarry Smith   MatGetRowIJ_MPIBAIJ,
10793b2fbd54SBarry Smith   MatRestoreRowIJ_MPIBAIJ};
108079bdfe76SSatish Balay 
108179bdfe76SSatish Balay 
108279bdfe76SSatish Balay /*@C
108379bdfe76SSatish Balay    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
108479bdfe76SSatish Balay    (block compressed row).  For good matrix assembly performance
108579bdfe76SSatish Balay    the user should preallocate the matrix storage by setting the parameters
108679bdfe76SSatish Balay    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
108779bdfe76SSatish Balay    performance can be increased by more than a factor of 50.
108879bdfe76SSatish Balay 
108979bdfe76SSatish Balay    Input Parameters:
109079bdfe76SSatish Balay .  comm - MPI communicator
109179bdfe76SSatish Balay .  bs   - size of blockk
109279bdfe76SSatish Balay .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
109392e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
109492e8d321SLois Curfman McInnes            y vector for the matrix-vector product y = Ax.
109592e8d321SLois Curfman McInnes .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
109692e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
109792e8d321SLois Curfman McInnes            x vector for the matrix-vector product y = Ax.
109879bdfe76SSatish Balay .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
109992e8d321SLois Curfman McInnes .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
110079bdfe76SSatish Balay .  d_nz  - number of block nonzeros per block row in diagonal portion of local
110179bdfe76SSatish Balay            submatrix  (same for all local rows)
110292e8d321SLois Curfman McInnes .  d_nzz - array containing the number of block nonzeros in the various block rows
110392e8d321SLois Curfman McInnes            of the in diagonal portion of the local (possibly different for each block
110492e8d321SLois Curfman McInnes            row) or PETSC_NULL.  You must leave room for the diagonal entry even if
110592e8d321SLois Curfman McInnes            it is zero.
110692e8d321SLois Curfman McInnes .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
110779bdfe76SSatish Balay            submatrix (same for all local rows).
110892e8d321SLois Curfman McInnes .  o_nzz - array containing the number of nonzeros in the various block rows of the
110992e8d321SLois Curfman McInnes            off-diagonal portion of the local submatrix (possibly different for
111092e8d321SLois Curfman McInnes            each block row) or PETSC_NULL.
111179bdfe76SSatish Balay 
111279bdfe76SSatish Balay    Output Parameter:
111379bdfe76SSatish Balay .  A - the matrix
111479bdfe76SSatish Balay 
111579bdfe76SSatish Balay    Notes:
111679bdfe76SSatish Balay    The user MUST specify either the local or global matrix dimensions
111779bdfe76SSatish Balay    (possibly both).
111879bdfe76SSatish Balay 
111979bdfe76SSatish Balay    Storage Information:
112079bdfe76SSatish Balay    For a square global matrix we define each processor's diagonal portion
112179bdfe76SSatish Balay    to be its local rows and the corresponding columns (a square submatrix);
112279bdfe76SSatish Balay    each processor's off-diagonal portion encompasses the remainder of the
112379bdfe76SSatish Balay    local matrix (a rectangular submatrix).
112479bdfe76SSatish Balay 
112579bdfe76SSatish Balay    The user can specify preallocated storage for the diagonal part of
112679bdfe76SSatish Balay    the local submatrix with either d_nz or d_nnz (not both).  Set
112779bdfe76SSatish Balay    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
112879bdfe76SSatish Balay    memory allocation.  Likewise, specify preallocated storage for the
112979bdfe76SSatish Balay    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
113079bdfe76SSatish Balay 
113179bdfe76SSatish Balay    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
113279bdfe76SSatish Balay    the figure below we depict these three local rows and all columns (0-11).
113379bdfe76SSatish Balay 
113479bdfe76SSatish Balay $          0 1 2 3 4 5 6 7 8 9 10 11
113579bdfe76SSatish Balay $         -------------------
113679bdfe76SSatish Balay $  row 3  |  o o o d d d o o o o o o
113779bdfe76SSatish Balay $  row 4  |  o o o d d d o o o o o o
113879bdfe76SSatish Balay $  row 5  |  o o o d d d o o o o o o
113979bdfe76SSatish Balay $         -------------------
114079bdfe76SSatish Balay $
114179bdfe76SSatish Balay 
114279bdfe76SSatish Balay    Thus, any entries in the d locations are stored in the d (diagonal)
114379bdfe76SSatish Balay    submatrix, and any entries in the o locations are stored in the
114479bdfe76SSatish Balay    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
114557b952d6SSatish Balay    stored simply in the MATSEQBAIJ format for compressed row storage.
114679bdfe76SSatish Balay 
114779bdfe76SSatish Balay    Now d_nz should indicate the number of nonzeros per row in the d matrix,
114879bdfe76SSatish Balay    and o_nz should indicate the number of nonzeros per row in the o matrix.
114979bdfe76SSatish Balay    In general, for PDE problems in which most nonzeros are near the diagonal,
115092e8d321SLois Curfman McInnes    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
115192e8d321SLois Curfman McInnes    or you will get TERRIBLE performance; see the users' manual chapter on
115292e8d321SLois Curfman McInnes    matrices and the file $(PETSC_DIR)/Performance.
115379bdfe76SSatish Balay 
115492e8d321SLois Curfman McInnes .keywords: matrix, block, aij, compressed row, sparse, parallel
115579bdfe76SSatish Balay 
115679bdfe76SSatish Balay .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues()
115779bdfe76SSatish Balay @*/
115879bdfe76SSatish Balay int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,
115979bdfe76SSatish Balay                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
116079bdfe76SSatish Balay {
116179bdfe76SSatish Balay   Mat          B;
116279bdfe76SSatish Balay   Mat_MPIBAIJ  *b;
1163cee3aa6bSSatish Balay   int          ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE;
116479bdfe76SSatish Balay 
116579bdfe76SSatish Balay   if (bs < 1) SETERRQ(1,"MatCreateMPIBAIJ: invalid block size specified");
116679bdfe76SSatish Balay   *A = 0;
116779bdfe76SSatish Balay   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIBAIJ,comm);
116879bdfe76SSatish Balay   PLogObjectCreate(B);
116979bdfe76SSatish Balay   B->data       = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b);
117079bdfe76SSatish Balay   PetscMemzero(b,sizeof(Mat_MPIBAIJ));
117179bdfe76SSatish Balay   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
117279bdfe76SSatish Balay   B->destroy    = MatDestroy_MPIBAIJ;
117379bdfe76SSatish Balay   B->view       = MatView_MPIBAIJ;
117457b952d6SSatish Balay 
117579bdfe76SSatish Balay   B->factor     = 0;
117679bdfe76SSatish Balay   B->assembled  = PETSC_FALSE;
117779bdfe76SSatish Balay 
117879bdfe76SSatish Balay   b->insertmode = NOT_SET_VALUES;
117979bdfe76SSatish Balay   MPI_Comm_rank(comm,&b->rank);
118079bdfe76SSatish Balay   MPI_Comm_size(comm,&b->size);
118179bdfe76SSatish Balay 
118279bdfe76SSatish Balay   if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
118357b952d6SSatish Balay     SETERRQ(1,"MatCreateMPIBAIJ:Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
1184cee3aa6bSSatish Balay   if ( M == PETSC_DECIDE && m == PETSC_DECIDE) SETERRQ(1,"MatCreateMPIBAIJ: either M or m should be specified");
1185cee3aa6bSSatish Balay   if ( M == PETSC_DECIDE && n == PETSC_DECIDE)SETERRQ(1,"MatCreateMPIBAIJ: either N or n should be specified");
1186cee3aa6bSSatish Balay   if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE;
1187cee3aa6bSSatish Balay   if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE;
118879bdfe76SSatish Balay 
118979bdfe76SSatish Balay   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
119079bdfe76SSatish Balay     work[0] = m; work[1] = n;
119179bdfe76SSatish Balay     mbs = m/bs; nbs = n/bs;
119279bdfe76SSatish Balay     MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
119379bdfe76SSatish Balay     if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;}
119479bdfe76SSatish Balay     if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;}
119579bdfe76SSatish Balay   }
119679bdfe76SSatish Balay   if (m == PETSC_DECIDE) {
119779bdfe76SSatish Balay     Mbs = M/bs;
119879bdfe76SSatish Balay     if (Mbs*bs != M) SETERRQ(1,"MatCreateMPIBAIJ: No of global rows must be divisible by blocksize");
119979bdfe76SSatish Balay     mbs = Mbs/b->size + ((Mbs % b->size) > b->rank);
120079bdfe76SSatish Balay     m   = mbs*bs;
120179bdfe76SSatish Balay   }
120279bdfe76SSatish Balay   if (n == PETSC_DECIDE) {
120379bdfe76SSatish Balay     Nbs = N/bs;
120479bdfe76SSatish Balay     if (Nbs*bs != N) SETERRQ(1,"MatCreateMPIBAIJ: No of global cols must be divisible by blocksize");
120579bdfe76SSatish Balay     nbs = Nbs/b->size + ((Nbs % b->size) > b->rank);
120679bdfe76SSatish Balay     n   = nbs*bs;
120779bdfe76SSatish Balay   }
1208cee3aa6bSSatish Balay   if (mbs*bs != m || nbs*bs != n) SETERRQ(1,"MatCreateMPIBAIJ: No of local rows, cols must be divisible by blocksize");
120979bdfe76SSatish Balay 
121079bdfe76SSatish Balay   b->m = m; B->m = m;
121179bdfe76SSatish Balay   b->n = n; B->n = n;
121279bdfe76SSatish Balay   b->N = N; B->N = N;
121379bdfe76SSatish Balay   b->M = M; B->M = M;
121479bdfe76SSatish Balay   b->bs  = bs;
121579bdfe76SSatish Balay   b->bs2 = bs*bs;
121679bdfe76SSatish Balay   b->mbs = mbs;
121779bdfe76SSatish Balay   b->nbs = nbs;
121879bdfe76SSatish Balay   b->Mbs = Mbs;
121979bdfe76SSatish Balay   b->Nbs = Nbs;
122079bdfe76SSatish Balay 
122179bdfe76SSatish Balay   /* build local table of row and column ownerships */
122279bdfe76SSatish Balay   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
122379bdfe76SSatish Balay   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIBAIJ));
12240ac07820SSatish Balay   b->cowners = b->rowners + b->size + 2;
122579bdfe76SSatish Balay   MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);
122679bdfe76SSatish Balay   b->rowners[0] = 0;
122779bdfe76SSatish Balay   for ( i=2; i<=b->size; i++ ) {
122879bdfe76SSatish Balay     b->rowners[i] += b->rowners[i-1];
122979bdfe76SSatish Balay   }
123079bdfe76SSatish Balay   b->rstart = b->rowners[b->rank];
123179bdfe76SSatish Balay   b->rend   = b->rowners[b->rank+1];
123279bdfe76SSatish Balay   MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);
123379bdfe76SSatish Balay   b->cowners[0] = 0;
123479bdfe76SSatish Balay   for ( i=2; i<=b->size; i++ ) {
123579bdfe76SSatish Balay     b->cowners[i] += b->cowners[i-1];
123679bdfe76SSatish Balay   }
123779bdfe76SSatish Balay   b->cstart = b->cowners[b->rank];
123879bdfe76SSatish Balay   b->cend   = b->cowners[b->rank+1];
123979bdfe76SSatish Balay 
124079bdfe76SSatish Balay   if (d_nz == PETSC_DEFAULT) d_nz = 5;
124179bdfe76SSatish Balay   ierr = MatCreateSeqBAIJ(MPI_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
124279bdfe76SSatish Balay   PLogObjectParent(B,b->A);
124379bdfe76SSatish Balay   if (o_nz == PETSC_DEFAULT) o_nz = 0;
124479bdfe76SSatish Balay   ierr = MatCreateSeqBAIJ(MPI_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
124579bdfe76SSatish Balay   PLogObjectParent(B,b->B);
124679bdfe76SSatish Balay 
124779bdfe76SSatish Balay   /* build cache for off array entries formed */
124879bdfe76SSatish Balay   ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
124979bdfe76SSatish Balay   b->colmap      = 0;
125079bdfe76SSatish Balay   b->garray      = 0;
125179bdfe76SSatish Balay   b->roworiented = 1;
125279bdfe76SSatish Balay 
125379bdfe76SSatish Balay   /* stuff used for matrix vector multiply */
125479bdfe76SSatish Balay   b->lvec      = 0;
125579bdfe76SSatish Balay   b->Mvctx     = 0;
125679bdfe76SSatish Balay 
125779bdfe76SSatish Balay   /* stuff for MatGetRow() */
125879bdfe76SSatish Balay   b->rowindices   = 0;
125979bdfe76SSatish Balay   b->rowvalues    = 0;
126079bdfe76SSatish Balay   b->getrowactive = PETSC_FALSE;
126179bdfe76SSatish Balay 
126279bdfe76SSatish Balay   *A = B;
126379bdfe76SSatish Balay   return 0;
126479bdfe76SSatish Balay }
12650ac07820SSatish Balay static int MatConvertSameType_MPIBAIJ(Mat matin,Mat *newmat,int cpvalues)
12660ac07820SSatish Balay {
12670ac07820SSatish Balay   Mat        mat;
12680ac07820SSatish Balay   Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data;
12690ac07820SSatish Balay   int        ierr, len=0, flg;
12700ac07820SSatish Balay 
12710ac07820SSatish Balay   *newmat       = 0;
12720ac07820SSatish Balay   PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIBAIJ,matin->comm);
12730ac07820SSatish Balay   PLogObjectCreate(mat);
12740ac07820SSatish Balay   mat->data       = (void *) (a = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(a);
12750ac07820SSatish Balay   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
12760ac07820SSatish Balay   mat->destroy    = MatDestroy_MPIBAIJ;
12770ac07820SSatish Balay   mat->view       = MatView_MPIBAIJ;
12780ac07820SSatish Balay   mat->factor     = matin->factor;
12790ac07820SSatish Balay   mat->assembled  = PETSC_TRUE;
12800ac07820SSatish Balay 
12810ac07820SSatish Balay   a->m = mat->m   = oldmat->m;
12820ac07820SSatish Balay   a->n = mat->n   = oldmat->n;
12830ac07820SSatish Balay   a->M = mat->M   = oldmat->M;
12840ac07820SSatish Balay   a->N = mat->N   = oldmat->N;
12850ac07820SSatish Balay 
12860ac07820SSatish Balay   a->bs  = oldmat->bs;
12870ac07820SSatish Balay   a->bs2 = oldmat->bs2;
12880ac07820SSatish Balay   a->mbs = oldmat->mbs;
12890ac07820SSatish Balay   a->nbs = oldmat->nbs;
12900ac07820SSatish Balay   a->Mbs = oldmat->Mbs;
12910ac07820SSatish Balay   a->Nbs = oldmat->Nbs;
12920ac07820SSatish Balay 
12930ac07820SSatish Balay   a->rstart       = oldmat->rstart;
12940ac07820SSatish Balay   a->rend         = oldmat->rend;
12950ac07820SSatish Balay   a->cstart       = oldmat->cstart;
12960ac07820SSatish Balay   a->cend         = oldmat->cend;
12970ac07820SSatish Balay   a->size         = oldmat->size;
12980ac07820SSatish Balay   a->rank         = oldmat->rank;
12990ac07820SSatish Balay   a->insertmode   = NOT_SET_VALUES;
13000ac07820SSatish Balay   a->rowvalues    = 0;
13010ac07820SSatish Balay   a->getrowactive = PETSC_FALSE;
13020ac07820SSatish Balay 
13030ac07820SSatish Balay   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
13040ac07820SSatish Balay   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIBAIJ));
13050ac07820SSatish Balay   a->cowners = a->rowners + a->size + 2;
13060ac07820SSatish Balay   PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
13070ac07820SSatish Balay   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
13080ac07820SSatish Balay   if (oldmat->colmap) {
13090ac07820SSatish Balay     a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap);
13100ac07820SSatish Balay     PLogObjectMemory(mat,(a->Nbs)*sizeof(int));
13110ac07820SSatish Balay     PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));
13120ac07820SSatish Balay   } else a->colmap = 0;
13130ac07820SSatish Balay   if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) {
13140ac07820SSatish Balay     a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray);
13150ac07820SSatish Balay     PLogObjectMemory(mat,len*sizeof(int));
13160ac07820SSatish Balay     PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
13170ac07820SSatish Balay   } else a->garray = 0;
13180ac07820SSatish Balay 
13190ac07820SSatish Balay   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
13200ac07820SSatish Balay   PLogObjectParent(mat,a->lvec);
13210ac07820SSatish Balay   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
13220ac07820SSatish Balay   PLogObjectParent(mat,a->Mvctx);
13230ac07820SSatish Balay   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
13240ac07820SSatish Balay   PLogObjectParent(mat,a->A);
13250ac07820SSatish Balay   ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
13260ac07820SSatish Balay   PLogObjectParent(mat,a->B);
13270ac07820SSatish Balay   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
13280ac07820SSatish Balay   if (flg) {
13290ac07820SSatish Balay     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
13300ac07820SSatish Balay   }
13310ac07820SSatish Balay   *newmat = mat;
13320ac07820SSatish Balay   return 0;
13330ac07820SSatish Balay }
133457b952d6SSatish Balay 
133557b952d6SSatish Balay #include "sys.h"
133657b952d6SSatish Balay 
133757b952d6SSatish Balay int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat)
133857b952d6SSatish Balay {
133957b952d6SSatish Balay   Mat          A;
134057b952d6SSatish Balay   int          i, nz, ierr, j,rstart, rend, fd;
134157b952d6SSatish Balay   Scalar       *vals,*buf;
134257b952d6SSatish Balay   MPI_Comm     comm = ((PetscObject)viewer)->comm;
134357b952d6SSatish Balay   MPI_Status   status;
1344cee3aa6bSSatish Balay   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
134557b952d6SSatish Balay   int          *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf;
134657b952d6SSatish Balay   int          flg,tag = ((PetscObject)viewer)->tag,bs=1,bs2,Mbs,mbs,extra_rows;
134757b952d6SSatish Balay   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
134857b952d6SSatish Balay   int          dcount,kmax,k,nzcount,tmp;
134957b952d6SSatish Balay 
135057b952d6SSatish Balay 
135157b952d6SSatish Balay   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
135257b952d6SSatish Balay   bs2  = bs*bs;
135357b952d6SSatish Balay 
135457b952d6SSatish Balay   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
135557b952d6SSatish Balay   if (!rank) {
135657b952d6SSatish Balay     ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
135757b952d6SSatish Balay     ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr);
1358cee3aa6bSSatish Balay     if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIBAIJ:not matrix object");
135957b952d6SSatish Balay   }
136057b952d6SSatish Balay 
136157b952d6SSatish Balay   MPI_Bcast(header+1,3,MPI_INT,0,comm);
136257b952d6SSatish Balay   M = header[1]; N = header[2];
136357b952d6SSatish Balay 
136457b952d6SSatish Balay   if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices");
136557b952d6SSatish Balay 
136657b952d6SSatish Balay   /*
136757b952d6SSatish Balay      This code adds extra rows to make sure the number of rows is
136857b952d6SSatish Balay      divisible by the blocksize
136957b952d6SSatish Balay   */
137057b952d6SSatish Balay   Mbs        = M/bs;
137157b952d6SSatish Balay   extra_rows = bs - M + bs*(Mbs);
137257b952d6SSatish Balay   if (extra_rows == bs) extra_rows = 0;
137357b952d6SSatish Balay   else                  Mbs++;
137457b952d6SSatish Balay   if (extra_rows &&!rank) {
1375537820f0SBarry Smith     PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n");
137657b952d6SSatish Balay   }
1377537820f0SBarry Smith 
137857b952d6SSatish Balay   /* determine ownership of all rows */
137957b952d6SSatish Balay   mbs = Mbs/size + ((Mbs % size) > rank);
138057b952d6SSatish Balay   m   = mbs * bs;
1381cee3aa6bSSatish Balay   rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners);
1382cee3aa6bSSatish Balay   browners = rowners + size + 1;
138357b952d6SSatish Balay   MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);
138457b952d6SSatish Balay   rowners[0] = 0;
1385cee3aa6bSSatish Balay   for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1];
1386cee3aa6bSSatish Balay   for ( i=0; i<=size;  i++ ) browners[i] = rowners[i]*bs;
138757b952d6SSatish Balay   rstart = rowners[rank];
138857b952d6SSatish Balay   rend   = rowners[rank+1];
138957b952d6SSatish Balay 
139057b952d6SSatish Balay   /* distribute row lengths to all processors */
139157b952d6SSatish Balay   locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens);
139257b952d6SSatish Balay   if (!rank) {
139357b952d6SSatish Balay     rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths);
139457b952d6SSatish Balay     ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
139557b952d6SSatish Balay     for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
139657b952d6SSatish Balay     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
1397cee3aa6bSSatish Balay     for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i];
1398cee3aa6bSSatish Balay     MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);
139957b952d6SSatish Balay     PetscFree(sndcounts);
140057b952d6SSatish Balay   }
140157b952d6SSatish Balay   else {
140257b952d6SSatish Balay     MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);
140357b952d6SSatish Balay   }
140457b952d6SSatish Balay 
140557b952d6SSatish Balay   if (!rank) {
140657b952d6SSatish Balay     /* calculate the number of nonzeros on each processor */
140757b952d6SSatish Balay     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
140857b952d6SSatish Balay     PetscMemzero(procsnz,size*sizeof(int));
140957b952d6SSatish Balay     for ( i=0; i<size; i++ ) {
141057b952d6SSatish Balay       for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) {
141157b952d6SSatish Balay         procsnz[i] += rowlengths[j];
141257b952d6SSatish Balay       }
141357b952d6SSatish Balay     }
141457b952d6SSatish Balay     PetscFree(rowlengths);
141557b952d6SSatish Balay 
141657b952d6SSatish Balay     /* determine max buffer needed and allocate it */
141757b952d6SSatish Balay     maxnz = 0;
141857b952d6SSatish Balay     for ( i=0; i<size; i++ ) {
141957b952d6SSatish Balay       maxnz = PetscMax(maxnz,procsnz[i]);
142057b952d6SSatish Balay     }
142157b952d6SSatish Balay     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
142257b952d6SSatish Balay 
142357b952d6SSatish Balay     /* read in my part of the matrix column indices  */
142457b952d6SSatish Balay     nz = procsnz[0];
142557b952d6SSatish Balay     ibuf = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
142657b952d6SSatish Balay     mycols = ibuf;
1427cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
142857b952d6SSatish Balay     ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr);
1429cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
1430cee3aa6bSSatish Balay 
143157b952d6SSatish Balay     /* read in every ones (except the last) and ship off */
143257b952d6SSatish Balay     for ( i=1; i<size-1; i++ ) {
143357b952d6SSatish Balay       nz = procsnz[i];
143457b952d6SSatish Balay       ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
143557b952d6SSatish Balay       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
143657b952d6SSatish Balay     }
143757b952d6SSatish Balay     /* read in the stuff for the last proc */
143857b952d6SSatish Balay     if ( size != 1 ) {
143957b952d6SSatish Balay       nz = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
144057b952d6SSatish Balay       ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
144157b952d6SSatish Balay       for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i;
1442cee3aa6bSSatish Balay       MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);
144357b952d6SSatish Balay     }
144457b952d6SSatish Balay     PetscFree(cols);
144557b952d6SSatish Balay   }
144657b952d6SSatish Balay   else {
144757b952d6SSatish Balay     /* determine buffer space needed for message */
144857b952d6SSatish Balay     nz = 0;
144957b952d6SSatish Balay     for ( i=0; i<m; i++ ) {
145057b952d6SSatish Balay       nz += locrowlens[i];
145157b952d6SSatish Balay     }
145257b952d6SSatish Balay     ibuf = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
145357b952d6SSatish Balay     mycols = ibuf;
145457b952d6SSatish Balay     /* receive message of column indices*/
145557b952d6SSatish Balay     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
145657b952d6SSatish Balay     MPI_Get_count(&status,MPI_INT,&maxnz);
1457cee3aa6bSSatish Balay     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIBAIJ:something is wrong with file");
145857b952d6SSatish Balay   }
145957b952d6SSatish Balay 
146057b952d6SSatish Balay   /* loop over local rows, determining number of off diagonal entries */
1461cee3aa6bSSatish Balay   dlens  = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens);
1462cee3aa6bSSatish Balay   odlens = dlens + (rend-rstart);
146357b952d6SSatish Balay   mask   = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask);
1464cee3aa6bSSatish Balay   PetscMemzero(mask,3*Mbs*sizeof(int));
146557b952d6SSatish Balay   masked1 = mask    + Mbs;
146657b952d6SSatish Balay   masked2 = masked1 + Mbs;
146757b952d6SSatish Balay   rowcount = 0; nzcount = 0;
146857b952d6SSatish Balay   for ( i=0; i<mbs; i++ ) {
146957b952d6SSatish Balay     dcount  = 0;
147057b952d6SSatish Balay     odcount = 0;
147157b952d6SSatish Balay     for ( j=0; j<bs; j++ ) {
147257b952d6SSatish Balay       kmax = locrowlens[rowcount];
147357b952d6SSatish Balay       for ( k=0; k<kmax; k++ ) {
147457b952d6SSatish Balay         tmp = mycols[nzcount++]/bs;
147557b952d6SSatish Balay         if (!mask[tmp]) {
147657b952d6SSatish Balay           mask[tmp] = 1;
147757b952d6SSatish Balay           if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp;
147857b952d6SSatish Balay           else masked1[dcount++] = tmp;
147957b952d6SSatish Balay         }
148057b952d6SSatish Balay       }
148157b952d6SSatish Balay       rowcount++;
148257b952d6SSatish Balay     }
1483cee3aa6bSSatish Balay 
148457b952d6SSatish Balay     dlens[i]  = dcount;
148557b952d6SSatish Balay     odlens[i] = odcount;
1486cee3aa6bSSatish Balay 
148757b952d6SSatish Balay     /* zero out the mask elements we set */
148857b952d6SSatish Balay     for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0;
148957b952d6SSatish Balay     for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0;
149057b952d6SSatish Balay   }
1491cee3aa6bSSatish Balay 
149257b952d6SSatish Balay   /* create our matrix */
1493537820f0SBarry Smith   ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);
1494537820f0SBarry Smith          CHKERRQ(ierr);
149557b952d6SSatish Balay   A = *newmat;
14966d4a8577SBarry Smith   MatSetOption(A,MAT_COLUMNS_SORTED);
149757b952d6SSatish Balay 
149857b952d6SSatish Balay   if (!rank) {
149957b952d6SSatish Balay     buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf);
150057b952d6SSatish Balay     /* read in my part of the matrix numerical values  */
150157b952d6SSatish Balay     nz = procsnz[0];
150257b952d6SSatish Balay     vals = buf;
1503cee3aa6bSSatish Balay     mycols = ibuf;
1504cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
150557b952d6SSatish Balay     ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
1506cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
1507537820f0SBarry Smith 
150857b952d6SSatish Balay     /* insert into matrix */
150957b952d6SSatish Balay     jj      = rstart*bs;
151057b952d6SSatish Balay     for ( i=0; i<m; i++ ) {
151157b952d6SSatish Balay       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
151257b952d6SSatish Balay       mycols += locrowlens[i];
151357b952d6SSatish Balay       vals   += locrowlens[i];
151457b952d6SSatish Balay       jj++;
151557b952d6SSatish Balay     }
151657b952d6SSatish Balay     /* read in other processors (except the last one) and ship out */
151757b952d6SSatish Balay     for ( i=1; i<size-1; i++ ) {
151857b952d6SSatish Balay       nz = procsnz[i];
151957b952d6SSatish Balay       vals = buf;
152057b952d6SSatish Balay       ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
152157b952d6SSatish Balay       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
152257b952d6SSatish Balay     }
152357b952d6SSatish Balay     /* the last proc */
152457b952d6SSatish Balay     if ( size != 1 ){
152557b952d6SSatish Balay       nz = procsnz[i] - extra_rows;
1526cee3aa6bSSatish Balay       vals = buf;
152757b952d6SSatish Balay       ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
152857b952d6SSatish Balay       for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0;
1529cee3aa6bSSatish Balay       MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);
153057b952d6SSatish Balay     }
153157b952d6SSatish Balay     PetscFree(procsnz);
153257b952d6SSatish Balay   }
153357b952d6SSatish Balay   else {
153457b952d6SSatish Balay     /* receive numeric values */
153557b952d6SSatish Balay     buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf);
153657b952d6SSatish Balay 
153757b952d6SSatish Balay     /* receive message of values*/
153857b952d6SSatish Balay     vals = buf;
1539cee3aa6bSSatish Balay     mycols = ibuf;
154057b952d6SSatish Balay     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
154157b952d6SSatish Balay     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1542cee3aa6bSSatish Balay     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIBAIJ:something is wrong with file");
154357b952d6SSatish Balay 
154457b952d6SSatish Balay     /* insert into matrix */
154557b952d6SSatish Balay     jj      = rstart*bs;
1546cee3aa6bSSatish Balay     for ( i=0; i<m; i++ ) {
154757b952d6SSatish Balay       ierr    = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
154857b952d6SSatish Balay       mycols += locrowlens[i];
154957b952d6SSatish Balay       vals   += locrowlens[i];
155057b952d6SSatish Balay       jj++;
155157b952d6SSatish Balay     }
155257b952d6SSatish Balay   }
155357b952d6SSatish Balay   PetscFree(locrowlens);
155457b952d6SSatish Balay   PetscFree(buf);
155557b952d6SSatish Balay   PetscFree(ibuf);
155657b952d6SSatish Balay   PetscFree(rowners);
155757b952d6SSatish Balay   PetscFree(dlens);
1558cee3aa6bSSatish Balay   PetscFree(mask);
15596d4a8577SBarry Smith   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
15606d4a8577SBarry Smith   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
156157b952d6SSatish Balay   return 0;
156257b952d6SSatish Balay }
156357b952d6SSatish Balay 
156457b952d6SSatish Balay 
1565