xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 026e39d04bcf2a445c0a9332c845c324154b2e8f)
179bdfe76SSatish Balay #ifndef lint
2*026e39d0SSatish Balay static char vcid[] = "$Id: mpibaij.c,v 1.41 1996/12/07 00:50:45 balay Exp balay $";
379bdfe76SSatish Balay #endif
479bdfe76SSatish Balay 
570f55243SBarry Smith #include "src/mat/impls/baij/mpi/mpibaij.h"
6c16cb8f2SBarry Smith #include "src/vec/vecimpl.h"
779bdfe76SSatish Balay 
857b952d6SSatish Balay #include "draw.h"
957b952d6SSatish Balay #include "pinclude/pviewer.h"
1057b952d6SSatish Balay 
1157b952d6SSatish Balay extern int MatSetUpMultiply_MPIBAIJ(Mat);
1257b952d6SSatish Balay extern int DisAssemble_MPIBAIJ(Mat);
13d212a18eSSatish Balay extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int);
14d212a18eSSatish Balay extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatGetSubMatrixCall,Mat **);
1593bc47c4SSatish Balay extern int MatLUFactorSymbolic_MPIBAIJ(Mat,IS,IS,double,Mat *);
1693bc47c4SSatish Balay extern int MatLUFactorNumeric_MPIBAIJ(Mat,Mat *);
1793bc47c4SSatish Balay extern int MatLUFactor_MPIBAIJ(Mat,IS,IS,double);
1893bc47c4SSatish Balay extern int MatSolve_MPIBAIJ(Mat,Vec,Vec);
1993bc47c4SSatish Balay extern int MatSolveAdd_MPIBAIJ(Mat,Vec,Vec,Vec);
2093bc47c4SSatish Balay extern int MatSolveTrans_MPIBAIJ(Mat,Vec,Vec);
2193bc47c4SSatish Balay extern int MatSolveTransAdd_MPIBAIJ(Mat,Vec,Vec,Vec);
2293bc47c4SSatish Balay extern int MatILUFactorSymbolic_MPIBAIJ(Mat,IS,IS,double,int,Mat *);
2357b952d6SSatish Balay 
243b2fbd54SBarry Smith 
25537820f0SBarry Smith /*
26537820f0SBarry Smith      Local utility routine that creates a mapping from the global column
2757b952d6SSatish Balay    number to the local number in the off-diagonal part of the local
2857b952d6SSatish Balay    storage of the matrix.  This is done in a non scable way since the
2957b952d6SSatish Balay    length of colmap equals the global matrix length.
3057b952d6SSatish Balay */
31*026e39d0SSatish Balay #undef __FUNCTION__
32*026e39d0SSatish Balay #define __FUNCTION__ "CreateColmap_MPIBAIJ_Private"
3357b952d6SSatish Balay static int CreateColmap_MPIBAIJ_Private(Mat mat)
3457b952d6SSatish Balay {
3557b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
3657b952d6SSatish Balay   Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data;
37928fc39bSSatish Balay   int         nbs = B->nbs,i,bs=B->bs;;
3857b952d6SSatish Balay 
3957b952d6SSatish Balay   baij->colmap = (int *) PetscMalloc(baij->Nbs*sizeof(int));CHKPTRQ(baij->colmap);
4057b952d6SSatish Balay   PLogObjectMemory(mat,baij->Nbs*sizeof(int));
4157b952d6SSatish Balay   PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));
42928fc39bSSatish Balay   for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1;
4357b952d6SSatish Balay   return 0;
4457b952d6SSatish Balay }
4557b952d6SSatish Balay 
46*026e39d0SSatish Balay #undef __FUNCTION__
47*026e39d0SSatish Balay #define __FUNCTION__ "MatGetRowIJ_MPIBAIJ("
483b2fbd54SBarry Smith static int MatGetRowIJ_MPIBAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
493b2fbd54SBarry Smith                             PetscTruth *done)
5057b952d6SSatish Balay {
513b2fbd54SBarry Smith   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *) mat->data;
5257b952d6SSatish Balay   int         ierr;
533b2fbd54SBarry Smith   if (aij->size == 1) {
543b2fbd54SBarry Smith     ierr = MatGetRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
553b2fbd54SBarry Smith   } else SETERRQ(1,"MatGetRowIJ_MPIBAIJ:not supported in parallel");
563b2fbd54SBarry Smith   return 0;
573b2fbd54SBarry Smith }
583b2fbd54SBarry Smith 
59*026e39d0SSatish Balay #undef __FUNCTION__
60*026e39d0SSatish Balay #define __FUNCTION__ "MatRestoreRowIJ_MPIBAIJ"
613b2fbd54SBarry Smith static int MatRestoreRowIJ_MPIBAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
623b2fbd54SBarry Smith                                 PetscTruth *done)
633b2fbd54SBarry Smith {
643b2fbd54SBarry Smith   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *) mat->data;
653b2fbd54SBarry Smith   int        ierr;
663b2fbd54SBarry Smith   if (aij->size == 1) {
673b2fbd54SBarry Smith     ierr = MatRestoreRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
683b2fbd54SBarry Smith   } else SETERRQ(1,"MatRestoreRowIJ_MPIBAIJ:not supported in parallel");
6957b952d6SSatish Balay   return 0;
7057b952d6SSatish Balay }
7157b952d6SSatish Balay 
72639f9d9dSBarry Smith extern int MatSetValues_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
73*026e39d0SSatish Balay #undef __FUNCTION__
74*026e39d0SSatish Balay #define __FUNCTION__ "MatSetValues_MPIBAIJ"
7557b952d6SSatish Balay static int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
7657b952d6SSatish Balay {
7757b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
7857b952d6SSatish Balay   Scalar      value;
794fa0d573SSatish Balay   int         ierr,i,j,row,col;
804fa0d573SSatish Balay   int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
814fa0d573SSatish Balay   int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
824fa0d573SSatish Balay   int         cend_orig=baij->cend_bs,bs=baij->bs;
8357b952d6SSatish Balay 
842c3acbe9SBarry Smith #if defined(PETSC_BOPT_g)
8557b952d6SSatish Balay   if (baij->insertmode != NOT_SET_VALUES && baij->insertmode != addv) {
8657b952d6SSatish Balay     SETERRQ(1,"MatSetValues_MPIBAIJ:Cannot mix inserts and adds");
8757b952d6SSatish Balay   }
882c3acbe9SBarry Smith #endif
8957b952d6SSatish Balay   baij->insertmode = addv;
9057b952d6SSatish Balay   for ( i=0; i<m; i++ ) {
91639f9d9dSBarry Smith #if defined(PETSC_BOPT_g)
9257b952d6SSatish Balay     if (im[i] < 0) SETERRQ(1,"MatSetValues_MPIBAIJ:Negative row");
9357b952d6SSatish Balay     if (im[i] >= baij->M) SETERRQ(1,"MatSetValues_MPIBAIJ:Row too large");
94639f9d9dSBarry Smith #endif
9557b952d6SSatish Balay     if (im[i] >= rstart_orig && im[i] < rend_orig) {
9657b952d6SSatish Balay       row = im[i] - rstart_orig;
9757b952d6SSatish Balay       for ( j=0; j<n; j++ ) {
9857b952d6SSatish Balay         if (in[j] >= cstart_orig && in[j] < cend_orig){
9957b952d6SSatish Balay           col = in[j] - cstart_orig;
10057b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
101639f9d9dSBarry Smith           ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
10257b952d6SSatish Balay         }
103639f9d9dSBarry Smith #if defined(PETSC_BOPT_g)
104639f9d9dSBarry Smith         else if (in[j] < 0) {SETERRQ(1,"MatSetValues_MPIBAIJ:Negative column");}
105639f9d9dSBarry Smith         else if (in[j] >= baij->N) {SETERRQ(1,"MatSetValues_MPIBAIJ:Col too large");}
106639f9d9dSBarry Smith #endif
10757b952d6SSatish Balay         else {
10857b952d6SSatish Balay           if (mat->was_assembled) {
109905e6a2fSBarry Smith             if (!baij->colmap) {
110905e6a2fSBarry Smith               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
111905e6a2fSBarry Smith             }
112905e6a2fSBarry Smith             col = baij->colmap[in[j]/bs] - 1 + in[j]%bs;
11357b952d6SSatish Balay             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
11457b952d6SSatish Balay               ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
11557b952d6SSatish Balay               col =  in[j];
11657b952d6SSatish Balay             }
11757b952d6SSatish Balay           }
11857b952d6SSatish Balay           else col = in[j];
11957b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
120639f9d9dSBarry Smith           ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
12157b952d6SSatish Balay         }
12257b952d6SSatish Balay       }
12357b952d6SSatish Balay     }
12457b952d6SSatish Balay     else {
12590f02eecSBarry Smith       if (roworiented && !baij->donotstash) {
12657b952d6SSatish Balay         ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
12757b952d6SSatish Balay       }
12857b952d6SSatish Balay       else {
12990f02eecSBarry Smith         if (!baij->donotstash) {
13057b952d6SSatish Balay           row = im[i];
13157b952d6SSatish Balay 	  for ( j=0; j<n; j++ ) {
13257b952d6SSatish Balay 	    ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
13357b952d6SSatish Balay           }
13457b952d6SSatish Balay         }
13557b952d6SSatish Balay       }
13657b952d6SSatish Balay     }
13790f02eecSBarry Smith   }
13857b952d6SSatish Balay   return 0;
13957b952d6SSatish Balay }
14057b952d6SSatish Balay 
141*026e39d0SSatish Balay #undef __FUNCTION__
142*026e39d0SSatish Balay #define __FUNCTION__ "MatGetValues_MPIBAIJ"
143d6de1c52SSatish Balay static int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
144d6de1c52SSatish Balay {
145d6de1c52SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
146d6de1c52SSatish Balay   int        bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs;
147d6de1c52SSatish Balay   int        bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col;
148d6de1c52SSatish Balay 
149d6de1c52SSatish Balay   for ( i=0; i<m; i++ ) {
150d6de1c52SSatish Balay     if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIBAIJ:Negative row");
151d6de1c52SSatish Balay     if (idxm[i] >= baij->M) SETERRQ(1,"MatGetValues_MPIBAIJ:Row too large");
152d6de1c52SSatish Balay     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
153d6de1c52SSatish Balay       row = idxm[i] - bsrstart;
154d6de1c52SSatish Balay       for ( j=0; j<n; j++ ) {
155d6de1c52SSatish Balay         if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIBAIJ:Negative column");
156d6de1c52SSatish Balay         if (idxn[j] >= baij->N) SETERRQ(1,"MatGetValues_MPIBAIJ:Col too large");
157d6de1c52SSatish Balay         if (idxn[j] >= bscstart && idxn[j] < bscend){
158d6de1c52SSatish Balay           col = idxn[j] - bscstart;
159d6de1c52SSatish Balay           ierr = MatGetValues(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
160d6de1c52SSatish Balay         }
161d6de1c52SSatish Balay         else {
162905e6a2fSBarry Smith           if (!baij->colmap) {
163905e6a2fSBarry Smith             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
164905e6a2fSBarry Smith           }
165e60e1c95SSatish Balay           if((baij->colmap[idxn[j]/bs]-1 < 0) ||
166dcb20de4SSatish Balay              (baij->garray[(baij->colmap[idxn[j]/bs]-1)/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
167d9d09a02SSatish Balay           else {
168dcb20de4SSatish Balay             col  = (baij->colmap[idxn[j]/bs]-1) + idxn[j]%bs;
169d6de1c52SSatish Balay             ierr = MatGetValues(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
170d6de1c52SSatish Balay           }
171d6de1c52SSatish Balay         }
172d6de1c52SSatish Balay       }
173d9d09a02SSatish Balay     }
174d6de1c52SSatish Balay     else {
175d6de1c52SSatish Balay       SETERRQ(1,"MatGetValues_MPIBAIJ:Only local values currently supported");
176d6de1c52SSatish Balay     }
177d6de1c52SSatish Balay   }
178d6de1c52SSatish Balay   return 0;
179d6de1c52SSatish Balay }
180d6de1c52SSatish Balay 
181*026e39d0SSatish Balay #undef __FUNCTION__
182*026e39d0SSatish Balay #define __FUNCTION__ "MatNorm_MPIBAIJ"
183d6de1c52SSatish Balay static int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm)
184d6de1c52SSatish Balay {
185d6de1c52SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
186d6de1c52SSatish Balay   Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data;
187acdf5bf4SSatish Balay   int        ierr, i,bs2=baij->bs2;
188d6de1c52SSatish Balay   double     sum = 0.0;
189d6de1c52SSatish Balay   Scalar     *v;
190d6de1c52SSatish Balay 
191d6de1c52SSatish Balay   if (baij->size == 1) {
192d6de1c52SSatish Balay     ierr =  MatNorm(baij->A,type,norm); CHKERRQ(ierr);
193d6de1c52SSatish Balay   } else {
194d6de1c52SSatish Balay     if (type == NORM_FROBENIUS) {
195d6de1c52SSatish Balay       v = amat->a;
196d6de1c52SSatish Balay       for (i=0; i<amat->nz*bs2; i++ ) {
197d6de1c52SSatish Balay #if defined(PETSC_COMPLEX)
198d6de1c52SSatish Balay         sum += real(conj(*v)*(*v)); v++;
199d6de1c52SSatish Balay #else
200d6de1c52SSatish Balay         sum += (*v)*(*v); v++;
201d6de1c52SSatish Balay #endif
202d6de1c52SSatish Balay       }
203d6de1c52SSatish Balay       v = bmat->a;
204d6de1c52SSatish Balay       for (i=0; i<bmat->nz*bs2; i++ ) {
205d6de1c52SSatish Balay #if defined(PETSC_COMPLEX)
206d6de1c52SSatish Balay         sum += real(conj(*v)*(*v)); v++;
207d6de1c52SSatish Balay #else
208d6de1c52SSatish Balay         sum += (*v)*(*v); v++;
209d6de1c52SSatish Balay #endif
210d6de1c52SSatish Balay       }
211d6de1c52SSatish Balay       MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
212d6de1c52SSatish Balay       *norm = sqrt(*norm);
213d6de1c52SSatish Balay     }
214acdf5bf4SSatish Balay     else
215b0267e0aSLois Curfman McInnes       SETERRQ(PETSC_ERR_SUP,"MatNorm_MPIBAIJ:No support for this norm yet");
216d6de1c52SSatish Balay   }
217d6de1c52SSatish Balay   return 0;
218d6de1c52SSatish Balay }
21957b952d6SSatish Balay 
220*026e39d0SSatish Balay #undef __FUNCTION__
221*026e39d0SSatish Balay #define __FUNCTION__ "MatAssemblyBegin_MPIBAIJ"
22257b952d6SSatish Balay static int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
22357b952d6SSatish Balay {
22457b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
22557b952d6SSatish Balay   MPI_Comm    comm = mat->comm;
22657b952d6SSatish Balay   int         size = baij->size, *owners = baij->rowners,bs=baij->bs;
22757b952d6SSatish Balay   int         rank = baij->rank,tag = mat->tag, *owner,*starts,count,ierr;
22857b952d6SSatish Balay   MPI_Request *send_waits,*recv_waits;
22957b952d6SSatish Balay   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
23057b952d6SSatish Balay   InsertMode  addv;
23157b952d6SSatish Balay   Scalar      *rvalues,*svalues;
23257b952d6SSatish Balay 
23357b952d6SSatish Balay   /* make sure all processors are either in INSERTMODE or ADDMODE */
23457b952d6SSatish Balay   MPI_Allreduce(&baij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
23557b952d6SSatish Balay   if (addv == (ADD_VALUES|INSERT_VALUES)) {
23657b952d6SSatish Balay     SETERRQ(1,"MatAssemblyBegin_MPIBAIJ:Some processors inserted others added");
23757b952d6SSatish Balay   }
23857b952d6SSatish Balay   baij->insertmode = addv; /* in case this processor had no cache */
23957b952d6SSatish Balay 
24057b952d6SSatish Balay   /*  first count number of contributors to each processor */
24157b952d6SSatish Balay   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
24257b952d6SSatish Balay   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
24357b952d6SSatish Balay   owner = (int *) PetscMalloc( (baij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
24457b952d6SSatish Balay   for ( i=0; i<baij->stash.n; i++ ) {
24557b952d6SSatish Balay     idx = baij->stash.idx[i];
24657b952d6SSatish Balay     for ( j=0; j<size; j++ ) {
24757b952d6SSatish Balay       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
24857b952d6SSatish Balay         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
24957b952d6SSatish Balay       }
25057b952d6SSatish Balay     }
25157b952d6SSatish Balay   }
25257b952d6SSatish Balay   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
25357b952d6SSatish Balay 
25457b952d6SSatish Balay   /* inform other processors of number of messages and max length*/
25557b952d6SSatish Balay   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
25657b952d6SSatish Balay   MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);
25757b952d6SSatish Balay   nreceives = work[rank];
25857b952d6SSatish Balay   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
25957b952d6SSatish Balay   nmax = work[rank];
26057b952d6SSatish Balay   PetscFree(work);
26157b952d6SSatish Balay 
26257b952d6SSatish Balay   /* post receives:
26357b952d6SSatish Balay        1) each message will consist of ordered pairs
26457b952d6SSatish Balay      (global index,value) we store the global index as a double
26557b952d6SSatish Balay      to simplify the message passing.
26657b952d6SSatish Balay        2) since we don't know how long each individual message is we
26757b952d6SSatish Balay      allocate the largest needed buffer for each receive. Potentially
26857b952d6SSatish Balay      this is a lot of wasted space.
26957b952d6SSatish Balay 
27057b952d6SSatish Balay 
27157b952d6SSatish Balay        This could be done better.
27257b952d6SSatish Balay   */
27357b952d6SSatish Balay   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
27457b952d6SSatish Balay   CHKPTRQ(rvalues);
27557b952d6SSatish Balay   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
27657b952d6SSatish Balay   CHKPTRQ(recv_waits);
27757b952d6SSatish Balay   for ( i=0; i<nreceives; i++ ) {
27857b952d6SSatish Balay     MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
27957b952d6SSatish Balay               comm,recv_waits+i);
28057b952d6SSatish Balay   }
28157b952d6SSatish Balay 
28257b952d6SSatish Balay   /* do sends:
28357b952d6SSatish Balay       1) starts[i] gives the starting index in svalues for stuff going to
28457b952d6SSatish Balay          the ith processor
28557b952d6SSatish Balay   */
28657b952d6SSatish Balay   svalues = (Scalar *) PetscMalloc(3*(baij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
28757b952d6SSatish Balay   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
28857b952d6SSatish Balay   CHKPTRQ(send_waits);
28957b952d6SSatish Balay   starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
29057b952d6SSatish Balay   starts[0] = 0;
29157b952d6SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
29257b952d6SSatish Balay   for ( i=0; i<baij->stash.n; i++ ) {
29357b952d6SSatish Balay     svalues[3*starts[owner[i]]]       = (Scalar)  baij->stash.idx[i];
29457b952d6SSatish Balay     svalues[3*starts[owner[i]]+1]     = (Scalar)  baij->stash.idy[i];
29557b952d6SSatish Balay     svalues[3*(starts[owner[i]]++)+2] =  baij->stash.array[i];
29657b952d6SSatish Balay   }
29757b952d6SSatish Balay   PetscFree(owner);
29857b952d6SSatish Balay   starts[0] = 0;
29957b952d6SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
30057b952d6SSatish Balay   count = 0;
30157b952d6SSatish Balay   for ( i=0; i<size; i++ ) {
30257b952d6SSatish Balay     if (procs[i]) {
30357b952d6SSatish Balay       MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
30457b952d6SSatish Balay                 comm,send_waits+count++);
30557b952d6SSatish Balay     }
30657b952d6SSatish Balay   }
30757b952d6SSatish Balay   PetscFree(starts); PetscFree(nprocs);
30857b952d6SSatish Balay 
30957b952d6SSatish Balay   /* Free cache space */
310d2dc9b81SLois Curfman McInnes   PLogInfo(mat,"MatAssemblyBegin_MPIBAIJ:Number of off-processor values %d\n",baij->stash.n);
31157b952d6SSatish Balay   ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr);
31257b952d6SSatish Balay 
31357b952d6SSatish Balay   baij->svalues    = svalues;    baij->rvalues    = rvalues;
31457b952d6SSatish Balay   baij->nsends     = nsends;     baij->nrecvs     = nreceives;
31557b952d6SSatish Balay   baij->send_waits = send_waits; baij->recv_waits = recv_waits;
31657b952d6SSatish Balay   baij->rmax       = nmax;
31757b952d6SSatish Balay 
31857b952d6SSatish Balay   return 0;
31957b952d6SSatish Balay }
32057b952d6SSatish Balay 
32157b952d6SSatish Balay 
322*026e39d0SSatish Balay #undef __FUNCTION__
323*026e39d0SSatish Balay #define __FUNCTION__ "MatAssemblyEnd_MPIBAIJ"
32457b952d6SSatish Balay static int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
32557b952d6SSatish Balay {
32657b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
32757b952d6SSatish Balay   MPI_Status  *send_status,recv_status;
32857b952d6SSatish Balay   int         imdex,nrecvs = baij->nrecvs, count = nrecvs, i, n, ierr;
32957b952d6SSatish Balay   int         bs=baij->bs,row,col,other_disassembled;
33057b952d6SSatish Balay   Scalar      *values,val;
33157b952d6SSatish Balay   InsertMode  addv = baij->insertmode;
33257b952d6SSatish Balay 
33357b952d6SSatish Balay   /*  wait on receives */
33457b952d6SSatish Balay   while (count) {
33557b952d6SSatish Balay     MPI_Waitany(nrecvs,baij->recv_waits,&imdex,&recv_status);
33657b952d6SSatish Balay     /* unpack receives into our local space */
33757b952d6SSatish Balay     values = baij->rvalues + 3*imdex*baij->rmax;
33857b952d6SSatish Balay     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
33957b952d6SSatish Balay     n = n/3;
34057b952d6SSatish Balay     for ( i=0; i<n; i++ ) {
34157b952d6SSatish Balay       row = (int) PetscReal(values[3*i]) - baij->rstart*bs;
34257b952d6SSatish Balay       col = (int) PetscReal(values[3*i+1]);
34357b952d6SSatish Balay       val = values[3*i+2];
34457b952d6SSatish Balay       if (col >= baij->cstart*bs && col < baij->cend*bs) {
34557b952d6SSatish Balay         col -= baij->cstart*bs;
34657b952d6SSatish Balay         MatSetValues(baij->A,1,&row,1,&col,&val,addv);
34757b952d6SSatish Balay       }
34857b952d6SSatish Balay       else {
34957b952d6SSatish Balay         if (mat->was_assembled) {
350905e6a2fSBarry Smith           if (!baij->colmap) {
351905e6a2fSBarry Smith             ierr = CreateColmap_MPIBAIJ_Private(mat); CHKERRQ(ierr);
352905e6a2fSBarry Smith           }
353905e6a2fSBarry Smith           col = (baij->colmap[col/bs]-1)*bs + col%bs;
35457b952d6SSatish Balay           if (col < 0  && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
35557b952d6SSatish Balay             ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
35657b952d6SSatish Balay             col = (int) PetscReal(values[3*i+1]);
35757b952d6SSatish Balay           }
35857b952d6SSatish Balay         }
35957b952d6SSatish Balay         MatSetValues(baij->B,1,&row,1,&col,&val,addv);
36057b952d6SSatish Balay       }
36157b952d6SSatish Balay     }
36257b952d6SSatish Balay     count--;
36357b952d6SSatish Balay   }
36457b952d6SSatish Balay   PetscFree(baij->recv_waits); PetscFree(baij->rvalues);
36557b952d6SSatish Balay 
36657b952d6SSatish Balay   /* wait on sends */
36757b952d6SSatish Balay   if (baij->nsends) {
36857b952d6SSatish Balay     send_status = (MPI_Status *) PetscMalloc(baij->nsends*sizeof(MPI_Status));
36957b952d6SSatish Balay     CHKPTRQ(send_status);
37057b952d6SSatish Balay     MPI_Waitall(baij->nsends,baij->send_waits,send_status);
37157b952d6SSatish Balay     PetscFree(send_status);
37257b952d6SSatish Balay   }
37357b952d6SSatish Balay   PetscFree(baij->send_waits); PetscFree(baij->svalues);
37457b952d6SSatish Balay 
37557b952d6SSatish Balay   baij->insertmode = NOT_SET_VALUES;
37657b952d6SSatish Balay   ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr);
37757b952d6SSatish Balay   ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr);
37857b952d6SSatish Balay 
37957b952d6SSatish Balay   /* determine if any processor has disassembled, if so we must
38057b952d6SSatish Balay      also disassemble ourselfs, in order that we may reassemble. */
38157b952d6SSatish Balay   MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
38257b952d6SSatish Balay   if (mat->was_assembled && !other_disassembled) {
38357b952d6SSatish Balay     ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
38457b952d6SSatish Balay   }
38557b952d6SSatish Balay 
3866d4a8577SBarry Smith   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
38757b952d6SSatish Balay     ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr);
38857b952d6SSatish Balay   }
38957b952d6SSatish Balay   ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr);
39057b952d6SSatish Balay   ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr);
39157b952d6SSatish Balay 
39257b952d6SSatish Balay   if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;}
39357b952d6SSatish Balay   return 0;
39457b952d6SSatish Balay }
39557b952d6SSatish Balay 
396*026e39d0SSatish Balay #undef __FUNCTION__
397*026e39d0SSatish Balay #define __FUNCTION__ "MatView_MPIBAIJ_Binary"
39857b952d6SSatish Balay static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer)
39957b952d6SSatish Balay {
40057b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
40157b952d6SSatish Balay   int          ierr;
40257b952d6SSatish Balay 
40357b952d6SSatish Balay   if (baij->size == 1) {
40457b952d6SSatish Balay     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
40557b952d6SSatish Balay   }
40657b952d6SSatish Balay   else SETERRQ(1,"MatView_MPIBAIJ_Binary:Only uniprocessor output supported");
40757b952d6SSatish Balay   return 0;
40857b952d6SSatish Balay }
40957b952d6SSatish Balay 
410*026e39d0SSatish Balay #undef __FUNCTION__
411*026e39d0SSatish Balay #define __FUNCTION__ "MatView_MPIBAIJ_ASCIIorDraworMatlab"
41257b952d6SSatish Balay static int MatView_MPIBAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
41357b952d6SSatish Balay {
41457b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
415cee3aa6bSSatish Balay   int          ierr, format,rank,bs = baij->bs;
41657b952d6SSatish Balay   FILE         *fd;
41757b952d6SSatish Balay   ViewerType   vtype;
41857b952d6SSatish Balay 
41957b952d6SSatish Balay   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
42057b952d6SSatish Balay   if (vtype  == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
42157b952d6SSatish Balay     ierr = ViewerGetFormat(viewer,&format);
422639f9d9dSBarry Smith     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
4234e220ebcSLois Curfman McInnes       MatInfo info;
42457b952d6SSatish Balay       MPI_Comm_rank(mat->comm,&rank);
42557b952d6SSatish Balay       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
4264e220ebcSLois Curfman McInnes       ierr = MatGetInfo(mat,MAT_LOCAL,&info);
42757b952d6SSatish Balay       PetscSequentialPhaseBegin(mat->comm,1);
42857b952d6SSatish Balay       fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
4294e220ebcSLois Curfman McInnes               rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
4304e220ebcSLois Curfman McInnes               baij->bs,(int)info.memory);
4314e220ebcSLois Curfman McInnes       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);
4324e220ebcSLois Curfman McInnes       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
4334e220ebcSLois Curfman McInnes       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);
4344e220ebcSLois Curfman McInnes       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
43557b952d6SSatish Balay       fflush(fd);
43657b952d6SSatish Balay       PetscSequentialPhaseEnd(mat->comm,1);
43757b952d6SSatish Balay       ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr);
43857b952d6SSatish Balay       return 0;
43957b952d6SSatish Balay     }
440639f9d9dSBarry Smith     else if (format == VIEWER_FORMAT_ASCII_INFO) {
441bcc3fcf6SBarry Smith       PetscPrintf(mat->comm,"  block size is %d\n",bs);
44257b952d6SSatish Balay       return 0;
44357b952d6SSatish Balay     }
44457b952d6SSatish Balay   }
44557b952d6SSatish Balay 
44657b952d6SSatish Balay   if (vtype == DRAW_VIEWER) {
44757b952d6SSatish Balay     Draw       draw;
44857b952d6SSatish Balay     PetscTruth isnull;
44957b952d6SSatish Balay     ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
45057b952d6SSatish Balay     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
45157b952d6SSatish Balay   }
45257b952d6SSatish Balay 
45357b952d6SSatish Balay   if (vtype == ASCII_FILE_VIEWER) {
45457b952d6SSatish Balay     ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
45557b952d6SSatish Balay     PetscSequentialPhaseBegin(mat->comm,1);
45657b952d6SSatish Balay     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
45757b952d6SSatish Balay            baij->rank,baij->m,baij->rstart*bs,baij->rend*bs,baij->n,
45857b952d6SSatish Balay             baij->cstart*bs,baij->cend*bs);
45957b952d6SSatish Balay     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
46057b952d6SSatish Balay     ierr = MatView(baij->B,viewer); CHKERRQ(ierr);
46157b952d6SSatish Balay     fflush(fd);
46257b952d6SSatish Balay     PetscSequentialPhaseEnd(mat->comm,1);
46357b952d6SSatish Balay   }
46457b952d6SSatish Balay   else {
46557b952d6SSatish Balay     int size = baij->size;
46657b952d6SSatish Balay     rank = baij->rank;
46757b952d6SSatish Balay     if (size == 1) {
46857b952d6SSatish Balay       ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
46957b952d6SSatish Balay     }
47057b952d6SSatish Balay     else {
47157b952d6SSatish Balay       /* assemble the entire matrix onto first processor. */
47257b952d6SSatish Balay       Mat         A;
47357b952d6SSatish Balay       Mat_SeqBAIJ *Aloc;
47457b952d6SSatish Balay       int         M = baij->M, N = baij->N,*ai,*aj,row,col,i,j,k,*rvals;
47557b952d6SSatish Balay       int         mbs=baij->mbs;
47657b952d6SSatish Balay       Scalar      *a;
47757b952d6SSatish Balay 
47857b952d6SSatish Balay       if (!rank) {
479cee3aa6bSSatish Balay         ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
48057b952d6SSatish Balay         CHKERRQ(ierr);
48157b952d6SSatish Balay       }
48257b952d6SSatish Balay       else {
483cee3aa6bSSatish Balay         ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
48457b952d6SSatish Balay         CHKERRQ(ierr);
48557b952d6SSatish Balay       }
48657b952d6SSatish Balay       PLogObjectParent(mat,A);
48757b952d6SSatish Balay 
48857b952d6SSatish Balay       /* copy over the A part */
48957b952d6SSatish Balay       Aloc = (Mat_SeqBAIJ*) baij->A->data;
49057b952d6SSatish Balay       ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
49157b952d6SSatish Balay       row = baij->rstart;
49257b952d6SSatish Balay       rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
49357b952d6SSatish Balay 
49457b952d6SSatish Balay       for ( i=0; i<mbs; i++ ) {
49557b952d6SSatish Balay         rvals[0] = bs*(baij->rstart + i);
49657b952d6SSatish Balay         for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
49757b952d6SSatish Balay         for ( j=ai[i]; j<ai[i+1]; j++ ) {
49857b952d6SSatish Balay           col = (baij->cstart+aj[j])*bs;
49957b952d6SSatish Balay           for (k=0; k<bs; k++ ) {
500cee3aa6bSSatish Balay             ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
501cee3aa6bSSatish Balay             col++; a += bs;
50257b952d6SSatish Balay           }
50357b952d6SSatish Balay         }
50457b952d6SSatish Balay       }
50557b952d6SSatish Balay       /* copy over the B part */
50657b952d6SSatish Balay       Aloc = (Mat_SeqBAIJ*) baij->B->data;
50757b952d6SSatish Balay       ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
50857b952d6SSatish Balay       row = baij->rstart*bs;
50957b952d6SSatish Balay       for ( i=0; i<mbs; i++ ) {
51057b952d6SSatish Balay         rvals[0] = bs*(baij->rstart + i);
51157b952d6SSatish Balay         for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
51257b952d6SSatish Balay         for ( j=ai[i]; j<ai[i+1]; j++ ) {
51357b952d6SSatish Balay           col = baij->garray[aj[j]]*bs;
51457b952d6SSatish Balay           for (k=0; k<bs; k++ ) {
515cee3aa6bSSatish Balay             ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
516cee3aa6bSSatish Balay             col++; a += bs;
51757b952d6SSatish Balay           }
51857b952d6SSatish Balay         }
51957b952d6SSatish Balay       }
52057b952d6SSatish Balay       PetscFree(rvals);
5216d4a8577SBarry Smith       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
5226d4a8577SBarry Smith       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
52357b952d6SSatish Balay       if (!rank) {
52457b952d6SSatish Balay         ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
52557b952d6SSatish Balay       }
52657b952d6SSatish Balay       ierr = MatDestroy(A); CHKERRQ(ierr);
52757b952d6SSatish Balay     }
52857b952d6SSatish Balay   }
52957b952d6SSatish Balay   return 0;
53057b952d6SSatish Balay }
53157b952d6SSatish Balay 
53257b952d6SSatish Balay 
53357b952d6SSatish Balay 
534*026e39d0SSatish Balay #undef __FUNCTION__
535*026e39d0SSatish Balay #define __FUNCTION__ "MatView_MPIBAIJ"
53657b952d6SSatish Balay static int MatView_MPIBAIJ(PetscObject obj,Viewer viewer)
53757b952d6SSatish Balay {
53857b952d6SSatish Balay   Mat         mat = (Mat) obj;
53957b952d6SSatish Balay   int         ierr;
54057b952d6SSatish Balay   ViewerType  vtype;
54157b952d6SSatish Balay 
54257b952d6SSatish Balay   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
54357b952d6SSatish Balay   if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
54457b952d6SSatish Balay       vtype == DRAW_VIEWER       || vtype == MATLAB_VIEWER) {
54557b952d6SSatish Balay     ierr = MatView_MPIBAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
54657b952d6SSatish Balay   }
54757b952d6SSatish Balay   else if (vtype == BINARY_FILE_VIEWER) {
54857b952d6SSatish Balay     return MatView_MPIBAIJ_Binary(mat,viewer);
54957b952d6SSatish Balay   }
55057b952d6SSatish Balay   return 0;
55157b952d6SSatish Balay }
55257b952d6SSatish Balay 
553*026e39d0SSatish Balay #undef __FUNCTION__
554*026e39d0SSatish Balay #define __FUNCTION__ "MatDestroy_MPIBAIJ"
55579bdfe76SSatish Balay static int MatDestroy_MPIBAIJ(PetscObject obj)
55679bdfe76SSatish Balay {
55779bdfe76SSatish Balay   Mat         mat = (Mat) obj;
55879bdfe76SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
55979bdfe76SSatish Balay   int         ierr;
56079bdfe76SSatish Balay 
56179bdfe76SSatish Balay #if defined(PETSC_LOG)
56279bdfe76SSatish Balay   PLogObjectState(obj,"Rows=%d, Cols=%d",baij->M,baij->N);
56379bdfe76SSatish Balay #endif
56479bdfe76SSatish Balay 
56579bdfe76SSatish Balay   PetscFree(baij->rowners);
56679bdfe76SSatish Balay   ierr = MatDestroy(baij->A); CHKERRQ(ierr);
56779bdfe76SSatish Balay   ierr = MatDestroy(baij->B); CHKERRQ(ierr);
56879bdfe76SSatish Balay   if (baij->colmap) PetscFree(baij->colmap);
56979bdfe76SSatish Balay   if (baij->garray) PetscFree(baij->garray);
57079bdfe76SSatish Balay   if (baij->lvec)   VecDestroy(baij->lvec);
57179bdfe76SSatish Balay   if (baij->Mvctx)  VecScatterDestroy(baij->Mvctx);
57279bdfe76SSatish Balay   if (baij->rowvalues) PetscFree(baij->rowvalues);
57379bdfe76SSatish Balay   PetscFree(baij);
57490f02eecSBarry Smith   if (mat->mapping) {
57590f02eecSBarry Smith     ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr);
57690f02eecSBarry Smith   }
57779bdfe76SSatish Balay   PLogObjectDestroy(mat);
57879bdfe76SSatish Balay   PetscHeaderDestroy(mat);
57979bdfe76SSatish Balay   return 0;
58079bdfe76SSatish Balay }
58179bdfe76SSatish Balay 
582*026e39d0SSatish Balay #undef __FUNCTION__
583*026e39d0SSatish Balay #define __FUNCTION__ "MatMult_MPIBAIJ"
584cee3aa6bSSatish Balay static int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
585cee3aa6bSSatish Balay {
586cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
58747b4a8eaSLois Curfman McInnes   int         ierr, nt;
588cee3aa6bSSatish Balay 
589c16cb8f2SBarry Smith   VecGetLocalSize_Fast(xx,nt);
59047b4a8eaSLois Curfman McInnes   if (nt != a->n) {
5910ac07820SSatish Balay     SETERRQ(1,"MatMult_MPIBAIJ:Incompatible parition of A and xx");
59247b4a8eaSLois Curfman McInnes   }
593c16cb8f2SBarry Smith   VecGetLocalSize_Fast(yy,nt);
59447b4a8eaSLois Curfman McInnes   if (nt != a->m) {
5950ac07820SSatish Balay     SETERRQ(1,"MatMult_MPIBAIJ:Incompatible parition of A and yy");
59647b4a8eaSLois Curfman McInnes   }
59743a90d84SBarry Smith   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
598cee3aa6bSSatish Balay   ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr);
59943a90d84SBarry Smith   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
600cee3aa6bSSatish Balay   ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
60143a90d84SBarry Smith   ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
602cee3aa6bSSatish Balay   return 0;
603cee3aa6bSSatish Balay }
604cee3aa6bSSatish Balay 
605*026e39d0SSatish Balay #undef __FUNCTION__
606*026e39d0SSatish Balay #define __FUNCTION__ "MatMultAdd_MPIBAIJ"
607cee3aa6bSSatish Balay static int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
608cee3aa6bSSatish Balay {
609cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
610cee3aa6bSSatish Balay   int        ierr;
61143a90d84SBarry Smith   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
612cee3aa6bSSatish Balay   ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
61343a90d84SBarry Smith   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
614cee3aa6bSSatish Balay   ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
615cee3aa6bSSatish Balay   return 0;
616cee3aa6bSSatish Balay }
617cee3aa6bSSatish Balay 
618*026e39d0SSatish Balay #undef __FUNCTION__
619*026e39d0SSatish Balay #define __FUNCTION__ "MatMultTrans_MPIBAIJ"
620cee3aa6bSSatish Balay static int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy)
621cee3aa6bSSatish Balay {
622cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
623cee3aa6bSSatish Balay   int        ierr;
624cee3aa6bSSatish Balay 
625cee3aa6bSSatish Balay   /* do nondiagonal part */
626cee3aa6bSSatish Balay   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
627cee3aa6bSSatish Balay   /* send it on its way */
628537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
629cee3aa6bSSatish Balay   /* do local part */
630cee3aa6bSSatish Balay   ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr);
631cee3aa6bSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
632cee3aa6bSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
633cee3aa6bSSatish Balay   /* but is not perhaps always true. */
634639f9d9dSBarry Smith   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
635cee3aa6bSSatish Balay   return 0;
636cee3aa6bSSatish Balay }
637cee3aa6bSSatish Balay 
638*026e39d0SSatish Balay #undef __FUNCTION__
639*026e39d0SSatish Balay #define __FUNCTION__ "MatMultTransAdd_MPIBAIJ"
640cee3aa6bSSatish Balay static int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
641cee3aa6bSSatish Balay {
642cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
643cee3aa6bSSatish Balay   int        ierr;
644cee3aa6bSSatish Balay 
645cee3aa6bSSatish Balay   /* do nondiagonal part */
646cee3aa6bSSatish Balay   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
647cee3aa6bSSatish Balay   /* send it on its way */
648537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
649cee3aa6bSSatish Balay   /* do local part */
650cee3aa6bSSatish Balay   ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
651cee3aa6bSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
652cee3aa6bSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
653cee3aa6bSSatish Balay   /* but is not perhaps always true. */
654537820f0SBarry Smith   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
655cee3aa6bSSatish Balay   return 0;
656cee3aa6bSSatish Balay }
657cee3aa6bSSatish Balay 
658cee3aa6bSSatish Balay /*
659cee3aa6bSSatish Balay   This only works correctly for square matrices where the subblock A->A is the
660cee3aa6bSSatish Balay    diagonal block
661cee3aa6bSSatish Balay */
662*026e39d0SSatish Balay #undef __FUNCTION__
663*026e39d0SSatish Balay #define __FUNCTION__ "MatGetDiagonal_MPIBAIJ"
664cee3aa6bSSatish Balay static int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
665cee3aa6bSSatish Balay {
666cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
667cee3aa6bSSatish Balay   if (a->M != a->N)
668cee3aa6bSSatish Balay     SETERRQ(1,"MatGetDiagonal_MPIBAIJ:Supports only square matrix where A->A is diag block");
669cee3aa6bSSatish Balay   return MatGetDiagonal(a->A,v);
670cee3aa6bSSatish Balay }
671cee3aa6bSSatish Balay 
672*026e39d0SSatish Balay #undef __FUNCTION__
673*026e39d0SSatish Balay #define __FUNCTION__ "MatScale_MPIBAIJ"
674cee3aa6bSSatish Balay static int MatScale_MPIBAIJ(Scalar *aa,Mat A)
675cee3aa6bSSatish Balay {
676cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
677cee3aa6bSSatish Balay   int        ierr;
678cee3aa6bSSatish Balay   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
679cee3aa6bSSatish Balay   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
680cee3aa6bSSatish Balay   return 0;
681cee3aa6bSSatish Balay }
682*026e39d0SSatish Balay 
683*026e39d0SSatish Balay #undef __FUNCTION__
684*026e39d0SSatish Balay #define __FUNCTION__ "MatGetSize_MPIBAIJ"
68557b952d6SSatish Balay static int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n)
68657b952d6SSatish Balay {
68757b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
68857b952d6SSatish Balay   *m = mat->M; *n = mat->N;
68957b952d6SSatish Balay   return 0;
69057b952d6SSatish Balay }
69157b952d6SSatish Balay 
692*026e39d0SSatish Balay #undef __FUNCTION__
693*026e39d0SSatish Balay #define __FUNCTION__ "MatGetLocalSize_MPIBAIJ"
69457b952d6SSatish Balay static int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n)
69557b952d6SSatish Balay {
69657b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
69757b952d6SSatish Balay   *m = mat->m; *n = mat->N;
69857b952d6SSatish Balay   return 0;
69957b952d6SSatish Balay }
70057b952d6SSatish Balay 
701*026e39d0SSatish Balay #undef __FUNCTION__
702*026e39d0SSatish Balay #define __FUNCTION__ "MatGetOwnershipRange_MPIBAIJ"
70357b952d6SSatish Balay static int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n)
70457b952d6SSatish Balay {
70557b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
70657b952d6SSatish Balay   *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs;
70757b952d6SSatish Balay   return 0;
70857b952d6SSatish Balay }
70957b952d6SSatish Balay 
710acdf5bf4SSatish Balay extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
711acdf5bf4SSatish Balay extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
712acdf5bf4SSatish Balay 
713*026e39d0SSatish Balay #undef __FUNCTION__
714*026e39d0SSatish Balay #define __FUNCTION__ "MatGetRow_MPIBAIJ"
715acdf5bf4SSatish Balay int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
716acdf5bf4SSatish Balay {
717acdf5bf4SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
718acdf5bf4SSatish Balay   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
719acdf5bf4SSatish Balay   int        bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB;
720d9d09a02SSatish Balay   int        nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs;
721d9d09a02SSatish Balay   int        *cmap, *idx_p,cstart = mat->cstart;
722acdf5bf4SSatish Balay 
723acdf5bf4SSatish Balay   if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,"MatGetRow_MPIBAIJ:Already active");
724acdf5bf4SSatish Balay   mat->getrowactive = PETSC_TRUE;
725acdf5bf4SSatish Balay 
726acdf5bf4SSatish Balay   if (!mat->rowvalues && (idx || v)) {
727acdf5bf4SSatish Balay     /*
728acdf5bf4SSatish Balay         allocate enough space to hold information from the longest row.
729acdf5bf4SSatish Balay     */
730acdf5bf4SSatish Balay     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data;
731bd16c2feSSatish Balay     int     max = 1,mbs = mat->mbs,tmp;
732bd16c2feSSatish Balay     for ( i=0; i<mbs; i++ ) {
733acdf5bf4SSatish Balay       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
734acdf5bf4SSatish Balay       if (max < tmp) { max = tmp; }
735acdf5bf4SSatish Balay     }
736acdf5bf4SSatish Balay     mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar)));
737acdf5bf4SSatish Balay     CHKPTRQ(mat->rowvalues);
738acdf5bf4SSatish Balay     mat->rowindices = (int *) (mat->rowvalues + max*bs2);
739acdf5bf4SSatish Balay   }
740acdf5bf4SSatish Balay 
741acdf5bf4SSatish Balay 
742d9d09a02SSatish Balay   if (row < brstart || row >= brend) SETERRQ(1,"MatGetRow_MPIBAIJ:Only local rows")
743d9d09a02SSatish Balay   lrow = row - brstart;
744acdf5bf4SSatish Balay 
745acdf5bf4SSatish Balay   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
746acdf5bf4SSatish Balay   if (!v)   {pvA = 0; pvB = 0;}
747acdf5bf4SSatish Balay   if (!idx) {pcA = 0; if (!v) pcB = 0;}
748acdf5bf4SSatish Balay   ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
749acdf5bf4SSatish Balay   ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
750acdf5bf4SSatish Balay   nztot = nzA + nzB;
751acdf5bf4SSatish Balay 
752acdf5bf4SSatish Balay   cmap  = mat->garray;
753acdf5bf4SSatish Balay   if (v  || idx) {
754acdf5bf4SSatish Balay     if (nztot) {
755acdf5bf4SSatish Balay       /* Sort by increasing column numbers, assuming A and B already sorted */
756acdf5bf4SSatish Balay       int imark = -1;
757acdf5bf4SSatish Balay       if (v) {
758acdf5bf4SSatish Balay         *v = v_p = mat->rowvalues;
759acdf5bf4SSatish Balay         for ( i=0; i<nzB; i++ ) {
760d9d09a02SSatish Balay           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
761acdf5bf4SSatish Balay           else break;
762acdf5bf4SSatish Balay         }
763acdf5bf4SSatish Balay         imark = i;
764acdf5bf4SSatish Balay         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
765acdf5bf4SSatish Balay         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
766acdf5bf4SSatish Balay       }
767acdf5bf4SSatish Balay       if (idx) {
768acdf5bf4SSatish Balay         *idx = idx_p = mat->rowindices;
769acdf5bf4SSatish Balay         if (imark > -1) {
770acdf5bf4SSatish Balay           for ( i=0; i<imark; i++ ) {
771bd16c2feSSatish Balay             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
772acdf5bf4SSatish Balay           }
773acdf5bf4SSatish Balay         } else {
774acdf5bf4SSatish Balay           for ( i=0; i<nzB; i++ ) {
775d9d09a02SSatish Balay             if (cmap[cworkB[i]/bs] < cstart)
776d9d09a02SSatish Balay               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
777acdf5bf4SSatish Balay             else break;
778acdf5bf4SSatish Balay           }
779acdf5bf4SSatish Balay           imark = i;
780acdf5bf4SSatish Balay         }
781d9d09a02SSatish Balay         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart*bs + cworkA[i];
782d9d09a02SSatish Balay         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
783acdf5bf4SSatish Balay       }
784acdf5bf4SSatish Balay     }
785d212a18eSSatish Balay     else {
786d212a18eSSatish Balay       if (idx) *idx = 0;
787d212a18eSSatish Balay       if (v)   *v   = 0;
788d212a18eSSatish Balay     }
789acdf5bf4SSatish Balay   }
790acdf5bf4SSatish Balay   *nz = nztot;
791acdf5bf4SSatish Balay   ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
792acdf5bf4SSatish Balay   ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
793acdf5bf4SSatish Balay   return 0;
794acdf5bf4SSatish Balay }
795acdf5bf4SSatish Balay 
796*026e39d0SSatish Balay #undef __FUNCTION__
797*026e39d0SSatish Balay #define __FUNCTION__ "MatRestoreRow_MPIBAIJ"
798acdf5bf4SSatish Balay int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
799acdf5bf4SSatish Balay {
800acdf5bf4SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
801acdf5bf4SSatish Balay   if (baij->getrowactive == PETSC_FALSE) {
802acdf5bf4SSatish Balay     SETERRQ(1,"MatRestoreRow_MPIBAIJ:MatGetRow not called");
803acdf5bf4SSatish Balay   }
804acdf5bf4SSatish Balay   baij->getrowactive = PETSC_FALSE;
805acdf5bf4SSatish Balay   return 0;
806acdf5bf4SSatish Balay }
807acdf5bf4SSatish Balay 
808*026e39d0SSatish Balay #undef __FUNCTION__
809*026e39d0SSatish Balay #define __FUNCTION__ "MatGetBlockSize_MPIBAIJ"
8105a838052SSatish Balay static int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
8115a838052SSatish Balay {
8125a838052SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
8135a838052SSatish Balay   *bs = baij->bs;
8145a838052SSatish Balay   return 0;
8155a838052SSatish Balay }
8165a838052SSatish Balay 
817*026e39d0SSatish Balay #undef __FUNCTION__
818*026e39d0SSatish Balay #define __FUNCTION__ "MatZeroEntries_MPIBAIJ"
81958667388SSatish Balay static int MatZeroEntries_MPIBAIJ(Mat A)
82058667388SSatish Balay {
82158667388SSatish Balay   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data;
82258667388SSatish Balay   int         ierr;
82358667388SSatish Balay   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
82458667388SSatish Balay   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
82558667388SSatish Balay   return 0;
82658667388SSatish Balay }
8270ac07820SSatish Balay 
828*026e39d0SSatish Balay #undef __FUNCTION__
829*026e39d0SSatish Balay #define __FUNCTION__ "MatGetInfo_MPIBAIJ"
8304e220ebcSLois Curfman McInnes static int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
8310ac07820SSatish Balay {
8324e220ebcSLois Curfman McInnes   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data;
8334e220ebcSLois Curfman McInnes   Mat         A = a->A, B = a->B;
8347d57db60SLois Curfman McInnes   int         ierr;
8357d57db60SLois Curfman McInnes   double      isend[5], irecv[5];
8360ac07820SSatish Balay 
8374e220ebcSLois Curfman McInnes   info->rows_global    = (double)a->M;
8384e220ebcSLois Curfman McInnes   info->columns_global = (double)a->N;
8394e220ebcSLois Curfman McInnes   info->rows_local     = (double)a->m;
8404e220ebcSLois Curfman McInnes   info->columns_local  = (double)a->N;
8414e220ebcSLois Curfman McInnes   info->block_size     = (double)a->bs;
8424e220ebcSLois Curfman McInnes   ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
8434e220ebcSLois Curfman McInnes   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->memory;
8444e220ebcSLois Curfman McInnes   ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
8454e220ebcSLois Curfman McInnes   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->memory;
8460ac07820SSatish Balay   if (flag == MAT_LOCAL) {
8474e220ebcSLois Curfman McInnes     info->nz_used      = isend[0];
8484e220ebcSLois Curfman McInnes     info->nz_allocated = isend[1];
8494e220ebcSLois Curfman McInnes     info->nz_unneeded  = isend[2];
8504e220ebcSLois Curfman McInnes     info->memory       = isend[3];
8514e220ebcSLois Curfman McInnes     info->mallocs      = isend[4];
8520ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_MAX) {
8530ac07820SSatish Balay     MPI_Allreduce(isend,irecv,3,MPI_INT,MPI_MAX,matin->comm);
8544e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
8554e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
8564e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
8574e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
8584e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
8590ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_SUM) {
8600ac07820SSatish Balay     MPI_Allreduce(isend,irecv,3,MPI_INT,MPI_SUM,matin->comm);
8614e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
8624e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
8634e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
8644e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
8654e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
8660ac07820SSatish Balay   }
8674e220ebcSLois Curfman McInnes   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
8684e220ebcSLois Curfman McInnes   info->fill_ratio_needed = 0;
8694e220ebcSLois Curfman McInnes   info->factor_mallocs    = 0;
8700ac07820SSatish Balay   return 0;
8710ac07820SSatish Balay }
8720ac07820SSatish Balay 
873*026e39d0SSatish Balay #undef __FUNCTION__
874*026e39d0SSatish Balay #define __FUNCTION__ "MatSetOption_MPIBAIJ"
87558667388SSatish Balay static int MatSetOption_MPIBAIJ(Mat A,MatOption op)
87658667388SSatish Balay {
87758667388SSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
87858667388SSatish Balay 
87958667388SSatish Balay   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
88058667388SSatish Balay       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
8816da5968aSLois Curfman McInnes       op == MAT_COLUMNS_UNSORTED ||
882b1fbbac0SLois Curfman McInnes       op == MAT_COLUMNS_SORTED) {
883b1fbbac0SLois Curfman McInnes         MatSetOption(a->A,op);
884b1fbbac0SLois Curfman McInnes         MatSetOption(a->B,op);
885b1fbbac0SLois Curfman McInnes   } else if (op == MAT_ROW_ORIENTED) {
886aeafbbfcSLois Curfman McInnes         a->roworiented = 1;
88758667388SSatish Balay         MatSetOption(a->A,op);
88858667388SSatish Balay         MatSetOption(a->B,op);
889b1fbbac0SLois Curfman McInnes   } else if (op == MAT_ROWS_SORTED ||
8906da5968aSLois Curfman McInnes              op == MAT_ROWS_UNSORTED ||
89158667388SSatish Balay              op == MAT_SYMMETRIC ||
89258667388SSatish Balay              op == MAT_STRUCTURALLY_SYMMETRIC ||
89358667388SSatish Balay              op == MAT_YES_NEW_DIAGONALS)
89458667388SSatish Balay     PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
89558667388SSatish Balay   else if (op == MAT_COLUMN_ORIENTED) {
89658667388SSatish Balay     a->roworiented = 0;
89758667388SSatish Balay     MatSetOption(a->A,op);
89858667388SSatish Balay     MatSetOption(a->B,op);
89990f02eecSBarry Smith   } else if (op == MAT_IGNORE_OFF_PROCESSOR_ENTRIES) {
90090f02eecSBarry Smith     a->donotstash = 1;
90190f02eecSBarry Smith   } else if (op == MAT_NO_NEW_DIAGONALS)
9020ac07820SSatish Balay     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIBAIJ:MAT_NO_NEW_DIAGONALS");}
90358667388SSatish Balay   else
9040ac07820SSatish Balay     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIBAIJ:unknown option");}
90558667388SSatish Balay   return 0;
90658667388SSatish Balay }
90758667388SSatish Balay 
908*026e39d0SSatish Balay #undef __FUNCTION__
909*026e39d0SSatish Balay #define __FUNCTION__ "MatTranspose_MPIBAIJ("
9100ac07820SSatish Balay static int MatTranspose_MPIBAIJ(Mat A,Mat *matout)
9110ac07820SSatish Balay {
9120ac07820SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data;
9130ac07820SSatish Balay   Mat_SeqBAIJ *Aloc;
9140ac07820SSatish Balay   Mat        B;
9150ac07820SSatish Balay   int        ierr,M=baij->M,N=baij->N,*ai,*aj,row,i,*rvals,j,k,col;
9160ac07820SSatish Balay   int        bs=baij->bs,mbs=baij->mbs;
9170ac07820SSatish Balay   Scalar     *a;
9180ac07820SSatish Balay 
9190ac07820SSatish Balay   if (matout == PETSC_NULL && M != N)
9200ac07820SSatish Balay     SETERRQ(1,"MatTranspose_MPIBAIJ:Square matrix only for in-place");
9210ac07820SSatish Balay   ierr = MatCreateMPIBAIJ(A->comm,baij->bs,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);
9220ac07820SSatish Balay   CHKERRQ(ierr);
9230ac07820SSatish Balay 
9240ac07820SSatish Balay   /* copy over the A part */
9250ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*) baij->A->data;
9260ac07820SSatish Balay   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
9270ac07820SSatish Balay   row = baij->rstart;
9280ac07820SSatish Balay   rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
9290ac07820SSatish Balay 
9300ac07820SSatish Balay   for ( i=0; i<mbs; i++ ) {
9310ac07820SSatish Balay     rvals[0] = bs*(baij->rstart + i);
9320ac07820SSatish Balay     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
9330ac07820SSatish Balay     for ( j=ai[i]; j<ai[i+1]; j++ ) {
9340ac07820SSatish Balay       col = (baij->cstart+aj[j])*bs;
9350ac07820SSatish Balay       for (k=0; k<bs; k++ ) {
9360ac07820SSatish Balay         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
9370ac07820SSatish Balay         col++; a += bs;
9380ac07820SSatish Balay       }
9390ac07820SSatish Balay     }
9400ac07820SSatish Balay   }
9410ac07820SSatish Balay   /* copy over the B part */
9420ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*) baij->B->data;
9430ac07820SSatish Balay   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
9440ac07820SSatish Balay   row = baij->rstart*bs;
9450ac07820SSatish Balay   for ( i=0; i<mbs; i++ ) {
9460ac07820SSatish Balay     rvals[0] = bs*(baij->rstart + i);
9470ac07820SSatish Balay     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
9480ac07820SSatish Balay     for ( j=ai[i]; j<ai[i+1]; j++ ) {
9490ac07820SSatish Balay       col = baij->garray[aj[j]]*bs;
9500ac07820SSatish Balay       for (k=0; k<bs; k++ ) {
9510ac07820SSatish Balay         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
9520ac07820SSatish Balay         col++; a += bs;
9530ac07820SSatish Balay       }
9540ac07820SSatish Balay     }
9550ac07820SSatish Balay   }
9560ac07820SSatish Balay   PetscFree(rvals);
9570ac07820SSatish Balay   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
9580ac07820SSatish Balay   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
9590ac07820SSatish Balay 
9600ac07820SSatish Balay   if (matout != PETSC_NULL) {
9610ac07820SSatish Balay     *matout = B;
9620ac07820SSatish Balay   } else {
9630ac07820SSatish Balay     /* This isn't really an in-place transpose .... but free data structures from baij */
9640ac07820SSatish Balay     PetscFree(baij->rowners);
9650ac07820SSatish Balay     ierr = MatDestroy(baij->A); CHKERRQ(ierr);
9660ac07820SSatish Balay     ierr = MatDestroy(baij->B); CHKERRQ(ierr);
9670ac07820SSatish Balay     if (baij->colmap) PetscFree(baij->colmap);
9680ac07820SSatish Balay     if (baij->garray) PetscFree(baij->garray);
9690ac07820SSatish Balay     if (baij->lvec) VecDestroy(baij->lvec);
9700ac07820SSatish Balay     if (baij->Mvctx) VecScatterDestroy(baij->Mvctx);
9710ac07820SSatish Balay     PetscFree(baij);
9720ac07820SSatish Balay     PetscMemcpy(A,B,sizeof(struct _Mat));
9730ac07820SSatish Balay     PetscHeaderDestroy(B);
9740ac07820SSatish Balay   }
9750ac07820SSatish Balay   return 0;
9760ac07820SSatish Balay }
9770e95ebc0SSatish Balay 
978*026e39d0SSatish Balay #undef __FUNCTION__
979*026e39d0SSatish Balay #define __FUNCTION__ "MatDiagonalScale_MPIBAIJ"
9800e95ebc0SSatish Balay int MatDiagonalScale_MPIBAIJ(Mat A,Vec ll,Vec rr)
9810e95ebc0SSatish Balay {
9820e95ebc0SSatish Balay   Mat a = ((Mat_MPIBAIJ *) A->data)->A;
9830e95ebc0SSatish Balay   Mat b = ((Mat_MPIBAIJ *) A->data)->B;
9840e95ebc0SSatish Balay   int ierr,s1,s2,s3;
9850e95ebc0SSatish Balay 
9860e95ebc0SSatish Balay   if (ll)  {
9870e95ebc0SSatish Balay     ierr = VecGetLocalSize(ll,&s1); CHKERRQ(ierr);
9880e95ebc0SSatish Balay     ierr = MatGetLocalSize(A,&s2,&s3); CHKERRQ(ierr);
9890e95ebc0SSatish Balay     if (s1!=s2) SETERRQ(1,"MatDiagonalScale_MPIBAIJ: non-conforming local sizes");
9900e95ebc0SSatish Balay     ierr = MatDiagonalScale(a,ll,0); CHKERRQ(ierr);
9910e95ebc0SSatish Balay     ierr = MatDiagonalScale(b,ll,0); CHKERRQ(ierr);
9920e95ebc0SSatish Balay   }
9930e95ebc0SSatish Balay   if (rr) SETERRQ(1,"MatDiagonalScale_MPIBAIJ:not supported for right vector");
9940e95ebc0SSatish Balay   return 0;
9950e95ebc0SSatish Balay }
9960e95ebc0SSatish Balay 
9970ac07820SSatish Balay /* the code does not do the diagonal entries correctly unless the
9980ac07820SSatish Balay    matrix is square and the column and row owerships are identical.
9990ac07820SSatish Balay    This is a BUG. The only way to fix it seems to be to access
10000ac07820SSatish Balay    baij->A and baij->B directly and not through the MatZeroRows()
10010ac07820SSatish Balay    routine.
10020ac07820SSatish Balay */
1003*026e39d0SSatish Balay #undef __FUNCTION__
1004*026e39d0SSatish Balay #define __FUNCTION__ "MatZeroRows_MPIBAIJ"
10050ac07820SSatish Balay static int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag)
10060ac07820SSatish Balay {
10070ac07820SSatish Balay   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ *) A->data;
10080ac07820SSatish Balay   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
10090ac07820SSatish Balay   int            *procs,*nprocs,j,found,idx,nsends,*work;
10100ac07820SSatish Balay   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
10110ac07820SSatish Balay   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
10120ac07820SSatish Balay   int            *lens,imdex,*lrows,*values,bs=l->bs;
10130ac07820SSatish Balay   MPI_Comm       comm = A->comm;
10140ac07820SSatish Balay   MPI_Request    *send_waits,*recv_waits;
10150ac07820SSatish Balay   MPI_Status     recv_status,*send_status;
10160ac07820SSatish Balay   IS             istmp;
10170ac07820SSatish Balay 
10180ac07820SSatish Balay   ierr = ISGetSize(is,&N); CHKERRQ(ierr);
10190ac07820SSatish Balay   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
10200ac07820SSatish Balay 
10210ac07820SSatish Balay   /*  first count number of contributors to each processor */
10220ac07820SSatish Balay   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
10230ac07820SSatish Balay   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
10240ac07820SSatish Balay   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
10250ac07820SSatish Balay   for ( i=0; i<N; i++ ) {
10260ac07820SSatish Balay     idx = rows[i];
10270ac07820SSatish Balay     found = 0;
10280ac07820SSatish Balay     for ( j=0; j<size; j++ ) {
10290ac07820SSatish Balay       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
10300ac07820SSatish Balay         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
10310ac07820SSatish Balay       }
10320ac07820SSatish Balay     }
10330ac07820SSatish Balay     if (!found) SETERRQ(1,"MatZeroRows_MPIBAIJ:Index out of range");
10340ac07820SSatish Balay   }
10350ac07820SSatish Balay   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
10360ac07820SSatish Balay 
10370ac07820SSatish Balay   /* inform other processors of number of messages and max length*/
10380ac07820SSatish Balay   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
10390ac07820SSatish Balay   MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
10400ac07820SSatish Balay   nrecvs = work[rank];
10410ac07820SSatish Balay   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
10420ac07820SSatish Balay   nmax = work[rank];
10430ac07820SSatish Balay   PetscFree(work);
10440ac07820SSatish Balay 
10450ac07820SSatish Balay   /* post receives:   */
10460ac07820SSatish Balay   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
10470ac07820SSatish Balay   CHKPTRQ(rvalues);
10480ac07820SSatish Balay   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
10490ac07820SSatish Balay   CHKPTRQ(recv_waits);
10500ac07820SSatish Balay   for ( i=0; i<nrecvs; i++ ) {
10510ac07820SSatish Balay     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
10520ac07820SSatish Balay   }
10530ac07820SSatish Balay 
10540ac07820SSatish Balay   /* do sends:
10550ac07820SSatish Balay       1) starts[i] gives the starting index in svalues for stuff going to
10560ac07820SSatish Balay          the ith processor
10570ac07820SSatish Balay   */
10580ac07820SSatish Balay   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
10590ac07820SSatish Balay   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
10600ac07820SSatish Balay   CHKPTRQ(send_waits);
10610ac07820SSatish Balay   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
10620ac07820SSatish Balay   starts[0] = 0;
10630ac07820SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
10640ac07820SSatish Balay   for ( i=0; i<N; i++ ) {
10650ac07820SSatish Balay     svalues[starts[owner[i]]++] = rows[i];
10660ac07820SSatish Balay   }
10670ac07820SSatish Balay   ISRestoreIndices(is,&rows);
10680ac07820SSatish Balay 
10690ac07820SSatish Balay   starts[0] = 0;
10700ac07820SSatish Balay   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
10710ac07820SSatish Balay   count = 0;
10720ac07820SSatish Balay   for ( i=0; i<size; i++ ) {
10730ac07820SSatish Balay     if (procs[i]) {
10740ac07820SSatish Balay       MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
10750ac07820SSatish Balay     }
10760ac07820SSatish Balay   }
10770ac07820SSatish Balay   PetscFree(starts);
10780ac07820SSatish Balay 
10790ac07820SSatish Balay   base = owners[rank]*bs;
10800ac07820SSatish Balay 
10810ac07820SSatish Balay   /*  wait on receives */
10820ac07820SSatish Balay   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
10830ac07820SSatish Balay   source = lens + nrecvs;
10840ac07820SSatish Balay   count  = nrecvs; slen = 0;
10850ac07820SSatish Balay   while (count) {
10860ac07820SSatish Balay     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
10870ac07820SSatish Balay     /* unpack receives into our local space */
10880ac07820SSatish Balay     MPI_Get_count(&recv_status,MPI_INT,&n);
10890ac07820SSatish Balay     source[imdex]  = recv_status.MPI_SOURCE;
10900ac07820SSatish Balay     lens[imdex]  = n;
10910ac07820SSatish Balay     slen += n;
10920ac07820SSatish Balay     count--;
10930ac07820SSatish Balay   }
10940ac07820SSatish Balay   PetscFree(recv_waits);
10950ac07820SSatish Balay 
10960ac07820SSatish Balay   /* move the data into the send scatter */
10970ac07820SSatish Balay   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
10980ac07820SSatish Balay   count = 0;
10990ac07820SSatish Balay   for ( i=0; i<nrecvs; i++ ) {
11000ac07820SSatish Balay     values = rvalues + i*nmax;
11010ac07820SSatish Balay     for ( j=0; j<lens[i]; j++ ) {
11020ac07820SSatish Balay       lrows[count++] = values[j] - base;
11030ac07820SSatish Balay     }
11040ac07820SSatish Balay   }
11050ac07820SSatish Balay   PetscFree(rvalues); PetscFree(lens);
11060ac07820SSatish Balay   PetscFree(owner); PetscFree(nprocs);
11070ac07820SSatish Balay 
11080ac07820SSatish Balay   /* actually zap the local rows */
1109537820f0SBarry Smith   ierr = ISCreateGeneral(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
11100ac07820SSatish Balay   PLogObjectParent(A,istmp);
11110ac07820SSatish Balay   PetscFree(lrows);
11120ac07820SSatish Balay   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
11130ac07820SSatish Balay   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
11140ac07820SSatish Balay   ierr = ISDestroy(istmp); CHKERRQ(ierr);
11150ac07820SSatish Balay 
11160ac07820SSatish Balay   /* wait on sends */
11170ac07820SSatish Balay   if (nsends) {
11180ac07820SSatish Balay     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
11190ac07820SSatish Balay     CHKPTRQ(send_status);
11200ac07820SSatish Balay     MPI_Waitall(nsends,send_waits,send_status);
11210ac07820SSatish Balay     PetscFree(send_status);
11220ac07820SSatish Balay   }
11230ac07820SSatish Balay   PetscFree(send_waits); PetscFree(svalues);
11240ac07820SSatish Balay 
11250ac07820SSatish Balay   return 0;
11260ac07820SSatish Balay }
1127ba4ca20aSSatish Balay extern int MatPrintHelp_SeqBAIJ(Mat);
1128*026e39d0SSatish Balay #undef __FUNCTION__
1129*026e39d0SSatish Balay #define __FUNCTION__ "MatPrintHelp_MPIBAIJ"
1130ba4ca20aSSatish Balay static int MatPrintHelp_MPIBAIJ(Mat A)
1131ba4ca20aSSatish Balay {
1132ba4ca20aSSatish Balay   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
1133ba4ca20aSSatish Balay 
1134ba4ca20aSSatish Balay   if (!a->rank) return MatPrintHelp_SeqBAIJ(a->A);
1135ba4ca20aSSatish Balay   else return 0;
1136ba4ca20aSSatish Balay }
11370ac07820SSatish Balay 
11380ac07820SSatish Balay static int MatConvertSameType_MPIBAIJ(Mat,Mat *,int);
11390ac07820SSatish Balay 
114079bdfe76SSatish Balay /* -------------------------------------------------------------------*/
114179bdfe76SSatish Balay static struct _MatOps MatOps = {
1142bd16c2feSSatish Balay   MatSetValues_MPIBAIJ,MatGetRow_MPIBAIJ,MatRestoreRow_MPIBAIJ,MatMult_MPIBAIJ,
11434c50302cSBarry Smith   MatMultAdd_MPIBAIJ,MatMultTrans_MPIBAIJ,MatMultTransAdd_MPIBAIJ,0,
11444c50302cSBarry Smith   0,0,0,0,
11450ac07820SSatish Balay   0,0,MatTranspose_MPIBAIJ,MatGetInfo_MPIBAIJ,
11460e95ebc0SSatish Balay   0,MatGetDiagonal_MPIBAIJ,MatDiagonalScale_MPIBAIJ,MatNorm_MPIBAIJ,
114758667388SSatish Balay   MatAssemblyBegin_MPIBAIJ,MatAssemblyEnd_MPIBAIJ,0,MatSetOption_MPIBAIJ,
11484c50302cSBarry Smith   MatZeroEntries_MPIBAIJ,MatZeroRows_MPIBAIJ,0,
11494c50302cSBarry Smith   0,0,0,MatGetSize_MPIBAIJ,
11504c50302cSBarry Smith   MatGetLocalSize_MPIBAIJ,MatGetOwnershipRange_MPIBAIJ,0,0,
1151905e6a2fSBarry Smith   0,0,0,MatConvertSameType_MPIBAIJ,0,0,
1152d212a18eSSatish Balay   0,0,0,MatGetSubMatrices_MPIBAIJ,
1153ba4ca20aSSatish Balay   MatIncreaseOverlap_MPIBAIJ,MatGetValues_MPIBAIJ,0,MatPrintHelp_MPIBAIJ,
11544c50302cSBarry Smith   MatScale_MPIBAIJ,0,0,0,MatGetBlockSize_MPIBAIJ};
115579bdfe76SSatish Balay 
115679bdfe76SSatish Balay 
1157*026e39d0SSatish Balay #undef __FUNCTION__
1158*026e39d0SSatish Balay #define __FUNCTION__ "MatCreateMPIBAIJ"
115979bdfe76SSatish Balay /*@C
116079bdfe76SSatish Balay    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
116179bdfe76SSatish Balay    (block compressed row).  For good matrix assembly performance
116279bdfe76SSatish Balay    the user should preallocate the matrix storage by setting the parameters
116379bdfe76SSatish Balay    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
116479bdfe76SSatish Balay    performance can be increased by more than a factor of 50.
116579bdfe76SSatish Balay 
116679bdfe76SSatish Balay    Input Parameters:
116779bdfe76SSatish Balay .  comm - MPI communicator
116879bdfe76SSatish Balay .  bs   - size of blockk
116979bdfe76SSatish Balay .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
117092e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
117192e8d321SLois Curfman McInnes            y vector for the matrix-vector product y = Ax.
117292e8d321SLois Curfman McInnes .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
117392e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
117492e8d321SLois Curfman McInnes            x vector for the matrix-vector product y = Ax.
117579bdfe76SSatish Balay .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
117692e8d321SLois Curfman McInnes .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
117779bdfe76SSatish Balay .  d_nz  - number of block nonzeros per block row in diagonal portion of local
117879bdfe76SSatish Balay            submatrix  (same for all local rows)
117992e8d321SLois Curfman McInnes .  d_nzz - array containing the number of block nonzeros in the various block rows
118092e8d321SLois Curfman McInnes            of the in diagonal portion of the local (possibly different for each block
118192e8d321SLois Curfman McInnes            row) or PETSC_NULL.  You must leave room for the diagonal entry even if
118292e8d321SLois Curfman McInnes            it is zero.
118392e8d321SLois Curfman McInnes .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
118479bdfe76SSatish Balay            submatrix (same for all local rows).
118592e8d321SLois Curfman McInnes .  o_nzz - array containing the number of nonzeros in the various block rows of the
118692e8d321SLois Curfman McInnes            off-diagonal portion of the local submatrix (possibly different for
118792e8d321SLois Curfman McInnes            each block row) or PETSC_NULL.
118879bdfe76SSatish Balay 
118979bdfe76SSatish Balay    Output Parameter:
119079bdfe76SSatish Balay .  A - the matrix
119179bdfe76SSatish Balay 
119279bdfe76SSatish Balay    Notes:
119379bdfe76SSatish Balay    The user MUST specify either the local or global matrix dimensions
119479bdfe76SSatish Balay    (possibly both).
119579bdfe76SSatish Balay 
119679bdfe76SSatish Balay    Storage Information:
119779bdfe76SSatish Balay    For a square global matrix we define each processor's diagonal portion
119879bdfe76SSatish Balay    to be its local rows and the corresponding columns (a square submatrix);
119979bdfe76SSatish Balay    each processor's off-diagonal portion encompasses the remainder of the
120079bdfe76SSatish Balay    local matrix (a rectangular submatrix).
120179bdfe76SSatish Balay 
120279bdfe76SSatish Balay    The user can specify preallocated storage for the diagonal part of
120379bdfe76SSatish Balay    the local submatrix with either d_nz or d_nnz (not both).  Set
120479bdfe76SSatish Balay    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
120579bdfe76SSatish Balay    memory allocation.  Likewise, specify preallocated storage for the
120679bdfe76SSatish Balay    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
120779bdfe76SSatish Balay 
120879bdfe76SSatish Balay    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
120979bdfe76SSatish Balay    the figure below we depict these three local rows and all columns (0-11).
121079bdfe76SSatish Balay 
121179bdfe76SSatish Balay $          0 1 2 3 4 5 6 7 8 9 10 11
121279bdfe76SSatish Balay $         -------------------
121379bdfe76SSatish Balay $  row 3  |  o o o d d d o o o o o o
121479bdfe76SSatish Balay $  row 4  |  o o o d d d o o o o o o
121579bdfe76SSatish Balay $  row 5  |  o o o d d d o o o o o o
121679bdfe76SSatish Balay $         -------------------
121779bdfe76SSatish Balay $
121879bdfe76SSatish Balay 
121979bdfe76SSatish Balay    Thus, any entries in the d locations are stored in the d (diagonal)
122079bdfe76SSatish Balay    submatrix, and any entries in the o locations are stored in the
122179bdfe76SSatish Balay    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
122257b952d6SSatish Balay    stored simply in the MATSEQBAIJ format for compressed row storage.
122379bdfe76SSatish Balay 
122479bdfe76SSatish Balay    Now d_nz should indicate the number of nonzeros per row in the d matrix,
122579bdfe76SSatish Balay    and o_nz should indicate the number of nonzeros per row in the o matrix.
122679bdfe76SSatish Balay    In general, for PDE problems in which most nonzeros are near the diagonal,
122792e8d321SLois Curfman McInnes    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
122892e8d321SLois Curfman McInnes    or you will get TERRIBLE performance; see the users' manual chapter on
12296da5968aSLois Curfman McInnes    matrices.
123079bdfe76SSatish Balay 
123192e8d321SLois Curfman McInnes .keywords: matrix, block, aij, compressed row, sparse, parallel
123279bdfe76SSatish Balay 
123379bdfe76SSatish Balay .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues()
123479bdfe76SSatish Balay @*/
123579bdfe76SSatish Balay int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,
123679bdfe76SSatish Balay                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
123779bdfe76SSatish Balay {
123879bdfe76SSatish Balay   Mat          B;
123979bdfe76SSatish Balay   Mat_MPIBAIJ  *b;
12404c50302cSBarry Smith   int          ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size;
124179bdfe76SSatish Balay 
124279bdfe76SSatish Balay   if (bs < 1) SETERRQ(1,"MatCreateMPIBAIJ: invalid block size specified");
124379bdfe76SSatish Balay   *A = 0;
124479bdfe76SSatish Balay   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIBAIJ,comm);
124579bdfe76SSatish Balay   PLogObjectCreate(B);
124679bdfe76SSatish Balay   B->data       = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b);
124779bdfe76SSatish Balay   PetscMemzero(b,sizeof(Mat_MPIBAIJ));
124879bdfe76SSatish Balay   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
12494c50302cSBarry Smith   MPI_Comm_size(comm,&size);
12504c50302cSBarry Smith   if (size == 1) {
12514c50302cSBarry Smith     B->ops.getrowij          = MatGetRowIJ_MPIBAIJ;
12524c50302cSBarry Smith     B->ops.restorerowij      = MatRestoreRowIJ_MPIBAIJ;
12534c50302cSBarry Smith     B->ops.lufactorsymbolic  = MatLUFactorSymbolic_MPIBAIJ;
12544c50302cSBarry Smith     B->ops.lufactornumeric   = MatLUFactorNumeric_MPIBAIJ;
12554c50302cSBarry Smith     B->ops.lufactor          = MatLUFactor_MPIBAIJ;
12564c50302cSBarry Smith     B->ops.solve             = MatSolve_MPIBAIJ;
12574c50302cSBarry Smith     B->ops.solveadd          = MatSolveAdd_MPIBAIJ;
12584c50302cSBarry Smith     B->ops.solvetrans        = MatSolveTrans_MPIBAIJ;
12594c50302cSBarry Smith     B->ops.solvetransadd     = MatSolveTransAdd_MPIBAIJ;
12604c50302cSBarry Smith     B->ops.ilufactorsymbolic = MatILUFactorSymbolic_MPIBAIJ;
12614c50302cSBarry Smith   }
12624c50302cSBarry Smith 
126379bdfe76SSatish Balay   B->destroy    = MatDestroy_MPIBAIJ;
126479bdfe76SSatish Balay   B->view       = MatView_MPIBAIJ;
126590f02eecSBarry Smith   B->mapping    = 0;
126679bdfe76SSatish Balay   B->factor     = 0;
126779bdfe76SSatish Balay   B->assembled  = PETSC_FALSE;
126879bdfe76SSatish Balay 
126979bdfe76SSatish Balay   b->insertmode = NOT_SET_VALUES;
127079bdfe76SSatish Balay   MPI_Comm_rank(comm,&b->rank);
127179bdfe76SSatish Balay   MPI_Comm_size(comm,&b->size);
127279bdfe76SSatish Balay 
127379bdfe76SSatish Balay   if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
127457b952d6SSatish Balay     SETERRQ(1,"MatCreateMPIBAIJ:Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
1275cee3aa6bSSatish Balay   if ( M == PETSC_DECIDE && m == PETSC_DECIDE) SETERRQ(1,"MatCreateMPIBAIJ: either M or m should be specified");
1276cee3aa6bSSatish Balay   if ( M == PETSC_DECIDE && n == PETSC_DECIDE)SETERRQ(1,"MatCreateMPIBAIJ: either N or n should be specified");
1277cee3aa6bSSatish Balay   if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE;
1278cee3aa6bSSatish Balay   if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE;
127979bdfe76SSatish Balay 
128079bdfe76SSatish Balay   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
128179bdfe76SSatish Balay     work[0] = m; work[1] = n;
128279bdfe76SSatish Balay     mbs = m/bs; nbs = n/bs;
128379bdfe76SSatish Balay     MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
128479bdfe76SSatish Balay     if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;}
128579bdfe76SSatish Balay     if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;}
128679bdfe76SSatish Balay   }
128779bdfe76SSatish Balay   if (m == PETSC_DECIDE) {
128879bdfe76SSatish Balay     Mbs = M/bs;
128979bdfe76SSatish Balay     if (Mbs*bs != M) SETERRQ(1,"MatCreateMPIBAIJ: No of global rows must be divisible by blocksize");
129079bdfe76SSatish Balay     mbs = Mbs/b->size + ((Mbs % b->size) > b->rank);
129179bdfe76SSatish Balay     m   = mbs*bs;
129279bdfe76SSatish Balay   }
129379bdfe76SSatish Balay   if (n == PETSC_DECIDE) {
129479bdfe76SSatish Balay     Nbs = N/bs;
129579bdfe76SSatish Balay     if (Nbs*bs != N) SETERRQ(1,"MatCreateMPIBAIJ: No of global cols must be divisible by blocksize");
129679bdfe76SSatish Balay     nbs = Nbs/b->size + ((Nbs % b->size) > b->rank);
129779bdfe76SSatish Balay     n   = nbs*bs;
129879bdfe76SSatish Balay   }
1299cee3aa6bSSatish Balay   if (mbs*bs != m || nbs*bs != n) SETERRQ(1,"MatCreateMPIBAIJ: No of local rows, cols must be divisible by blocksize");
130079bdfe76SSatish Balay 
130179bdfe76SSatish Balay   b->m = m; B->m = m;
130279bdfe76SSatish Balay   b->n = n; B->n = n;
130379bdfe76SSatish Balay   b->N = N; B->N = N;
130479bdfe76SSatish Balay   b->M = M; B->M = M;
130579bdfe76SSatish Balay   b->bs  = bs;
130679bdfe76SSatish Balay   b->bs2 = bs*bs;
130779bdfe76SSatish Balay   b->mbs = mbs;
130879bdfe76SSatish Balay   b->nbs = nbs;
130979bdfe76SSatish Balay   b->Mbs = Mbs;
131079bdfe76SSatish Balay   b->Nbs = Nbs;
131179bdfe76SSatish Balay 
131279bdfe76SSatish Balay   /* build local table of row and column ownerships */
131379bdfe76SSatish Balay   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
131479bdfe76SSatish Balay   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIBAIJ));
13150ac07820SSatish Balay   b->cowners = b->rowners + b->size + 2;
131679bdfe76SSatish Balay   MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);
131779bdfe76SSatish Balay   b->rowners[0] = 0;
131879bdfe76SSatish Balay   for ( i=2; i<=b->size; i++ ) {
131979bdfe76SSatish Balay     b->rowners[i] += b->rowners[i-1];
132079bdfe76SSatish Balay   }
132179bdfe76SSatish Balay   b->rstart    = b->rowners[b->rank];
132279bdfe76SSatish Balay   b->rend      = b->rowners[b->rank+1];
13234fa0d573SSatish Balay   b->rstart_bs = b->rstart * bs;
13244fa0d573SSatish Balay   b->rend_bs   = b->rend * bs;
13254fa0d573SSatish Balay 
132679bdfe76SSatish Balay   MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);
132779bdfe76SSatish Balay   b->cowners[0] = 0;
132879bdfe76SSatish Balay   for ( i=2; i<=b->size; i++ ) {
132979bdfe76SSatish Balay     b->cowners[i] += b->cowners[i-1];
133079bdfe76SSatish Balay   }
133179bdfe76SSatish Balay   b->cstart    = b->cowners[b->rank];
133279bdfe76SSatish Balay   b->cend      = b->cowners[b->rank+1];
13334fa0d573SSatish Balay   b->cstart_bs = b->cstart * bs;
13344fa0d573SSatish Balay   b->cend_bs   = b->cend * bs;
133579bdfe76SSatish Balay 
133679bdfe76SSatish Balay   if (d_nz == PETSC_DEFAULT) d_nz = 5;
133779bdfe76SSatish Balay   ierr = MatCreateSeqBAIJ(MPI_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
133879bdfe76SSatish Balay   PLogObjectParent(B,b->A);
133979bdfe76SSatish Balay   if (o_nz == PETSC_DEFAULT) o_nz = 0;
134079bdfe76SSatish Balay   ierr = MatCreateSeqBAIJ(MPI_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
134179bdfe76SSatish Balay   PLogObjectParent(B,b->B);
134279bdfe76SSatish Balay 
134379bdfe76SSatish Balay   /* build cache for off array entries formed */
134479bdfe76SSatish Balay   ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
134590f02eecSBarry Smith   b->donotstash  = 0;
134679bdfe76SSatish Balay   b->colmap      = 0;
134779bdfe76SSatish Balay   b->garray      = 0;
134879bdfe76SSatish Balay   b->roworiented = 1;
134979bdfe76SSatish Balay 
135079bdfe76SSatish Balay   /* stuff used for matrix vector multiply */
135179bdfe76SSatish Balay   b->lvec      = 0;
135279bdfe76SSatish Balay   b->Mvctx     = 0;
135379bdfe76SSatish Balay 
135479bdfe76SSatish Balay   /* stuff for MatGetRow() */
135579bdfe76SSatish Balay   b->rowindices   = 0;
135679bdfe76SSatish Balay   b->rowvalues    = 0;
135779bdfe76SSatish Balay   b->getrowactive = PETSC_FALSE;
135879bdfe76SSatish Balay 
135979bdfe76SSatish Balay   *A = B;
136079bdfe76SSatish Balay   return 0;
136179bdfe76SSatish Balay }
1362*026e39d0SSatish Balay 
1363*026e39d0SSatish Balay #undef __FUNCTION__
1364*026e39d0SSatish Balay #define __FUNCTION__ "MatConvertSameType_MPIBAIJ"
13650ac07820SSatish Balay static int MatConvertSameType_MPIBAIJ(Mat matin,Mat *newmat,int cpvalues)
13660ac07820SSatish Balay {
13670ac07820SSatish Balay   Mat        mat;
13680ac07820SSatish Balay   Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data;
13690ac07820SSatish Balay   int        ierr, len=0, flg;
13700ac07820SSatish Balay 
13710ac07820SSatish Balay   *newmat       = 0;
13720ac07820SSatish Balay   PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIBAIJ,matin->comm);
13730ac07820SSatish Balay   PLogObjectCreate(mat);
13740ac07820SSatish Balay   mat->data       = (void *) (a = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(a);
13750ac07820SSatish Balay   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
13760ac07820SSatish Balay   mat->destroy    = MatDestroy_MPIBAIJ;
13770ac07820SSatish Balay   mat->view       = MatView_MPIBAIJ;
13780ac07820SSatish Balay   mat->factor     = matin->factor;
13790ac07820SSatish Balay   mat->assembled  = PETSC_TRUE;
13800ac07820SSatish Balay 
13810ac07820SSatish Balay   a->m = mat->m   = oldmat->m;
13820ac07820SSatish Balay   a->n = mat->n   = oldmat->n;
13830ac07820SSatish Balay   a->M = mat->M   = oldmat->M;
13840ac07820SSatish Balay   a->N = mat->N   = oldmat->N;
13850ac07820SSatish Balay 
13860ac07820SSatish Balay   a->bs  = oldmat->bs;
13870ac07820SSatish Balay   a->bs2 = oldmat->bs2;
13880ac07820SSatish Balay   a->mbs = oldmat->mbs;
13890ac07820SSatish Balay   a->nbs = oldmat->nbs;
13900ac07820SSatish Balay   a->Mbs = oldmat->Mbs;
13910ac07820SSatish Balay   a->Nbs = oldmat->Nbs;
13920ac07820SSatish Balay 
13930ac07820SSatish Balay   a->rstart       = oldmat->rstart;
13940ac07820SSatish Balay   a->rend         = oldmat->rend;
13950ac07820SSatish Balay   a->cstart       = oldmat->cstart;
13960ac07820SSatish Balay   a->cend         = oldmat->cend;
13970ac07820SSatish Balay   a->size         = oldmat->size;
13980ac07820SSatish Balay   a->rank         = oldmat->rank;
13990ac07820SSatish Balay   a->insertmode   = NOT_SET_VALUES;
14000ac07820SSatish Balay   a->rowvalues    = 0;
14010ac07820SSatish Balay   a->getrowactive = PETSC_FALSE;
14020ac07820SSatish Balay 
14030ac07820SSatish Balay   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
14040ac07820SSatish Balay   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIBAIJ));
14050ac07820SSatish Balay   a->cowners = a->rowners + a->size + 2;
14060ac07820SSatish Balay   PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
14070ac07820SSatish Balay   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
14080ac07820SSatish Balay   if (oldmat->colmap) {
14090ac07820SSatish Balay     a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap);
14100ac07820SSatish Balay     PLogObjectMemory(mat,(a->Nbs)*sizeof(int));
14110ac07820SSatish Balay     PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));
14120ac07820SSatish Balay   } else a->colmap = 0;
14130ac07820SSatish Balay   if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) {
14140ac07820SSatish Balay     a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray);
14150ac07820SSatish Balay     PLogObjectMemory(mat,len*sizeof(int));
14160ac07820SSatish Balay     PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
14170ac07820SSatish Balay   } else a->garray = 0;
14180ac07820SSatish Balay 
14190ac07820SSatish Balay   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
14200ac07820SSatish Balay   PLogObjectParent(mat,a->lvec);
14210ac07820SSatish Balay   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
14220ac07820SSatish Balay   PLogObjectParent(mat,a->Mvctx);
14230ac07820SSatish Balay   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
14240ac07820SSatish Balay   PLogObjectParent(mat,a->A);
14250ac07820SSatish Balay   ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
14260ac07820SSatish Balay   PLogObjectParent(mat,a->B);
14270ac07820SSatish Balay   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
14280ac07820SSatish Balay   if (flg) {
14290ac07820SSatish Balay     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
14300ac07820SSatish Balay   }
14310ac07820SSatish Balay   *newmat = mat;
14320ac07820SSatish Balay   return 0;
14330ac07820SSatish Balay }
143457b952d6SSatish Balay 
143557b952d6SSatish Balay #include "sys.h"
143657b952d6SSatish Balay 
1437*026e39d0SSatish Balay #undef __FUNCTION__
1438*026e39d0SSatish Balay #define __FUNCTION__ "MatLoad_MPIBAIJ"
143957b952d6SSatish Balay int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat)
144057b952d6SSatish Balay {
144157b952d6SSatish Balay   Mat          A;
144257b952d6SSatish Balay   int          i, nz, ierr, j,rstart, rend, fd;
144357b952d6SSatish Balay   Scalar       *vals,*buf;
144457b952d6SSatish Balay   MPI_Comm     comm = ((PetscObject)viewer)->comm;
144557b952d6SSatish Balay   MPI_Status   status;
1446cee3aa6bSSatish Balay   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
144757b952d6SSatish Balay   int          *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf;
144857b952d6SSatish Balay   int          flg,tag = ((PetscObject)viewer)->tag,bs=1,bs2,Mbs,mbs,extra_rows;
144957b952d6SSatish Balay   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
145057b952d6SSatish Balay   int          dcount,kmax,k,nzcount,tmp;
145157b952d6SSatish Balay 
145257b952d6SSatish Balay 
145357b952d6SSatish Balay   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
145457b952d6SSatish Balay   bs2  = bs*bs;
145557b952d6SSatish Balay 
145657b952d6SSatish Balay   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
145757b952d6SSatish Balay   if (!rank) {
145857b952d6SSatish Balay     ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
145957b952d6SSatish Balay     ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr);
1460cee3aa6bSSatish Balay     if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIBAIJ:not matrix object");
146157b952d6SSatish Balay   }
146257b952d6SSatish Balay 
146357b952d6SSatish Balay   MPI_Bcast(header+1,3,MPI_INT,0,comm);
146457b952d6SSatish Balay   M = header[1]; N = header[2];
146557b952d6SSatish Balay 
1466b0267e0aSLois Curfman McInnes   if (M != N) SETERRQ(1,"MatLoad_MPIBAIJ:Can only do square matrices");
146757b952d6SSatish Balay 
146857b952d6SSatish Balay   /*
146957b952d6SSatish Balay      This code adds extra rows to make sure the number of rows is
147057b952d6SSatish Balay      divisible by the blocksize
147157b952d6SSatish Balay   */
147257b952d6SSatish Balay   Mbs        = M/bs;
147357b952d6SSatish Balay   extra_rows = bs - M + bs*(Mbs);
147457b952d6SSatish Balay   if (extra_rows == bs) extra_rows = 0;
147557b952d6SSatish Balay   else                  Mbs++;
147657b952d6SSatish Balay   if (extra_rows &&!rank) {
1477b0267e0aSLois Curfman McInnes     PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
147857b952d6SSatish Balay   }
1479537820f0SBarry Smith 
148057b952d6SSatish Balay   /* determine ownership of all rows */
148157b952d6SSatish Balay   mbs = Mbs/size + ((Mbs % size) > rank);
148257b952d6SSatish Balay   m   = mbs * bs;
1483cee3aa6bSSatish Balay   rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners);
1484cee3aa6bSSatish Balay   browners = rowners + size + 1;
148557b952d6SSatish Balay   MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);
148657b952d6SSatish Balay   rowners[0] = 0;
1487cee3aa6bSSatish Balay   for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1];
1488cee3aa6bSSatish Balay   for ( i=0; i<=size;  i++ ) browners[i] = rowners[i]*bs;
148957b952d6SSatish Balay   rstart = rowners[rank];
149057b952d6SSatish Balay   rend   = rowners[rank+1];
149157b952d6SSatish Balay 
149257b952d6SSatish Balay   /* distribute row lengths to all processors */
149357b952d6SSatish Balay   locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens);
149457b952d6SSatish Balay   if (!rank) {
149557b952d6SSatish Balay     rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths);
149657b952d6SSatish Balay     ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
149757b952d6SSatish Balay     for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
149857b952d6SSatish Balay     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
1499cee3aa6bSSatish Balay     for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i];
1500cee3aa6bSSatish Balay     MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);
150157b952d6SSatish Balay     PetscFree(sndcounts);
150257b952d6SSatish Balay   }
150357b952d6SSatish Balay   else {
150457b952d6SSatish Balay     MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);
150557b952d6SSatish Balay   }
150657b952d6SSatish Balay 
150757b952d6SSatish Balay   if (!rank) {
150857b952d6SSatish Balay     /* calculate the number of nonzeros on each processor */
150957b952d6SSatish Balay     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
151057b952d6SSatish Balay     PetscMemzero(procsnz,size*sizeof(int));
151157b952d6SSatish Balay     for ( i=0; i<size; i++ ) {
151257b952d6SSatish Balay       for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) {
151357b952d6SSatish Balay         procsnz[i] += rowlengths[j];
151457b952d6SSatish Balay       }
151557b952d6SSatish Balay     }
151657b952d6SSatish Balay     PetscFree(rowlengths);
151757b952d6SSatish Balay 
151857b952d6SSatish Balay     /* determine max buffer needed and allocate it */
151957b952d6SSatish Balay     maxnz = 0;
152057b952d6SSatish Balay     for ( i=0; i<size; i++ ) {
152157b952d6SSatish Balay       maxnz = PetscMax(maxnz,procsnz[i]);
152257b952d6SSatish Balay     }
152357b952d6SSatish Balay     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
152457b952d6SSatish Balay 
152557b952d6SSatish Balay     /* read in my part of the matrix column indices  */
152657b952d6SSatish Balay     nz = procsnz[0];
152757b952d6SSatish Balay     ibuf = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
152857b952d6SSatish Balay     mycols = ibuf;
1529cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
153057b952d6SSatish Balay     ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr);
1531cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
1532cee3aa6bSSatish Balay 
153357b952d6SSatish Balay     /* read in every ones (except the last) and ship off */
153457b952d6SSatish Balay     for ( i=1; i<size-1; i++ ) {
153557b952d6SSatish Balay       nz = procsnz[i];
153657b952d6SSatish Balay       ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
153757b952d6SSatish Balay       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
153857b952d6SSatish Balay     }
153957b952d6SSatish Balay     /* read in the stuff for the last proc */
154057b952d6SSatish Balay     if ( size != 1 ) {
154157b952d6SSatish Balay       nz = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
154257b952d6SSatish Balay       ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
154357b952d6SSatish Balay       for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i;
1544cee3aa6bSSatish Balay       MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);
154557b952d6SSatish Balay     }
154657b952d6SSatish Balay     PetscFree(cols);
154757b952d6SSatish Balay   }
154857b952d6SSatish Balay   else {
154957b952d6SSatish Balay     /* determine buffer space needed for message */
155057b952d6SSatish Balay     nz = 0;
155157b952d6SSatish Balay     for ( i=0; i<m; i++ ) {
155257b952d6SSatish Balay       nz += locrowlens[i];
155357b952d6SSatish Balay     }
155457b952d6SSatish Balay     ibuf = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
155557b952d6SSatish Balay     mycols = ibuf;
155657b952d6SSatish Balay     /* receive message of column indices*/
155757b952d6SSatish Balay     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
155857b952d6SSatish Balay     MPI_Get_count(&status,MPI_INT,&maxnz);
1559cee3aa6bSSatish Balay     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIBAIJ:something is wrong with file");
156057b952d6SSatish Balay   }
156157b952d6SSatish Balay 
156257b952d6SSatish Balay   /* loop over local rows, determining number of off diagonal entries */
1563cee3aa6bSSatish Balay   dlens  = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens);
1564cee3aa6bSSatish Balay   odlens = dlens + (rend-rstart);
156557b952d6SSatish Balay   mask   = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask);
1566cee3aa6bSSatish Balay   PetscMemzero(mask,3*Mbs*sizeof(int));
156757b952d6SSatish Balay   masked1 = mask    + Mbs;
156857b952d6SSatish Balay   masked2 = masked1 + Mbs;
156957b952d6SSatish Balay   rowcount = 0; nzcount = 0;
157057b952d6SSatish Balay   for ( i=0; i<mbs; i++ ) {
157157b952d6SSatish Balay     dcount  = 0;
157257b952d6SSatish Balay     odcount = 0;
157357b952d6SSatish Balay     for ( j=0; j<bs; j++ ) {
157457b952d6SSatish Balay       kmax = locrowlens[rowcount];
157557b952d6SSatish Balay       for ( k=0; k<kmax; k++ ) {
157657b952d6SSatish Balay         tmp = mycols[nzcount++]/bs;
157757b952d6SSatish Balay         if (!mask[tmp]) {
157857b952d6SSatish Balay           mask[tmp] = 1;
157957b952d6SSatish Balay           if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp;
158057b952d6SSatish Balay           else masked1[dcount++] = tmp;
158157b952d6SSatish Balay         }
158257b952d6SSatish Balay       }
158357b952d6SSatish Balay       rowcount++;
158457b952d6SSatish Balay     }
1585cee3aa6bSSatish Balay 
158657b952d6SSatish Balay     dlens[i]  = dcount;
158757b952d6SSatish Balay     odlens[i] = odcount;
1588cee3aa6bSSatish Balay 
158957b952d6SSatish Balay     /* zero out the mask elements we set */
159057b952d6SSatish Balay     for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0;
159157b952d6SSatish Balay     for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0;
159257b952d6SSatish Balay   }
1593cee3aa6bSSatish Balay 
159457b952d6SSatish Balay   /* create our matrix */
1595537820f0SBarry Smith   ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);
1596537820f0SBarry Smith          CHKERRQ(ierr);
159757b952d6SSatish Balay   A = *newmat;
15986d4a8577SBarry Smith   MatSetOption(A,MAT_COLUMNS_SORTED);
159957b952d6SSatish Balay 
160057b952d6SSatish Balay   if (!rank) {
160157b952d6SSatish Balay     buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf);
160257b952d6SSatish Balay     /* read in my part of the matrix numerical values  */
160357b952d6SSatish Balay     nz = procsnz[0];
160457b952d6SSatish Balay     vals = buf;
1605cee3aa6bSSatish Balay     mycols = ibuf;
1606cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
160757b952d6SSatish Balay     ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
1608cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
1609537820f0SBarry Smith 
161057b952d6SSatish Balay     /* insert into matrix */
161157b952d6SSatish Balay     jj      = rstart*bs;
161257b952d6SSatish Balay     for ( i=0; i<m; i++ ) {
161357b952d6SSatish Balay       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
161457b952d6SSatish Balay       mycols += locrowlens[i];
161557b952d6SSatish Balay       vals   += locrowlens[i];
161657b952d6SSatish Balay       jj++;
161757b952d6SSatish Balay     }
161857b952d6SSatish Balay     /* read in other processors (except the last one) and ship out */
161957b952d6SSatish Balay     for ( i=1; i<size-1; i++ ) {
162057b952d6SSatish Balay       nz = procsnz[i];
162157b952d6SSatish Balay       vals = buf;
162257b952d6SSatish Balay       ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
162357b952d6SSatish Balay       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
162457b952d6SSatish Balay     }
162557b952d6SSatish Balay     /* the last proc */
162657b952d6SSatish Balay     if ( size != 1 ){
162757b952d6SSatish Balay       nz = procsnz[i] - extra_rows;
1628cee3aa6bSSatish Balay       vals = buf;
162957b952d6SSatish Balay       ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
163057b952d6SSatish Balay       for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0;
1631cee3aa6bSSatish Balay       MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);
163257b952d6SSatish Balay     }
163357b952d6SSatish Balay     PetscFree(procsnz);
163457b952d6SSatish Balay   }
163557b952d6SSatish Balay   else {
163657b952d6SSatish Balay     /* receive numeric values */
163757b952d6SSatish Balay     buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf);
163857b952d6SSatish Balay 
163957b952d6SSatish Balay     /* receive message of values*/
164057b952d6SSatish Balay     vals = buf;
1641cee3aa6bSSatish Balay     mycols = ibuf;
164257b952d6SSatish Balay     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
164357b952d6SSatish Balay     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1644cee3aa6bSSatish Balay     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIBAIJ:something is wrong with file");
164557b952d6SSatish Balay 
164657b952d6SSatish Balay     /* insert into matrix */
164757b952d6SSatish Balay     jj      = rstart*bs;
1648cee3aa6bSSatish Balay     for ( i=0; i<m; i++ ) {
164957b952d6SSatish Balay       ierr    = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
165057b952d6SSatish Balay       mycols += locrowlens[i];
165157b952d6SSatish Balay       vals   += locrowlens[i];
165257b952d6SSatish Balay       jj++;
165357b952d6SSatish Balay     }
165457b952d6SSatish Balay   }
165557b952d6SSatish Balay   PetscFree(locrowlens);
165657b952d6SSatish Balay   PetscFree(buf);
165757b952d6SSatish Balay   PetscFree(ibuf);
165857b952d6SSatish Balay   PetscFree(rowners);
165957b952d6SSatish Balay   PetscFree(dlens);
1660cee3aa6bSSatish Balay   PetscFree(mask);
16616d4a8577SBarry Smith   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
16626d4a8577SBarry Smith   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
166357b952d6SSatish Balay   return 0;
166457b952d6SSatish Balay }
166557b952d6SSatish Balay 
166657b952d6SSatish Balay 
1667