xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 2c3acbe9ddcb1424b0c1a2c5c7ff7d317d17c150)
179bdfe76SSatish Balay #ifndef lint
2*2c3acbe9SBarry Smith static char vcid[] = "$Id: mpibaij.c,v 1.38 1996/12/01 17:55:24 curfman Exp bsmith $";
379bdfe76SSatish Balay #endif
479bdfe76SSatish Balay 
570f55243SBarry Smith #include "src/mat/impls/baij/mpi/mpibaij.h"
6c16cb8f2SBarry Smith #include "src/vec/vecimpl.h"
779bdfe76SSatish Balay 
857b952d6SSatish Balay #include "draw.h"
957b952d6SSatish Balay #include "pinclude/pviewer.h"
1057b952d6SSatish Balay 
1157b952d6SSatish Balay extern int MatSetUpMultiply_MPIBAIJ(Mat);
1257b952d6SSatish Balay extern int DisAssemble_MPIBAIJ(Mat);
13d212a18eSSatish Balay extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int);
14d212a18eSSatish Balay extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatGetSubMatrixCall,Mat **);
1593bc47c4SSatish Balay extern int MatLUFactorSymbolic_MPIBAIJ(Mat,IS,IS,double,Mat *);
1693bc47c4SSatish Balay extern int MatLUFactorNumeric_MPIBAIJ(Mat,Mat *);
1793bc47c4SSatish Balay extern int MatLUFactor_MPIBAIJ(Mat,IS,IS,double);
1893bc47c4SSatish Balay extern int MatSolve_MPIBAIJ(Mat,Vec,Vec);
1993bc47c4SSatish Balay extern int MatSolveAdd_MPIBAIJ(Mat,Vec,Vec,Vec);
2093bc47c4SSatish Balay extern int MatSolveTrans_MPIBAIJ(Mat,Vec,Vec);
2193bc47c4SSatish Balay extern int MatSolveTransAdd_MPIBAIJ(Mat,Vec,Vec,Vec);
2293bc47c4SSatish Balay extern int MatILUFactorSymbolic_MPIBAIJ(Mat,IS,IS,double,int,Mat *);
2357b952d6SSatish Balay 
243b2fbd54SBarry Smith 
25537820f0SBarry Smith /*
26537820f0SBarry Smith      Local utility routine that creates a mapping from the global column
2757b952d6SSatish Balay    number to the local number in the off-diagonal part of the local
2857b952d6SSatish Balay    storage of the matrix.  This is done in a non scable way since the
2957b952d6SSatish Balay    length of colmap equals the global matrix length.
3057b952d6SSatish Balay */
3157b952d6SSatish Balay static int CreateColmap_MPIBAIJ_Private(Mat mat)
3257b952d6SSatish Balay {
3357b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
3457b952d6SSatish Balay   Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data;
35928fc39bSSatish Balay   int         nbs = B->nbs,i,bs=B->bs;;
3657b952d6SSatish Balay 
3757b952d6SSatish Balay   baij->colmap = (int *) PetscMalloc(baij->Nbs*sizeof(int));CHKPTRQ(baij->colmap);
3857b952d6SSatish Balay   PLogObjectMemory(mat,baij->Nbs*sizeof(int));
3957b952d6SSatish Balay   PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));
40928fc39bSSatish Balay   for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1;
4157b952d6SSatish Balay   return 0;
4257b952d6SSatish Balay }
4357b952d6SSatish Balay 
443b2fbd54SBarry Smith static int MatGetRowIJ_MPIBAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
453b2fbd54SBarry Smith                             PetscTruth *done)
4657b952d6SSatish Balay {
473b2fbd54SBarry Smith   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *) mat->data;
4857b952d6SSatish Balay   int         ierr;
493b2fbd54SBarry Smith   if (aij->size == 1) {
503b2fbd54SBarry Smith     ierr = MatGetRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
513b2fbd54SBarry Smith   } else SETERRQ(1,"MatGetRowIJ_MPIBAIJ:not supported in parallel");
523b2fbd54SBarry Smith   return 0;
533b2fbd54SBarry Smith }
543b2fbd54SBarry Smith 
553b2fbd54SBarry Smith static int MatRestoreRowIJ_MPIBAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
563b2fbd54SBarry Smith                                 PetscTruth *done)
573b2fbd54SBarry Smith {
583b2fbd54SBarry Smith   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *) mat->data;
593b2fbd54SBarry Smith   int        ierr;
603b2fbd54SBarry Smith   if (aij->size == 1) {
613b2fbd54SBarry Smith     ierr = MatRestoreRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
623b2fbd54SBarry Smith   } else SETERRQ(1,"MatRestoreRowIJ_MPIBAIJ:not supported in parallel");
6357b952d6SSatish Balay   return 0;
6457b952d6SSatish Balay }
6557b952d6SSatish Balay 
66639f9d9dSBarry Smith extern int MatSetValues_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
6757b952d6SSatish Balay static int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
6857b952d6SSatish Balay {
6957b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
7057b952d6SSatish Balay   Scalar      value;
7157b952d6SSatish Balay   int         ierr,i,j, rstart = baij->rstart, rend = baij->rend;
7257b952d6SSatish Balay   int         cstart = baij->cstart, cend = baij->cend,row,col;
7357b952d6SSatish Balay   int         roworiented = baij->roworiented,rstart_orig,rend_orig;
7457b952d6SSatish Balay   int         cstart_orig,cend_orig,bs=baij->bs;
7557b952d6SSatish Balay 
76*2c3acbe9SBarry Smith #if defined(PETSC_BOPT_g)
7757b952d6SSatish Balay   if (baij->insertmode != NOT_SET_VALUES && baij->insertmode != addv) {
7857b952d6SSatish Balay     SETERRQ(1,"MatSetValues_MPIBAIJ:Cannot mix inserts and adds");
7957b952d6SSatish Balay   }
80*2c3acbe9SBarry Smith #endif
8157b952d6SSatish Balay   baij->insertmode = addv;
8257b952d6SSatish Balay   rstart_orig      = rstart*bs;
8357b952d6SSatish Balay   rend_orig        = rend*bs;
8457b952d6SSatish Balay   cstart_orig      = cstart*bs;
8557b952d6SSatish Balay   cend_orig        = cend*bs;
8657b952d6SSatish Balay   for ( i=0; i<m; i++ ) {
87639f9d9dSBarry Smith #if defined(PETSC_BOPT_g)
8857b952d6SSatish Balay     if (im[i] < 0) SETERRQ(1,"MatSetValues_MPIBAIJ:Negative row");
8957b952d6SSatish Balay     if (im[i] >= baij->M) SETERRQ(1,"MatSetValues_MPIBAIJ:Row too large");
90639f9d9dSBarry Smith #endif
9157b952d6SSatish Balay     if (im[i] >= rstart_orig && im[i] < rend_orig) {
9257b952d6SSatish Balay       row = im[i] - rstart_orig;
9357b952d6SSatish Balay       for ( j=0; j<n; j++ ) {
9457b952d6SSatish Balay         if (in[j] >= cstart_orig && in[j] < cend_orig){
9557b952d6SSatish Balay           col = in[j] - cstart_orig;
9657b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
97639f9d9dSBarry Smith           ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
9857b952d6SSatish Balay         }
99639f9d9dSBarry Smith #if defined(PETSC_BOPT_g)
100639f9d9dSBarry Smith         else if (in[j] < 0) {SETERRQ(1,"MatSetValues_MPIBAIJ:Negative column");}
101639f9d9dSBarry Smith         else if (in[j] >= baij->N) {SETERRQ(1,"MatSetValues_MPIBAIJ:Col too large");}
102639f9d9dSBarry Smith #endif
10357b952d6SSatish Balay         else {
10457b952d6SSatish Balay           if (mat->was_assembled) {
105905e6a2fSBarry Smith             if (!baij->colmap) {
106905e6a2fSBarry Smith               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
107905e6a2fSBarry Smith             }
108905e6a2fSBarry Smith             col = baij->colmap[in[j]/bs] - 1 + in[j]%bs;
10957b952d6SSatish Balay             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
11057b952d6SSatish Balay               ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
11157b952d6SSatish Balay               col =  in[j];
11257b952d6SSatish Balay             }
11357b952d6SSatish Balay           }
11457b952d6SSatish Balay           else col = in[j];
11557b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
116639f9d9dSBarry Smith           ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
11757b952d6SSatish Balay         }
11857b952d6SSatish Balay       }
11957b952d6SSatish Balay     }
12057b952d6SSatish Balay     else {
12190f02eecSBarry Smith       if (roworiented && !baij->donotstash) {
12257b952d6SSatish Balay         ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
12357b952d6SSatish Balay       }
12457b952d6SSatish Balay       else {
12590f02eecSBarry Smith         if (!baij->donotstash) {
12657b952d6SSatish Balay           row = im[i];
12757b952d6SSatish Balay 	  for ( j=0; j<n; j++ ) {
12857b952d6SSatish Balay 	    ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
12957b952d6SSatish Balay           }
13057b952d6SSatish Balay         }
13157b952d6SSatish Balay       }
13257b952d6SSatish Balay     }
13390f02eecSBarry Smith   }
13457b952d6SSatish Balay   return 0;
13557b952d6SSatish Balay }
13657b952d6SSatish Balay 
137d6de1c52SSatish Balay static int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
138d6de1c52SSatish Balay {
139d6de1c52SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
140d6de1c52SSatish Balay   int        bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs;
141d6de1c52SSatish Balay   int        bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col;
142d6de1c52SSatish Balay 
143d6de1c52SSatish Balay   for ( i=0; i<m; i++ ) {
144d6de1c52SSatish Balay     if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIBAIJ:Negative row");
145d6de1c52SSatish Balay     if (idxm[i] >= baij->M) SETERRQ(1,"MatGetValues_MPIBAIJ:Row too large");
146d6de1c52SSatish Balay     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
147d6de1c52SSatish Balay       row = idxm[i] - bsrstart;
148d6de1c52SSatish Balay       for ( j=0; j<n; j++ ) {
149d6de1c52SSatish Balay         if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIBAIJ:Negative column");
150d6de1c52SSatish Balay         if (idxn[j] >= baij->N) SETERRQ(1,"MatGetValues_MPIBAIJ:Col too large");
151d6de1c52SSatish Balay         if (idxn[j] >= bscstart && idxn[j] < bscend){
152d6de1c52SSatish Balay           col = idxn[j] - bscstart;
153d6de1c52SSatish Balay           ierr = MatGetValues(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
154d6de1c52SSatish Balay         }
155d6de1c52SSatish Balay         else {
156905e6a2fSBarry Smith           if (!baij->colmap) {
157905e6a2fSBarry Smith             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
158905e6a2fSBarry Smith           }
159e60e1c95SSatish Balay           if((baij->colmap[idxn[j]/bs]-1 < 0) ||
160e60e1c95SSatish Balay              (baij->garray[baij->colmap[idxn[j]/bs]-1] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
161d9d09a02SSatish Balay           else {
162905e6a2fSBarry Smith             col  = (baij->colmap[idxn[j]/bs]-1)*bs + idxn[j]%bs;
163d6de1c52SSatish Balay             ierr = MatGetValues(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
164d6de1c52SSatish Balay           }
165d6de1c52SSatish Balay         }
166d6de1c52SSatish Balay       }
167d9d09a02SSatish Balay     }
168d6de1c52SSatish Balay     else {
169d6de1c52SSatish Balay       SETERRQ(1,"MatGetValues_MPIBAIJ:Only local values currently supported");
170d6de1c52SSatish Balay     }
171d6de1c52SSatish Balay   }
172d6de1c52SSatish Balay   return 0;
173d6de1c52SSatish Balay }
174d6de1c52SSatish Balay 
175d6de1c52SSatish Balay static int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm)
176d6de1c52SSatish Balay {
177d6de1c52SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
178d6de1c52SSatish Balay   Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data;
179acdf5bf4SSatish Balay   int        ierr, i,bs2=baij->bs2;
180d6de1c52SSatish Balay   double     sum = 0.0;
181d6de1c52SSatish Balay   Scalar     *v;
182d6de1c52SSatish Balay 
183d6de1c52SSatish Balay   if (baij->size == 1) {
184d6de1c52SSatish Balay     ierr =  MatNorm(baij->A,type,norm); CHKERRQ(ierr);
185d6de1c52SSatish Balay   } else {
186d6de1c52SSatish Balay     if (type == NORM_FROBENIUS) {
187d6de1c52SSatish Balay       v = amat->a;
188d6de1c52SSatish Balay       for (i=0; i<amat->nz*bs2; i++ ) {
189d6de1c52SSatish Balay #if defined(PETSC_COMPLEX)
190d6de1c52SSatish Balay         sum += real(conj(*v)*(*v)); v++;
191d6de1c52SSatish Balay #else
192d6de1c52SSatish Balay         sum += (*v)*(*v); v++;
193d6de1c52SSatish Balay #endif
194d6de1c52SSatish Balay       }
195d6de1c52SSatish Balay       v = bmat->a;
196d6de1c52SSatish Balay       for (i=0; i<bmat->nz*bs2; i++ ) {
197d6de1c52SSatish Balay #if defined(PETSC_COMPLEX)
198d6de1c52SSatish Balay         sum += real(conj(*v)*(*v)); v++;
199d6de1c52SSatish Balay #else
200d6de1c52SSatish Balay         sum += (*v)*(*v); v++;
201d6de1c52SSatish Balay #endif
202d6de1c52SSatish Balay       }
203d6de1c52SSatish Balay       MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
204d6de1c52SSatish Balay       *norm = sqrt(*norm);
205d6de1c52SSatish Balay     }
206acdf5bf4SSatish Balay     else
207b0267e0aSLois Curfman McInnes       SETERRQ(PETSC_ERR_SUP,"MatNorm_MPIBAIJ:No support for this norm yet");
208d6de1c52SSatish Balay   }
209d6de1c52SSatish Balay   return 0;
210d6de1c52SSatish Balay }
21157b952d6SSatish Balay 
21257b952d6SSatish Balay static int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
21357b952d6SSatish Balay {
21457b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
21557b952d6SSatish Balay   MPI_Comm    comm = mat->comm;
21657b952d6SSatish Balay   int         size = baij->size, *owners = baij->rowners,bs=baij->bs;
21757b952d6SSatish Balay   int         rank = baij->rank,tag = mat->tag, *owner,*starts,count,ierr;
21857b952d6SSatish Balay   MPI_Request *send_waits,*recv_waits;
21957b952d6SSatish Balay   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
22057b952d6SSatish Balay   InsertMode  addv;
22157b952d6SSatish Balay   Scalar      *rvalues,*svalues;
22257b952d6SSatish Balay 
22357b952d6SSatish Balay   /* make sure all processors are either in INSERTMODE or ADDMODE */
22457b952d6SSatish Balay   MPI_Allreduce(&baij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
22557b952d6SSatish Balay   if (addv == (ADD_VALUES|INSERT_VALUES)) {
22657b952d6SSatish Balay     SETERRQ(1,"MatAssemblyBegin_MPIBAIJ:Some processors inserted others added");
22757b952d6SSatish Balay   }
22857b952d6SSatish Balay   baij->insertmode = addv; /* in case this processor had no cache */
22957b952d6SSatish Balay 
23057b952d6SSatish Balay   /*  first count number of contributors to each processor */
23157b952d6SSatish Balay   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
23257b952d6SSatish Balay   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
23357b952d6SSatish Balay   owner = (int *) PetscMalloc( (baij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
23457b952d6SSatish Balay   for ( i=0; i<baij->stash.n; i++ ) {
23557b952d6SSatish Balay     idx = baij->stash.idx[i];
23657b952d6SSatish Balay     for ( j=0; j<size; j++ ) {
23757b952d6SSatish Balay       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
23857b952d6SSatish Balay         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
23957b952d6SSatish Balay       }
24057b952d6SSatish Balay     }
24157b952d6SSatish Balay   }
24257b952d6SSatish Balay   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
24357b952d6SSatish Balay 
24457b952d6SSatish Balay   /* inform other processors of number of messages and max length*/
24557b952d6SSatish Balay   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
24657b952d6SSatish Balay   MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);
24757b952d6SSatish Balay   nreceives = work[rank];
24857b952d6SSatish Balay   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
24957b952d6SSatish Balay   nmax = work[rank];
25057b952d6SSatish Balay   PetscFree(work);
25157b952d6SSatish Balay 
25257b952d6SSatish Balay   /* post receives:
25357b952d6SSatish Balay        1) each message will consist of ordered pairs
25457b952d6SSatish Balay      (global index,value) we store the global index as a double
25557b952d6SSatish Balay      to simplify the message passing.
25657b952d6SSatish Balay        2) since we don't know how long each individual message is we
25757b952d6SSatish Balay      allocate the largest needed buffer for each receive. Potentially
25857b952d6SSatish Balay      this is a lot of wasted space.
25957b952d6SSatish Balay 
26057b952d6SSatish Balay 
26157b952d6SSatish Balay        This could be done better.
26257b952d6SSatish Balay   */
26357b952d6SSatish Balay   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
26457b952d6SSatish Balay   CHKPTRQ(rvalues);
26557b952d6SSatish Balay   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
26657b952d6SSatish Balay   CHKPTRQ(recv_waits);
26757b952d6SSatish Balay   for ( i=0; i<nreceives; i++ ) {
26857b952d6SSatish Balay     MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
26957b952d6SSatish Balay               comm,recv_waits+i);
27057b952d6SSatish Balay   }
27157b952d6SSatish Balay 
27257b952d6SSatish Balay   /* do sends:
27357b952d6SSatish Balay       1) starts[i] gives the starting index in svalues for stuff going to
27457b952d6SSatish Balay          the ith processor
27557b952d6SSatish Balay   */
27657b952d6SSatish Balay   svalues = (Scalar *) PetscMalloc(3*(baij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
27757b952d6SSatish Balay   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
27857b952d6SSatish Balay   CHKPTRQ(send_waits);
27957b952d6SSatish Balay   starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
28057b952d6SSatish Balay   starts[0] = 0;
28157b952d6SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
28257b952d6SSatish Balay   for ( i=0; i<baij->stash.n; i++ ) {
28357b952d6SSatish Balay     svalues[3*starts[owner[i]]]       = (Scalar)  baij->stash.idx[i];
28457b952d6SSatish Balay     svalues[3*starts[owner[i]]+1]     = (Scalar)  baij->stash.idy[i];
28557b952d6SSatish Balay     svalues[3*(starts[owner[i]]++)+2] =  baij->stash.array[i];
28657b952d6SSatish Balay   }
28757b952d6SSatish Balay   PetscFree(owner);
28857b952d6SSatish Balay   starts[0] = 0;
28957b952d6SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
29057b952d6SSatish Balay   count = 0;
29157b952d6SSatish Balay   for ( i=0; i<size; i++ ) {
29257b952d6SSatish Balay     if (procs[i]) {
29357b952d6SSatish Balay       MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
29457b952d6SSatish Balay                 comm,send_waits+count++);
29557b952d6SSatish Balay     }
29657b952d6SSatish Balay   }
29757b952d6SSatish Balay   PetscFree(starts); PetscFree(nprocs);
29857b952d6SSatish Balay 
29957b952d6SSatish Balay   /* Free cache space */
300d2dc9b81SLois Curfman McInnes   PLogInfo(mat,"MatAssemblyBegin_MPIBAIJ:Number of off-processor values %d\n",baij->stash.n);
30157b952d6SSatish Balay   ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr);
30257b952d6SSatish Balay 
30357b952d6SSatish Balay   baij->svalues    = svalues;    baij->rvalues    = rvalues;
30457b952d6SSatish Balay   baij->nsends     = nsends;     baij->nrecvs     = nreceives;
30557b952d6SSatish Balay   baij->send_waits = send_waits; baij->recv_waits = recv_waits;
30657b952d6SSatish Balay   baij->rmax       = nmax;
30757b952d6SSatish Balay 
30857b952d6SSatish Balay   return 0;
30957b952d6SSatish Balay }
31057b952d6SSatish Balay 
31157b952d6SSatish Balay 
31257b952d6SSatish Balay static int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
31357b952d6SSatish Balay {
31457b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
31557b952d6SSatish Balay   MPI_Status  *send_status,recv_status;
31657b952d6SSatish Balay   int         imdex,nrecvs = baij->nrecvs, count = nrecvs, i, n, ierr;
31757b952d6SSatish Balay   int         bs=baij->bs,row,col,other_disassembled;
31857b952d6SSatish Balay   Scalar      *values,val;
31957b952d6SSatish Balay   InsertMode  addv = baij->insertmode;
32057b952d6SSatish Balay 
32157b952d6SSatish Balay   /*  wait on receives */
32257b952d6SSatish Balay   while (count) {
32357b952d6SSatish Balay     MPI_Waitany(nrecvs,baij->recv_waits,&imdex,&recv_status);
32457b952d6SSatish Balay     /* unpack receives into our local space */
32557b952d6SSatish Balay     values = baij->rvalues + 3*imdex*baij->rmax;
32657b952d6SSatish Balay     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
32757b952d6SSatish Balay     n = n/3;
32857b952d6SSatish Balay     for ( i=0; i<n; i++ ) {
32957b952d6SSatish Balay       row = (int) PetscReal(values[3*i]) - baij->rstart*bs;
33057b952d6SSatish Balay       col = (int) PetscReal(values[3*i+1]);
33157b952d6SSatish Balay       val = values[3*i+2];
33257b952d6SSatish Balay       if (col >= baij->cstart*bs && col < baij->cend*bs) {
33357b952d6SSatish Balay         col -= baij->cstart*bs;
33457b952d6SSatish Balay         MatSetValues(baij->A,1,&row,1,&col,&val,addv);
33557b952d6SSatish Balay       }
33657b952d6SSatish Balay       else {
33757b952d6SSatish Balay         if (mat->was_assembled) {
338905e6a2fSBarry Smith           if (!baij->colmap) {
339905e6a2fSBarry Smith             ierr = CreateColmap_MPIBAIJ_Private(mat); CHKERRQ(ierr);
340905e6a2fSBarry Smith           }
341905e6a2fSBarry Smith           col = (baij->colmap[col/bs]-1)*bs + col%bs;
34257b952d6SSatish Balay           if (col < 0  && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
34357b952d6SSatish Balay             ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
34457b952d6SSatish Balay             col = (int) PetscReal(values[3*i+1]);
34557b952d6SSatish Balay           }
34657b952d6SSatish Balay         }
34757b952d6SSatish Balay         MatSetValues(baij->B,1,&row,1,&col,&val,addv);
34857b952d6SSatish Balay       }
34957b952d6SSatish Balay     }
35057b952d6SSatish Balay     count--;
35157b952d6SSatish Balay   }
35257b952d6SSatish Balay   PetscFree(baij->recv_waits); PetscFree(baij->rvalues);
35357b952d6SSatish Balay 
35457b952d6SSatish Balay   /* wait on sends */
35557b952d6SSatish Balay   if (baij->nsends) {
35657b952d6SSatish Balay     send_status = (MPI_Status *) PetscMalloc(baij->nsends*sizeof(MPI_Status));
35757b952d6SSatish Balay     CHKPTRQ(send_status);
35857b952d6SSatish Balay     MPI_Waitall(baij->nsends,baij->send_waits,send_status);
35957b952d6SSatish Balay     PetscFree(send_status);
36057b952d6SSatish Balay   }
36157b952d6SSatish Balay   PetscFree(baij->send_waits); PetscFree(baij->svalues);
36257b952d6SSatish Balay 
36357b952d6SSatish Balay   baij->insertmode = NOT_SET_VALUES;
36457b952d6SSatish Balay   ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr);
36557b952d6SSatish Balay   ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr);
36657b952d6SSatish Balay 
36757b952d6SSatish Balay   /* determine if any processor has disassembled, if so we must
36857b952d6SSatish Balay      also disassemble ourselfs, in order that we may reassemble. */
36957b952d6SSatish Balay   MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
37057b952d6SSatish Balay   if (mat->was_assembled && !other_disassembled) {
37157b952d6SSatish Balay     ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
37257b952d6SSatish Balay   }
37357b952d6SSatish Balay 
3746d4a8577SBarry Smith   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
37557b952d6SSatish Balay     ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr);
37657b952d6SSatish Balay   }
37757b952d6SSatish Balay   ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr);
37857b952d6SSatish Balay   ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr);
37957b952d6SSatish Balay 
38057b952d6SSatish Balay   if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;}
38157b952d6SSatish Balay   return 0;
38257b952d6SSatish Balay }
38357b952d6SSatish Balay 
38457b952d6SSatish Balay static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer)
38557b952d6SSatish Balay {
38657b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
38757b952d6SSatish Balay   int          ierr;
38857b952d6SSatish Balay 
38957b952d6SSatish Balay   if (baij->size == 1) {
39057b952d6SSatish Balay     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
39157b952d6SSatish Balay   }
39257b952d6SSatish Balay   else SETERRQ(1,"MatView_MPIBAIJ_Binary:Only uniprocessor output supported");
39357b952d6SSatish Balay   return 0;
39457b952d6SSatish Balay }
39557b952d6SSatish Balay 
39657b952d6SSatish Balay static int MatView_MPIBAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
39757b952d6SSatish Balay {
39857b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
399cee3aa6bSSatish Balay   int          ierr, format,rank,bs = baij->bs;
40057b952d6SSatish Balay   FILE         *fd;
40157b952d6SSatish Balay   ViewerType   vtype;
40257b952d6SSatish Balay 
40357b952d6SSatish Balay   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
40457b952d6SSatish Balay   if (vtype  == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
40557b952d6SSatish Balay     ierr = ViewerGetFormat(viewer,&format);
406639f9d9dSBarry Smith     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
4074e220ebcSLois Curfman McInnes       MatInfo info;
40857b952d6SSatish Balay       MPI_Comm_rank(mat->comm,&rank);
40957b952d6SSatish Balay       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
4104e220ebcSLois Curfman McInnes       ierr = MatGetInfo(mat,MAT_LOCAL,&info);
41157b952d6SSatish Balay       PetscSequentialPhaseBegin(mat->comm,1);
41257b952d6SSatish Balay       fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
4134e220ebcSLois Curfman McInnes               rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
4144e220ebcSLois Curfman McInnes               baij->bs,(int)info.memory);
4154e220ebcSLois Curfman McInnes       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);
4164e220ebcSLois Curfman McInnes       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
4174e220ebcSLois Curfman McInnes       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);
4184e220ebcSLois Curfman McInnes       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
41957b952d6SSatish Balay       fflush(fd);
42057b952d6SSatish Balay       PetscSequentialPhaseEnd(mat->comm,1);
42157b952d6SSatish Balay       ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr);
42257b952d6SSatish Balay       return 0;
42357b952d6SSatish Balay     }
424639f9d9dSBarry Smith     else if (format == VIEWER_FORMAT_ASCII_INFO) {
425bcc3fcf6SBarry Smith       PetscPrintf(mat->comm,"  block size is %d\n",bs);
42657b952d6SSatish Balay       return 0;
42757b952d6SSatish Balay     }
42857b952d6SSatish Balay   }
42957b952d6SSatish Balay 
43057b952d6SSatish Balay   if (vtype == DRAW_VIEWER) {
43157b952d6SSatish Balay     Draw       draw;
43257b952d6SSatish Balay     PetscTruth isnull;
43357b952d6SSatish Balay     ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
43457b952d6SSatish Balay     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
43557b952d6SSatish Balay   }
43657b952d6SSatish Balay 
43757b952d6SSatish Balay   if (vtype == ASCII_FILE_VIEWER) {
43857b952d6SSatish Balay     ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
43957b952d6SSatish Balay     PetscSequentialPhaseBegin(mat->comm,1);
44057b952d6SSatish Balay     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
44157b952d6SSatish Balay            baij->rank,baij->m,baij->rstart*bs,baij->rend*bs,baij->n,
44257b952d6SSatish Balay             baij->cstart*bs,baij->cend*bs);
44357b952d6SSatish Balay     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
44457b952d6SSatish Balay     ierr = MatView(baij->B,viewer); CHKERRQ(ierr);
44557b952d6SSatish Balay     fflush(fd);
44657b952d6SSatish Balay     PetscSequentialPhaseEnd(mat->comm,1);
44757b952d6SSatish Balay   }
44857b952d6SSatish Balay   else {
44957b952d6SSatish Balay     int size = baij->size;
45057b952d6SSatish Balay     rank = baij->rank;
45157b952d6SSatish Balay     if (size == 1) {
45257b952d6SSatish Balay       ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
45357b952d6SSatish Balay     }
45457b952d6SSatish Balay     else {
45557b952d6SSatish Balay       /* assemble the entire matrix onto first processor. */
45657b952d6SSatish Balay       Mat         A;
45757b952d6SSatish Balay       Mat_SeqBAIJ *Aloc;
45857b952d6SSatish Balay       int         M = baij->M, N = baij->N,*ai,*aj,row,col,i,j,k,*rvals;
45957b952d6SSatish Balay       int         mbs=baij->mbs;
46057b952d6SSatish Balay       Scalar      *a;
46157b952d6SSatish Balay 
46257b952d6SSatish Balay       if (!rank) {
463cee3aa6bSSatish Balay         ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
46457b952d6SSatish Balay         CHKERRQ(ierr);
46557b952d6SSatish Balay       }
46657b952d6SSatish Balay       else {
467cee3aa6bSSatish Balay         ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
46857b952d6SSatish Balay         CHKERRQ(ierr);
46957b952d6SSatish Balay       }
47057b952d6SSatish Balay       PLogObjectParent(mat,A);
47157b952d6SSatish Balay 
47257b952d6SSatish Balay       /* copy over the A part */
47357b952d6SSatish Balay       Aloc = (Mat_SeqBAIJ*) baij->A->data;
47457b952d6SSatish Balay       ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
47557b952d6SSatish Balay       row = baij->rstart;
47657b952d6SSatish Balay       rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
47757b952d6SSatish Balay 
47857b952d6SSatish Balay       for ( i=0; i<mbs; i++ ) {
47957b952d6SSatish Balay         rvals[0] = bs*(baij->rstart + i);
48057b952d6SSatish Balay         for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
48157b952d6SSatish Balay         for ( j=ai[i]; j<ai[i+1]; j++ ) {
48257b952d6SSatish Balay           col = (baij->cstart+aj[j])*bs;
48357b952d6SSatish Balay           for (k=0; k<bs; k++ ) {
484cee3aa6bSSatish Balay             ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
485cee3aa6bSSatish Balay             col++; a += bs;
48657b952d6SSatish Balay           }
48757b952d6SSatish Balay         }
48857b952d6SSatish Balay       }
48957b952d6SSatish Balay       /* copy over the B part */
49057b952d6SSatish Balay       Aloc = (Mat_SeqBAIJ*) baij->B->data;
49157b952d6SSatish Balay       ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
49257b952d6SSatish Balay       row = baij->rstart*bs;
49357b952d6SSatish Balay       for ( i=0; i<mbs; i++ ) {
49457b952d6SSatish Balay         rvals[0] = bs*(baij->rstart + i);
49557b952d6SSatish Balay         for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
49657b952d6SSatish Balay         for ( j=ai[i]; j<ai[i+1]; j++ ) {
49757b952d6SSatish Balay           col = baij->garray[aj[j]]*bs;
49857b952d6SSatish Balay           for (k=0; k<bs; k++ ) {
499cee3aa6bSSatish Balay             ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
500cee3aa6bSSatish Balay             col++; a += bs;
50157b952d6SSatish Balay           }
50257b952d6SSatish Balay         }
50357b952d6SSatish Balay       }
50457b952d6SSatish Balay       PetscFree(rvals);
5056d4a8577SBarry Smith       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
5066d4a8577SBarry Smith       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
50757b952d6SSatish Balay       if (!rank) {
50857b952d6SSatish Balay         ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
50957b952d6SSatish Balay       }
51057b952d6SSatish Balay       ierr = MatDestroy(A); CHKERRQ(ierr);
51157b952d6SSatish Balay     }
51257b952d6SSatish Balay   }
51357b952d6SSatish Balay   return 0;
51457b952d6SSatish Balay }
51557b952d6SSatish Balay 
51657b952d6SSatish Balay 
51757b952d6SSatish Balay 
51857b952d6SSatish Balay static int MatView_MPIBAIJ(PetscObject obj,Viewer viewer)
51957b952d6SSatish Balay {
52057b952d6SSatish Balay   Mat         mat = (Mat) obj;
52157b952d6SSatish Balay   int         ierr;
52257b952d6SSatish Balay   ViewerType  vtype;
52357b952d6SSatish Balay 
52457b952d6SSatish Balay   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
52557b952d6SSatish Balay   if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
52657b952d6SSatish Balay       vtype == DRAW_VIEWER       || vtype == MATLAB_VIEWER) {
52757b952d6SSatish Balay     ierr = MatView_MPIBAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
52857b952d6SSatish Balay   }
52957b952d6SSatish Balay   else if (vtype == BINARY_FILE_VIEWER) {
53057b952d6SSatish Balay     return MatView_MPIBAIJ_Binary(mat,viewer);
53157b952d6SSatish Balay   }
53257b952d6SSatish Balay   return 0;
53357b952d6SSatish Balay }
53457b952d6SSatish Balay 
53579bdfe76SSatish Balay static int MatDestroy_MPIBAIJ(PetscObject obj)
53679bdfe76SSatish Balay {
53779bdfe76SSatish Balay   Mat         mat = (Mat) obj;
53879bdfe76SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
53979bdfe76SSatish Balay   int         ierr;
54079bdfe76SSatish Balay 
54179bdfe76SSatish Balay #if defined(PETSC_LOG)
54279bdfe76SSatish Balay   PLogObjectState(obj,"Rows=%d, Cols=%d",baij->M,baij->N);
54379bdfe76SSatish Balay #endif
54479bdfe76SSatish Balay 
54579bdfe76SSatish Balay   PetscFree(baij->rowners);
54679bdfe76SSatish Balay   ierr = MatDestroy(baij->A); CHKERRQ(ierr);
54779bdfe76SSatish Balay   ierr = MatDestroy(baij->B); CHKERRQ(ierr);
54879bdfe76SSatish Balay   if (baij->colmap) PetscFree(baij->colmap);
54979bdfe76SSatish Balay   if (baij->garray) PetscFree(baij->garray);
55079bdfe76SSatish Balay   if (baij->lvec)   VecDestroy(baij->lvec);
55179bdfe76SSatish Balay   if (baij->Mvctx)  VecScatterDestroy(baij->Mvctx);
55279bdfe76SSatish Balay   if (baij->rowvalues) PetscFree(baij->rowvalues);
55379bdfe76SSatish Balay   PetscFree(baij);
55490f02eecSBarry Smith   if (mat->mapping) {
55590f02eecSBarry Smith     ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr);
55690f02eecSBarry Smith   }
55779bdfe76SSatish Balay   PLogObjectDestroy(mat);
55879bdfe76SSatish Balay   PetscHeaderDestroy(mat);
55979bdfe76SSatish Balay   return 0;
56079bdfe76SSatish Balay }
56179bdfe76SSatish Balay 
562cee3aa6bSSatish Balay static int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
563cee3aa6bSSatish Balay {
564cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
56547b4a8eaSLois Curfman McInnes   int         ierr, nt;
566cee3aa6bSSatish Balay 
567c16cb8f2SBarry Smith   VecGetLocalSize_Fast(xx,nt);
56847b4a8eaSLois Curfman McInnes   if (nt != a->n) {
5690ac07820SSatish Balay     SETERRQ(1,"MatMult_MPIBAIJ:Incompatible parition of A and xx");
57047b4a8eaSLois Curfman McInnes   }
571c16cb8f2SBarry Smith   VecGetLocalSize_Fast(yy,nt);
57247b4a8eaSLois Curfman McInnes   if (nt != a->m) {
5730ac07820SSatish Balay     SETERRQ(1,"MatMult_MPIBAIJ:Incompatible parition of A and yy");
57447b4a8eaSLois Curfman McInnes   }
57543a90d84SBarry Smith   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
576cee3aa6bSSatish Balay   ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr);
57743a90d84SBarry Smith   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
578cee3aa6bSSatish Balay   ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
57943a90d84SBarry Smith   ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
580cee3aa6bSSatish Balay   return 0;
581cee3aa6bSSatish Balay }
582cee3aa6bSSatish Balay 
583cee3aa6bSSatish Balay static int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
584cee3aa6bSSatish Balay {
585cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
586cee3aa6bSSatish Balay   int        ierr;
58743a90d84SBarry Smith   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
588cee3aa6bSSatish Balay   ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
58943a90d84SBarry Smith   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
590cee3aa6bSSatish Balay   ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
591cee3aa6bSSatish Balay   return 0;
592cee3aa6bSSatish Balay }
593cee3aa6bSSatish Balay 
594cee3aa6bSSatish Balay static int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy)
595cee3aa6bSSatish Balay {
596cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
597cee3aa6bSSatish Balay   int        ierr;
598cee3aa6bSSatish Balay 
599cee3aa6bSSatish Balay   /* do nondiagonal part */
600cee3aa6bSSatish Balay   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
601cee3aa6bSSatish Balay   /* send it on its way */
602537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
603cee3aa6bSSatish Balay   /* do local part */
604cee3aa6bSSatish Balay   ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr);
605cee3aa6bSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
606cee3aa6bSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
607cee3aa6bSSatish Balay   /* but is not perhaps always true. */
608639f9d9dSBarry Smith   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
609cee3aa6bSSatish Balay   return 0;
610cee3aa6bSSatish Balay }
611cee3aa6bSSatish Balay 
612cee3aa6bSSatish Balay static int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
613cee3aa6bSSatish Balay {
614cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
615cee3aa6bSSatish Balay   int        ierr;
616cee3aa6bSSatish Balay 
617cee3aa6bSSatish Balay   /* do nondiagonal part */
618cee3aa6bSSatish Balay   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
619cee3aa6bSSatish Balay   /* send it on its way */
620537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
621cee3aa6bSSatish Balay   /* do local part */
622cee3aa6bSSatish Balay   ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
623cee3aa6bSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
624cee3aa6bSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
625cee3aa6bSSatish Balay   /* but is not perhaps always true. */
626537820f0SBarry Smith   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
627cee3aa6bSSatish Balay   return 0;
628cee3aa6bSSatish Balay }
629cee3aa6bSSatish Balay 
630cee3aa6bSSatish Balay /*
631cee3aa6bSSatish Balay   This only works correctly for square matrices where the subblock A->A is the
632cee3aa6bSSatish Balay    diagonal block
633cee3aa6bSSatish Balay */
634cee3aa6bSSatish Balay static int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
635cee3aa6bSSatish Balay {
636cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
637cee3aa6bSSatish Balay   if (a->M != a->N)
638cee3aa6bSSatish Balay     SETERRQ(1,"MatGetDiagonal_MPIBAIJ:Supports only square matrix where A->A is diag block");
639cee3aa6bSSatish Balay   return MatGetDiagonal(a->A,v);
640cee3aa6bSSatish Balay }
641cee3aa6bSSatish Balay 
642cee3aa6bSSatish Balay static int MatScale_MPIBAIJ(Scalar *aa,Mat A)
643cee3aa6bSSatish Balay {
644cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
645cee3aa6bSSatish Balay   int        ierr;
646cee3aa6bSSatish Balay   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
647cee3aa6bSSatish Balay   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
648cee3aa6bSSatish Balay   return 0;
649cee3aa6bSSatish Balay }
65057b952d6SSatish Balay static int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n)
65157b952d6SSatish Balay {
65257b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
65357b952d6SSatish Balay   *m = mat->M; *n = mat->N;
65457b952d6SSatish Balay   return 0;
65557b952d6SSatish Balay }
65657b952d6SSatish Balay 
65757b952d6SSatish Balay static int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n)
65857b952d6SSatish Balay {
65957b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
66057b952d6SSatish Balay   *m = mat->m; *n = mat->N;
66157b952d6SSatish Balay   return 0;
66257b952d6SSatish Balay }
66357b952d6SSatish Balay 
66457b952d6SSatish Balay static int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n)
66557b952d6SSatish Balay {
66657b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
66757b952d6SSatish Balay   *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs;
66857b952d6SSatish Balay   return 0;
66957b952d6SSatish Balay }
67057b952d6SSatish Balay 
671acdf5bf4SSatish Balay extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
672acdf5bf4SSatish Balay extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
673acdf5bf4SSatish Balay 
674acdf5bf4SSatish Balay int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
675acdf5bf4SSatish Balay {
676acdf5bf4SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
677acdf5bf4SSatish Balay   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
678acdf5bf4SSatish Balay   int        bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB;
679d9d09a02SSatish Balay   int        nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs;
680d9d09a02SSatish Balay   int        *cmap, *idx_p,cstart = mat->cstart;
681acdf5bf4SSatish Balay 
682acdf5bf4SSatish Balay   if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,"MatGetRow_MPIBAIJ:Already active");
683acdf5bf4SSatish Balay   mat->getrowactive = PETSC_TRUE;
684acdf5bf4SSatish Balay 
685acdf5bf4SSatish Balay   if (!mat->rowvalues && (idx || v)) {
686acdf5bf4SSatish Balay     /*
687acdf5bf4SSatish Balay         allocate enough space to hold information from the longest row.
688acdf5bf4SSatish Balay     */
689acdf5bf4SSatish Balay     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data;
690bd16c2feSSatish Balay     int     max = 1,mbs = mat->mbs,tmp;
691bd16c2feSSatish Balay     for ( i=0; i<mbs; i++ ) {
692acdf5bf4SSatish Balay       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
693acdf5bf4SSatish Balay       if (max < tmp) { max = tmp; }
694acdf5bf4SSatish Balay     }
695acdf5bf4SSatish Balay     mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar)));
696acdf5bf4SSatish Balay     CHKPTRQ(mat->rowvalues);
697acdf5bf4SSatish Balay     mat->rowindices = (int *) (mat->rowvalues + max*bs2);
698acdf5bf4SSatish Balay   }
699acdf5bf4SSatish Balay 
700acdf5bf4SSatish Balay 
701d9d09a02SSatish Balay   if (row < brstart || row >= brend) SETERRQ(1,"MatGetRow_MPIBAIJ:Only local rows")
702d9d09a02SSatish Balay   lrow = row - brstart;
703acdf5bf4SSatish Balay 
704acdf5bf4SSatish Balay   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
705acdf5bf4SSatish Balay   if (!v)   {pvA = 0; pvB = 0;}
706acdf5bf4SSatish Balay   if (!idx) {pcA = 0; if (!v) pcB = 0;}
707acdf5bf4SSatish Balay   ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
708acdf5bf4SSatish Balay   ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
709acdf5bf4SSatish Balay   nztot = nzA + nzB;
710acdf5bf4SSatish Balay 
711acdf5bf4SSatish Balay   cmap  = mat->garray;
712acdf5bf4SSatish Balay   if (v  || idx) {
713acdf5bf4SSatish Balay     if (nztot) {
714acdf5bf4SSatish Balay       /* Sort by increasing column numbers, assuming A and B already sorted */
715acdf5bf4SSatish Balay       int imark = -1;
716acdf5bf4SSatish Balay       if (v) {
717acdf5bf4SSatish Balay         *v = v_p = mat->rowvalues;
718acdf5bf4SSatish Balay         for ( i=0; i<nzB; i++ ) {
719d9d09a02SSatish Balay           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
720acdf5bf4SSatish Balay           else break;
721acdf5bf4SSatish Balay         }
722acdf5bf4SSatish Balay         imark = i;
723acdf5bf4SSatish Balay         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
724acdf5bf4SSatish Balay         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
725acdf5bf4SSatish Balay       }
726acdf5bf4SSatish Balay       if (idx) {
727acdf5bf4SSatish Balay         *idx = idx_p = mat->rowindices;
728acdf5bf4SSatish Balay         if (imark > -1) {
729acdf5bf4SSatish Balay           for ( i=0; i<imark; i++ ) {
730bd16c2feSSatish Balay             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
731acdf5bf4SSatish Balay           }
732acdf5bf4SSatish Balay         } else {
733acdf5bf4SSatish Balay           for ( i=0; i<nzB; i++ ) {
734d9d09a02SSatish Balay             if (cmap[cworkB[i]/bs] < cstart)
735d9d09a02SSatish Balay               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
736acdf5bf4SSatish Balay             else break;
737acdf5bf4SSatish Balay           }
738acdf5bf4SSatish Balay           imark = i;
739acdf5bf4SSatish Balay         }
740d9d09a02SSatish Balay         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart*bs + cworkA[i];
741d9d09a02SSatish Balay         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
742acdf5bf4SSatish Balay       }
743acdf5bf4SSatish Balay     }
744d212a18eSSatish Balay     else {
745d212a18eSSatish Balay       if (idx) *idx = 0;
746d212a18eSSatish Balay       if (v)   *v   = 0;
747d212a18eSSatish Balay     }
748acdf5bf4SSatish Balay   }
749acdf5bf4SSatish Balay   *nz = nztot;
750acdf5bf4SSatish Balay   ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
751acdf5bf4SSatish Balay   ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
752acdf5bf4SSatish Balay   return 0;
753acdf5bf4SSatish Balay }
754acdf5bf4SSatish Balay 
755acdf5bf4SSatish Balay int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
756acdf5bf4SSatish Balay {
757acdf5bf4SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
758acdf5bf4SSatish Balay   if (baij->getrowactive == PETSC_FALSE) {
759acdf5bf4SSatish Balay     SETERRQ(1,"MatRestoreRow_MPIBAIJ:MatGetRow not called");
760acdf5bf4SSatish Balay   }
761acdf5bf4SSatish Balay   baij->getrowactive = PETSC_FALSE;
762acdf5bf4SSatish Balay   return 0;
763acdf5bf4SSatish Balay }
764acdf5bf4SSatish Balay 
7655a838052SSatish Balay static int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
7665a838052SSatish Balay {
7675a838052SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
7685a838052SSatish Balay   *bs = baij->bs;
7695a838052SSatish Balay   return 0;
7705a838052SSatish Balay }
7715a838052SSatish Balay 
77258667388SSatish Balay static int MatZeroEntries_MPIBAIJ(Mat A)
77358667388SSatish Balay {
77458667388SSatish Balay   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data;
77558667388SSatish Balay   int         ierr;
77658667388SSatish Balay   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
77758667388SSatish Balay   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
77858667388SSatish Balay   return 0;
77958667388SSatish Balay }
7800ac07820SSatish Balay 
7814e220ebcSLois Curfman McInnes static int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
7820ac07820SSatish Balay {
7834e220ebcSLois Curfman McInnes   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data;
7844e220ebcSLois Curfman McInnes   Mat         A = a->A, B = a->B;
7857d57db60SLois Curfman McInnes   int         ierr;
7867d57db60SLois Curfman McInnes   double      isend[5], irecv[5];
7870ac07820SSatish Balay 
7884e220ebcSLois Curfman McInnes   info->rows_global    = (double)a->M;
7894e220ebcSLois Curfman McInnes   info->columns_global = (double)a->N;
7904e220ebcSLois Curfman McInnes   info->rows_local     = (double)a->m;
7914e220ebcSLois Curfman McInnes   info->columns_local  = (double)a->N;
7924e220ebcSLois Curfman McInnes   info->block_size     = (double)a->bs;
7934e220ebcSLois Curfman McInnes   ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
7944e220ebcSLois Curfman McInnes   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->memory;
7954e220ebcSLois Curfman McInnes   ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
7964e220ebcSLois Curfman McInnes   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->memory;
7970ac07820SSatish Balay   if (flag == MAT_LOCAL) {
7984e220ebcSLois Curfman McInnes     info->nz_used      = isend[0];
7994e220ebcSLois Curfman McInnes     info->nz_allocated = isend[1];
8004e220ebcSLois Curfman McInnes     info->nz_unneeded  = isend[2];
8014e220ebcSLois Curfman McInnes     info->memory       = isend[3];
8024e220ebcSLois Curfman McInnes     info->mallocs      = isend[4];
8030ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_MAX) {
8040ac07820SSatish Balay     MPI_Allreduce(isend,irecv,3,MPI_INT,MPI_MAX,matin->comm);
8054e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
8064e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
8074e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
8084e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
8094e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
8100ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_SUM) {
8110ac07820SSatish Balay     MPI_Allreduce(isend,irecv,3,MPI_INT,MPI_SUM,matin->comm);
8124e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
8134e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
8144e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
8154e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
8164e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
8170ac07820SSatish Balay   }
8184e220ebcSLois Curfman McInnes   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
8194e220ebcSLois Curfman McInnes   info->fill_ratio_needed = 0;
8204e220ebcSLois Curfman McInnes   info->factor_mallocs    = 0;
8210ac07820SSatish Balay   return 0;
8220ac07820SSatish Balay }
8230ac07820SSatish Balay 
82458667388SSatish Balay static int MatSetOption_MPIBAIJ(Mat A,MatOption op)
82558667388SSatish Balay {
82658667388SSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
82758667388SSatish Balay 
82858667388SSatish Balay   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
82958667388SSatish Balay       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
8306da5968aSLois Curfman McInnes       op == MAT_COLUMNS_UNSORTED ||
831b1fbbac0SLois Curfman McInnes       op == MAT_COLUMNS_SORTED) {
832b1fbbac0SLois Curfman McInnes         MatSetOption(a->A,op);
833b1fbbac0SLois Curfman McInnes         MatSetOption(a->B,op);
834b1fbbac0SLois Curfman McInnes   } else if (op == MAT_ROW_ORIENTED) {
835aeafbbfcSLois Curfman McInnes         a->roworiented = 1;
83658667388SSatish Balay         MatSetOption(a->A,op);
83758667388SSatish Balay         MatSetOption(a->B,op);
838b1fbbac0SLois Curfman McInnes   } else if (op == MAT_ROWS_SORTED ||
8396da5968aSLois Curfman McInnes              op == MAT_ROWS_UNSORTED ||
84058667388SSatish Balay              op == MAT_SYMMETRIC ||
84158667388SSatish Balay              op == MAT_STRUCTURALLY_SYMMETRIC ||
84258667388SSatish Balay              op == MAT_YES_NEW_DIAGONALS)
84358667388SSatish Balay     PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
84458667388SSatish Balay   else if (op == MAT_COLUMN_ORIENTED) {
84558667388SSatish Balay     a->roworiented = 0;
84658667388SSatish Balay     MatSetOption(a->A,op);
84758667388SSatish Balay     MatSetOption(a->B,op);
84890f02eecSBarry Smith   } else if (op == MAT_IGNORE_OFF_PROCESSOR_ENTRIES) {
84990f02eecSBarry Smith     a->donotstash = 1;
85090f02eecSBarry Smith   } else if (op == MAT_NO_NEW_DIAGONALS)
8510ac07820SSatish Balay     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIBAIJ:MAT_NO_NEW_DIAGONALS");}
85258667388SSatish Balay   else
8530ac07820SSatish Balay     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIBAIJ:unknown option");}
85458667388SSatish Balay   return 0;
85558667388SSatish Balay }
85658667388SSatish Balay 
8570ac07820SSatish Balay static int MatTranspose_MPIBAIJ(Mat A,Mat *matout)
8580ac07820SSatish Balay {
8590ac07820SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data;
8600ac07820SSatish Balay   Mat_SeqBAIJ *Aloc;
8610ac07820SSatish Balay   Mat        B;
8620ac07820SSatish Balay   int        ierr,M=baij->M,N=baij->N,*ai,*aj,row,i,*rvals,j,k,col;
8630ac07820SSatish Balay   int        bs=baij->bs,mbs=baij->mbs;
8640ac07820SSatish Balay   Scalar     *a;
8650ac07820SSatish Balay 
8660ac07820SSatish Balay   if (matout == PETSC_NULL && M != N)
8670ac07820SSatish Balay     SETERRQ(1,"MatTranspose_MPIBAIJ:Square matrix only for in-place");
8680ac07820SSatish Balay   ierr = MatCreateMPIBAIJ(A->comm,baij->bs,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);
8690ac07820SSatish Balay   CHKERRQ(ierr);
8700ac07820SSatish Balay 
8710ac07820SSatish Balay   /* copy over the A part */
8720ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*) baij->A->data;
8730ac07820SSatish Balay   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
8740ac07820SSatish Balay   row = baij->rstart;
8750ac07820SSatish Balay   rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
8760ac07820SSatish Balay 
8770ac07820SSatish Balay   for ( i=0; i<mbs; i++ ) {
8780ac07820SSatish Balay     rvals[0] = bs*(baij->rstart + i);
8790ac07820SSatish Balay     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
8800ac07820SSatish Balay     for ( j=ai[i]; j<ai[i+1]; j++ ) {
8810ac07820SSatish Balay       col = (baij->cstart+aj[j])*bs;
8820ac07820SSatish Balay       for (k=0; k<bs; k++ ) {
8830ac07820SSatish Balay         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
8840ac07820SSatish Balay         col++; a += bs;
8850ac07820SSatish Balay       }
8860ac07820SSatish Balay     }
8870ac07820SSatish Balay   }
8880ac07820SSatish Balay   /* copy over the B part */
8890ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*) baij->B->data;
8900ac07820SSatish Balay   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
8910ac07820SSatish Balay   row = baij->rstart*bs;
8920ac07820SSatish Balay   for ( i=0; i<mbs; i++ ) {
8930ac07820SSatish Balay     rvals[0] = bs*(baij->rstart + i);
8940ac07820SSatish Balay     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
8950ac07820SSatish Balay     for ( j=ai[i]; j<ai[i+1]; j++ ) {
8960ac07820SSatish Balay       col = baij->garray[aj[j]]*bs;
8970ac07820SSatish Balay       for (k=0; k<bs; k++ ) {
8980ac07820SSatish Balay         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
8990ac07820SSatish Balay         col++; a += bs;
9000ac07820SSatish Balay       }
9010ac07820SSatish Balay     }
9020ac07820SSatish Balay   }
9030ac07820SSatish Balay   PetscFree(rvals);
9040ac07820SSatish Balay   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
9050ac07820SSatish Balay   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
9060ac07820SSatish Balay 
9070ac07820SSatish Balay   if (matout != PETSC_NULL) {
9080ac07820SSatish Balay     *matout = B;
9090ac07820SSatish Balay   } else {
9100ac07820SSatish Balay     /* This isn't really an in-place transpose .... but free data structures from baij */
9110ac07820SSatish Balay     PetscFree(baij->rowners);
9120ac07820SSatish Balay     ierr = MatDestroy(baij->A); CHKERRQ(ierr);
9130ac07820SSatish Balay     ierr = MatDestroy(baij->B); CHKERRQ(ierr);
9140ac07820SSatish Balay     if (baij->colmap) PetscFree(baij->colmap);
9150ac07820SSatish Balay     if (baij->garray) PetscFree(baij->garray);
9160ac07820SSatish Balay     if (baij->lvec) VecDestroy(baij->lvec);
9170ac07820SSatish Balay     if (baij->Mvctx) VecScatterDestroy(baij->Mvctx);
9180ac07820SSatish Balay     PetscFree(baij);
9190ac07820SSatish Balay     PetscMemcpy(A,B,sizeof(struct _Mat));
9200ac07820SSatish Balay     PetscHeaderDestroy(B);
9210ac07820SSatish Balay   }
9220ac07820SSatish Balay   return 0;
9230ac07820SSatish Balay }
9240e95ebc0SSatish Balay 
9250e95ebc0SSatish Balay int MatDiagonalScale_MPIBAIJ(Mat A,Vec ll,Vec rr)
9260e95ebc0SSatish Balay {
9270e95ebc0SSatish Balay   Mat a = ((Mat_MPIBAIJ *) A->data)->A;
9280e95ebc0SSatish Balay   Mat b = ((Mat_MPIBAIJ *) A->data)->B;
9290e95ebc0SSatish Balay   int ierr,s1,s2,s3;
9300e95ebc0SSatish Balay 
9310e95ebc0SSatish Balay   if (ll)  {
9320e95ebc0SSatish Balay     ierr = VecGetLocalSize(ll,&s1); CHKERRQ(ierr);
9330e95ebc0SSatish Balay     ierr = MatGetLocalSize(A,&s2,&s3); CHKERRQ(ierr);
9340e95ebc0SSatish Balay     if (s1!=s2) SETERRQ(1,"MatDiagonalScale_MPIBAIJ: non-conforming local sizes");
9350e95ebc0SSatish Balay     ierr = MatDiagonalScale(a,ll,0); CHKERRQ(ierr);
9360e95ebc0SSatish Balay     ierr = MatDiagonalScale(b,ll,0); CHKERRQ(ierr);
9370e95ebc0SSatish Balay   }
9380e95ebc0SSatish Balay   if (rr) SETERRQ(1,"MatDiagonalScale_MPIBAIJ:not supported for right vector");
9390e95ebc0SSatish Balay   return 0;
9400e95ebc0SSatish Balay }
9410e95ebc0SSatish Balay 
9420ac07820SSatish Balay /* the code does not do the diagonal entries correctly unless the
9430ac07820SSatish Balay    matrix is square and the column and row owerships are identical.
9440ac07820SSatish Balay    This is a BUG. The only way to fix it seems to be to access
9450ac07820SSatish Balay    baij->A and baij->B directly and not through the MatZeroRows()
9460ac07820SSatish Balay    routine.
9470ac07820SSatish Balay */
9480ac07820SSatish Balay static int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag)
9490ac07820SSatish Balay {
9500ac07820SSatish Balay   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ *) A->data;
9510ac07820SSatish Balay   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
9520ac07820SSatish Balay   int            *procs,*nprocs,j,found,idx,nsends,*work;
9530ac07820SSatish Balay   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
9540ac07820SSatish Balay   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
9550ac07820SSatish Balay   int            *lens,imdex,*lrows,*values,bs=l->bs;
9560ac07820SSatish Balay   MPI_Comm       comm = A->comm;
9570ac07820SSatish Balay   MPI_Request    *send_waits,*recv_waits;
9580ac07820SSatish Balay   MPI_Status     recv_status,*send_status;
9590ac07820SSatish Balay   IS             istmp;
9600ac07820SSatish Balay 
9610ac07820SSatish Balay   ierr = ISGetSize(is,&N); CHKERRQ(ierr);
9620ac07820SSatish Balay   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
9630ac07820SSatish Balay 
9640ac07820SSatish Balay   /*  first count number of contributors to each processor */
9650ac07820SSatish Balay   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
9660ac07820SSatish Balay   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
9670ac07820SSatish Balay   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
9680ac07820SSatish Balay   for ( i=0; i<N; i++ ) {
9690ac07820SSatish Balay     idx = rows[i];
9700ac07820SSatish Balay     found = 0;
9710ac07820SSatish Balay     for ( j=0; j<size; j++ ) {
9720ac07820SSatish Balay       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
9730ac07820SSatish Balay         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
9740ac07820SSatish Balay       }
9750ac07820SSatish Balay     }
9760ac07820SSatish Balay     if (!found) SETERRQ(1,"MatZeroRows_MPIBAIJ:Index out of range");
9770ac07820SSatish Balay   }
9780ac07820SSatish Balay   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
9790ac07820SSatish Balay 
9800ac07820SSatish Balay   /* inform other processors of number of messages and max length*/
9810ac07820SSatish Balay   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
9820ac07820SSatish Balay   MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
9830ac07820SSatish Balay   nrecvs = work[rank];
9840ac07820SSatish Balay   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
9850ac07820SSatish Balay   nmax = work[rank];
9860ac07820SSatish Balay   PetscFree(work);
9870ac07820SSatish Balay 
9880ac07820SSatish Balay   /* post receives:   */
9890ac07820SSatish Balay   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
9900ac07820SSatish Balay   CHKPTRQ(rvalues);
9910ac07820SSatish Balay   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
9920ac07820SSatish Balay   CHKPTRQ(recv_waits);
9930ac07820SSatish Balay   for ( i=0; i<nrecvs; i++ ) {
9940ac07820SSatish Balay     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
9950ac07820SSatish Balay   }
9960ac07820SSatish Balay 
9970ac07820SSatish Balay   /* do sends:
9980ac07820SSatish Balay       1) starts[i] gives the starting index in svalues for stuff going to
9990ac07820SSatish Balay          the ith processor
10000ac07820SSatish Balay   */
10010ac07820SSatish Balay   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
10020ac07820SSatish Balay   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
10030ac07820SSatish Balay   CHKPTRQ(send_waits);
10040ac07820SSatish Balay   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
10050ac07820SSatish Balay   starts[0] = 0;
10060ac07820SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
10070ac07820SSatish Balay   for ( i=0; i<N; i++ ) {
10080ac07820SSatish Balay     svalues[starts[owner[i]]++] = rows[i];
10090ac07820SSatish Balay   }
10100ac07820SSatish Balay   ISRestoreIndices(is,&rows);
10110ac07820SSatish Balay 
10120ac07820SSatish Balay   starts[0] = 0;
10130ac07820SSatish Balay   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
10140ac07820SSatish Balay   count = 0;
10150ac07820SSatish Balay   for ( i=0; i<size; i++ ) {
10160ac07820SSatish Balay     if (procs[i]) {
10170ac07820SSatish Balay       MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
10180ac07820SSatish Balay     }
10190ac07820SSatish Balay   }
10200ac07820SSatish Balay   PetscFree(starts);
10210ac07820SSatish Balay 
10220ac07820SSatish Balay   base = owners[rank]*bs;
10230ac07820SSatish Balay 
10240ac07820SSatish Balay   /*  wait on receives */
10250ac07820SSatish Balay   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
10260ac07820SSatish Balay   source = lens + nrecvs;
10270ac07820SSatish Balay   count  = nrecvs; slen = 0;
10280ac07820SSatish Balay   while (count) {
10290ac07820SSatish Balay     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
10300ac07820SSatish Balay     /* unpack receives into our local space */
10310ac07820SSatish Balay     MPI_Get_count(&recv_status,MPI_INT,&n);
10320ac07820SSatish Balay     source[imdex]  = recv_status.MPI_SOURCE;
10330ac07820SSatish Balay     lens[imdex]  = n;
10340ac07820SSatish Balay     slen += n;
10350ac07820SSatish Balay     count--;
10360ac07820SSatish Balay   }
10370ac07820SSatish Balay   PetscFree(recv_waits);
10380ac07820SSatish Balay 
10390ac07820SSatish Balay   /* move the data into the send scatter */
10400ac07820SSatish Balay   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
10410ac07820SSatish Balay   count = 0;
10420ac07820SSatish Balay   for ( i=0; i<nrecvs; i++ ) {
10430ac07820SSatish Balay     values = rvalues + i*nmax;
10440ac07820SSatish Balay     for ( j=0; j<lens[i]; j++ ) {
10450ac07820SSatish Balay       lrows[count++] = values[j] - base;
10460ac07820SSatish Balay     }
10470ac07820SSatish Balay   }
10480ac07820SSatish Balay   PetscFree(rvalues); PetscFree(lens);
10490ac07820SSatish Balay   PetscFree(owner); PetscFree(nprocs);
10500ac07820SSatish Balay 
10510ac07820SSatish Balay   /* actually zap the local rows */
1052537820f0SBarry Smith   ierr = ISCreateGeneral(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
10530ac07820SSatish Balay   PLogObjectParent(A,istmp);
10540ac07820SSatish Balay   PetscFree(lrows);
10550ac07820SSatish Balay   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
10560ac07820SSatish Balay   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
10570ac07820SSatish Balay   ierr = ISDestroy(istmp); CHKERRQ(ierr);
10580ac07820SSatish Balay 
10590ac07820SSatish Balay   /* wait on sends */
10600ac07820SSatish Balay   if (nsends) {
10610ac07820SSatish Balay     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
10620ac07820SSatish Balay     CHKPTRQ(send_status);
10630ac07820SSatish Balay     MPI_Waitall(nsends,send_waits,send_status);
10640ac07820SSatish Balay     PetscFree(send_status);
10650ac07820SSatish Balay   }
10660ac07820SSatish Balay   PetscFree(send_waits); PetscFree(svalues);
10670ac07820SSatish Balay 
10680ac07820SSatish Balay   return 0;
10690ac07820SSatish Balay }
1070ba4ca20aSSatish Balay extern int MatPrintHelp_SeqBAIJ(Mat);
1071ba4ca20aSSatish Balay static int MatPrintHelp_MPIBAIJ(Mat A)
1072ba4ca20aSSatish Balay {
1073ba4ca20aSSatish Balay   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
1074ba4ca20aSSatish Balay 
1075ba4ca20aSSatish Balay   if (!a->rank) return MatPrintHelp_SeqBAIJ(a->A);
1076ba4ca20aSSatish Balay   else return 0;
1077ba4ca20aSSatish Balay }
10780ac07820SSatish Balay 
10790ac07820SSatish Balay static int MatConvertSameType_MPIBAIJ(Mat,Mat *,int);
10800ac07820SSatish Balay 
108179bdfe76SSatish Balay /* -------------------------------------------------------------------*/
108279bdfe76SSatish Balay static struct _MatOps MatOps = {
1083bd16c2feSSatish Balay   MatSetValues_MPIBAIJ,MatGetRow_MPIBAIJ,MatRestoreRow_MPIBAIJ,MatMult_MPIBAIJ,
10844c50302cSBarry Smith   MatMultAdd_MPIBAIJ,MatMultTrans_MPIBAIJ,MatMultTransAdd_MPIBAIJ,0,
10854c50302cSBarry Smith   0,0,0,0,
10860ac07820SSatish Balay   0,0,MatTranspose_MPIBAIJ,MatGetInfo_MPIBAIJ,
10870e95ebc0SSatish Balay   0,MatGetDiagonal_MPIBAIJ,MatDiagonalScale_MPIBAIJ,MatNorm_MPIBAIJ,
108858667388SSatish Balay   MatAssemblyBegin_MPIBAIJ,MatAssemblyEnd_MPIBAIJ,0,MatSetOption_MPIBAIJ,
10894c50302cSBarry Smith   MatZeroEntries_MPIBAIJ,MatZeroRows_MPIBAIJ,0,
10904c50302cSBarry Smith   0,0,0,MatGetSize_MPIBAIJ,
10914c50302cSBarry Smith   MatGetLocalSize_MPIBAIJ,MatGetOwnershipRange_MPIBAIJ,0,0,
1092905e6a2fSBarry Smith   0,0,0,MatConvertSameType_MPIBAIJ,0,0,
1093d212a18eSSatish Balay   0,0,0,MatGetSubMatrices_MPIBAIJ,
1094ba4ca20aSSatish Balay   MatIncreaseOverlap_MPIBAIJ,MatGetValues_MPIBAIJ,0,MatPrintHelp_MPIBAIJ,
10954c50302cSBarry Smith   MatScale_MPIBAIJ,0,0,0,MatGetBlockSize_MPIBAIJ};
109679bdfe76SSatish Balay 
109779bdfe76SSatish Balay 
109879bdfe76SSatish Balay /*@C
109979bdfe76SSatish Balay    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
110079bdfe76SSatish Balay    (block compressed row).  For good matrix assembly performance
110179bdfe76SSatish Balay    the user should preallocate the matrix storage by setting the parameters
110279bdfe76SSatish Balay    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
110379bdfe76SSatish Balay    performance can be increased by more than a factor of 50.
110479bdfe76SSatish Balay 
110579bdfe76SSatish Balay    Input Parameters:
110679bdfe76SSatish Balay .  comm - MPI communicator
110779bdfe76SSatish Balay .  bs   - size of blockk
110879bdfe76SSatish Balay .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
110992e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
111092e8d321SLois Curfman McInnes            y vector for the matrix-vector product y = Ax.
111192e8d321SLois Curfman McInnes .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
111292e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
111392e8d321SLois Curfman McInnes            x vector for the matrix-vector product y = Ax.
111479bdfe76SSatish Balay .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
111592e8d321SLois Curfman McInnes .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
111679bdfe76SSatish Balay .  d_nz  - number of block nonzeros per block row in diagonal portion of local
111779bdfe76SSatish Balay            submatrix  (same for all local rows)
111892e8d321SLois Curfman McInnes .  d_nzz - array containing the number of block nonzeros in the various block rows
111992e8d321SLois Curfman McInnes            of the in diagonal portion of the local (possibly different for each block
112092e8d321SLois Curfman McInnes            row) or PETSC_NULL.  You must leave room for the diagonal entry even if
112192e8d321SLois Curfman McInnes            it is zero.
112292e8d321SLois Curfman McInnes .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
112379bdfe76SSatish Balay            submatrix (same for all local rows).
112492e8d321SLois Curfman McInnes .  o_nzz - array containing the number of nonzeros in the various block rows of the
112592e8d321SLois Curfman McInnes            off-diagonal portion of the local submatrix (possibly different for
112692e8d321SLois Curfman McInnes            each block row) or PETSC_NULL.
112779bdfe76SSatish Balay 
112879bdfe76SSatish Balay    Output Parameter:
112979bdfe76SSatish Balay .  A - the matrix
113079bdfe76SSatish Balay 
113179bdfe76SSatish Balay    Notes:
113279bdfe76SSatish Balay    The user MUST specify either the local or global matrix dimensions
113379bdfe76SSatish Balay    (possibly both).
113479bdfe76SSatish Balay 
113579bdfe76SSatish Balay    Storage Information:
113679bdfe76SSatish Balay    For a square global matrix we define each processor's diagonal portion
113779bdfe76SSatish Balay    to be its local rows and the corresponding columns (a square submatrix);
113879bdfe76SSatish Balay    each processor's off-diagonal portion encompasses the remainder of the
113979bdfe76SSatish Balay    local matrix (a rectangular submatrix).
114079bdfe76SSatish Balay 
114179bdfe76SSatish Balay    The user can specify preallocated storage for the diagonal part of
114279bdfe76SSatish Balay    the local submatrix with either d_nz or d_nnz (not both).  Set
114379bdfe76SSatish Balay    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
114479bdfe76SSatish Balay    memory allocation.  Likewise, specify preallocated storage for the
114579bdfe76SSatish Balay    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
114679bdfe76SSatish Balay 
114779bdfe76SSatish Balay    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
114879bdfe76SSatish Balay    the figure below we depict these three local rows and all columns (0-11).
114979bdfe76SSatish Balay 
115079bdfe76SSatish Balay $          0 1 2 3 4 5 6 7 8 9 10 11
115179bdfe76SSatish Balay $         -------------------
115279bdfe76SSatish Balay $  row 3  |  o o o d d d o o o o o o
115379bdfe76SSatish Balay $  row 4  |  o o o d d d o o o o o o
115479bdfe76SSatish Balay $  row 5  |  o o o d d d o o o o o o
115579bdfe76SSatish Balay $         -------------------
115679bdfe76SSatish Balay $
115779bdfe76SSatish Balay 
115879bdfe76SSatish Balay    Thus, any entries in the d locations are stored in the d (diagonal)
115979bdfe76SSatish Balay    submatrix, and any entries in the o locations are stored in the
116079bdfe76SSatish Balay    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
116157b952d6SSatish Balay    stored simply in the MATSEQBAIJ format for compressed row storage.
116279bdfe76SSatish Balay 
116379bdfe76SSatish Balay    Now d_nz should indicate the number of nonzeros per row in the d matrix,
116479bdfe76SSatish Balay    and o_nz should indicate the number of nonzeros per row in the o matrix.
116579bdfe76SSatish Balay    In general, for PDE problems in which most nonzeros are near the diagonal,
116692e8d321SLois Curfman McInnes    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
116792e8d321SLois Curfman McInnes    or you will get TERRIBLE performance; see the users' manual chapter on
11686da5968aSLois Curfman McInnes    matrices.
116979bdfe76SSatish Balay 
117092e8d321SLois Curfman McInnes .keywords: matrix, block, aij, compressed row, sparse, parallel
117179bdfe76SSatish Balay 
117279bdfe76SSatish Balay .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues()
117379bdfe76SSatish Balay @*/
117479bdfe76SSatish Balay int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,
117579bdfe76SSatish Balay                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
117679bdfe76SSatish Balay {
117779bdfe76SSatish Balay   Mat          B;
117879bdfe76SSatish Balay   Mat_MPIBAIJ  *b;
11794c50302cSBarry Smith   int          ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size;
118079bdfe76SSatish Balay 
118179bdfe76SSatish Balay   if (bs < 1) SETERRQ(1,"MatCreateMPIBAIJ: invalid block size specified");
118279bdfe76SSatish Balay   *A = 0;
118379bdfe76SSatish Balay   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIBAIJ,comm);
118479bdfe76SSatish Balay   PLogObjectCreate(B);
118579bdfe76SSatish Balay   B->data       = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b);
118679bdfe76SSatish Balay   PetscMemzero(b,sizeof(Mat_MPIBAIJ));
118779bdfe76SSatish Balay   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
11884c50302cSBarry Smith   MPI_Comm_size(comm,&size);
11894c50302cSBarry Smith   if (size == 1) {
11904c50302cSBarry Smith     B->ops.getrowij          = MatGetRowIJ_MPIBAIJ;
11914c50302cSBarry Smith     B->ops.restorerowij      = MatRestoreRowIJ_MPIBAIJ;
11924c50302cSBarry Smith     B->ops.lufactorsymbolic  = MatLUFactorSymbolic_MPIBAIJ;
11934c50302cSBarry Smith     B->ops.lufactornumeric   = MatLUFactorNumeric_MPIBAIJ;
11944c50302cSBarry Smith     B->ops.lufactor          = MatLUFactor_MPIBAIJ;
11954c50302cSBarry Smith     B->ops.solve             = MatSolve_MPIBAIJ;
11964c50302cSBarry Smith     B->ops.solveadd          = MatSolveAdd_MPIBAIJ;
11974c50302cSBarry Smith     B->ops.solvetrans        = MatSolveTrans_MPIBAIJ;
11984c50302cSBarry Smith     B->ops.solvetransadd     = MatSolveTransAdd_MPIBAIJ;
11994c50302cSBarry Smith     B->ops.ilufactorsymbolic = MatILUFactorSymbolic_MPIBAIJ;
12004c50302cSBarry Smith   }
12014c50302cSBarry Smith 
120279bdfe76SSatish Balay   B->destroy    = MatDestroy_MPIBAIJ;
120379bdfe76SSatish Balay   B->view       = MatView_MPIBAIJ;
120490f02eecSBarry Smith   B->mapping    = 0;
120579bdfe76SSatish Balay   B->factor     = 0;
120679bdfe76SSatish Balay   B->assembled  = PETSC_FALSE;
120779bdfe76SSatish Balay 
120879bdfe76SSatish Balay   b->insertmode = NOT_SET_VALUES;
120979bdfe76SSatish Balay   MPI_Comm_rank(comm,&b->rank);
121079bdfe76SSatish Balay   MPI_Comm_size(comm,&b->size);
121179bdfe76SSatish Balay 
121279bdfe76SSatish Balay   if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
121357b952d6SSatish Balay     SETERRQ(1,"MatCreateMPIBAIJ:Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
1214cee3aa6bSSatish Balay   if ( M == PETSC_DECIDE && m == PETSC_DECIDE) SETERRQ(1,"MatCreateMPIBAIJ: either M or m should be specified");
1215cee3aa6bSSatish Balay   if ( M == PETSC_DECIDE && n == PETSC_DECIDE)SETERRQ(1,"MatCreateMPIBAIJ: either N or n should be specified");
1216cee3aa6bSSatish Balay   if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE;
1217cee3aa6bSSatish Balay   if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE;
121879bdfe76SSatish Balay 
121979bdfe76SSatish Balay   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
122079bdfe76SSatish Balay     work[0] = m; work[1] = n;
122179bdfe76SSatish Balay     mbs = m/bs; nbs = n/bs;
122279bdfe76SSatish Balay     MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
122379bdfe76SSatish Balay     if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;}
122479bdfe76SSatish Balay     if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;}
122579bdfe76SSatish Balay   }
122679bdfe76SSatish Balay   if (m == PETSC_DECIDE) {
122779bdfe76SSatish Balay     Mbs = M/bs;
122879bdfe76SSatish Balay     if (Mbs*bs != M) SETERRQ(1,"MatCreateMPIBAIJ: No of global rows must be divisible by blocksize");
122979bdfe76SSatish Balay     mbs = Mbs/b->size + ((Mbs % b->size) > b->rank);
123079bdfe76SSatish Balay     m   = mbs*bs;
123179bdfe76SSatish Balay   }
123279bdfe76SSatish Balay   if (n == PETSC_DECIDE) {
123379bdfe76SSatish Balay     Nbs = N/bs;
123479bdfe76SSatish Balay     if (Nbs*bs != N) SETERRQ(1,"MatCreateMPIBAIJ: No of global cols must be divisible by blocksize");
123579bdfe76SSatish Balay     nbs = Nbs/b->size + ((Nbs % b->size) > b->rank);
123679bdfe76SSatish Balay     n   = nbs*bs;
123779bdfe76SSatish Balay   }
1238cee3aa6bSSatish Balay   if (mbs*bs != m || nbs*bs != n) SETERRQ(1,"MatCreateMPIBAIJ: No of local rows, cols must be divisible by blocksize");
123979bdfe76SSatish Balay 
124079bdfe76SSatish Balay   b->m = m; B->m = m;
124179bdfe76SSatish Balay   b->n = n; B->n = n;
124279bdfe76SSatish Balay   b->N = N; B->N = N;
124379bdfe76SSatish Balay   b->M = M; B->M = M;
124479bdfe76SSatish Balay   b->bs  = bs;
124579bdfe76SSatish Balay   b->bs2 = bs*bs;
124679bdfe76SSatish Balay   b->mbs = mbs;
124779bdfe76SSatish Balay   b->nbs = nbs;
124879bdfe76SSatish Balay   b->Mbs = Mbs;
124979bdfe76SSatish Balay   b->Nbs = Nbs;
125079bdfe76SSatish Balay 
125179bdfe76SSatish Balay   /* build local table of row and column ownerships */
125279bdfe76SSatish Balay   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
125379bdfe76SSatish Balay   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIBAIJ));
12540ac07820SSatish Balay   b->cowners = b->rowners + b->size + 2;
125579bdfe76SSatish Balay   MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);
125679bdfe76SSatish Balay   b->rowners[0] = 0;
125779bdfe76SSatish Balay   for ( i=2; i<=b->size; i++ ) {
125879bdfe76SSatish Balay     b->rowners[i] += b->rowners[i-1];
125979bdfe76SSatish Balay   }
126079bdfe76SSatish Balay   b->rstart = b->rowners[b->rank];
126179bdfe76SSatish Balay   b->rend   = b->rowners[b->rank+1];
126279bdfe76SSatish Balay   MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);
126379bdfe76SSatish Balay   b->cowners[0] = 0;
126479bdfe76SSatish Balay   for ( i=2; i<=b->size; i++ ) {
126579bdfe76SSatish Balay     b->cowners[i] += b->cowners[i-1];
126679bdfe76SSatish Balay   }
126779bdfe76SSatish Balay   b->cstart = b->cowners[b->rank];
126879bdfe76SSatish Balay   b->cend   = b->cowners[b->rank+1];
126979bdfe76SSatish Balay 
127079bdfe76SSatish Balay   if (d_nz == PETSC_DEFAULT) d_nz = 5;
127179bdfe76SSatish Balay   ierr = MatCreateSeqBAIJ(MPI_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
127279bdfe76SSatish Balay   PLogObjectParent(B,b->A);
127379bdfe76SSatish Balay   if (o_nz == PETSC_DEFAULT) o_nz = 0;
127479bdfe76SSatish Balay   ierr = MatCreateSeqBAIJ(MPI_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
127579bdfe76SSatish Balay   PLogObjectParent(B,b->B);
127679bdfe76SSatish Balay 
127779bdfe76SSatish Balay   /* build cache for off array entries formed */
127879bdfe76SSatish Balay   ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
127990f02eecSBarry Smith   b->donotstash  = 0;
128079bdfe76SSatish Balay   b->colmap      = 0;
128179bdfe76SSatish Balay   b->garray      = 0;
128279bdfe76SSatish Balay   b->roworiented = 1;
128379bdfe76SSatish Balay 
128479bdfe76SSatish Balay   /* stuff used for matrix vector multiply */
128579bdfe76SSatish Balay   b->lvec      = 0;
128679bdfe76SSatish Balay   b->Mvctx     = 0;
128779bdfe76SSatish Balay 
128879bdfe76SSatish Balay   /* stuff for MatGetRow() */
128979bdfe76SSatish Balay   b->rowindices   = 0;
129079bdfe76SSatish Balay   b->rowvalues    = 0;
129179bdfe76SSatish Balay   b->getrowactive = PETSC_FALSE;
129279bdfe76SSatish Balay 
129379bdfe76SSatish Balay   *A = B;
129479bdfe76SSatish Balay   return 0;
129579bdfe76SSatish Balay }
12960ac07820SSatish Balay static int MatConvertSameType_MPIBAIJ(Mat matin,Mat *newmat,int cpvalues)
12970ac07820SSatish Balay {
12980ac07820SSatish Balay   Mat        mat;
12990ac07820SSatish Balay   Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data;
13000ac07820SSatish Balay   int        ierr, len=0, flg;
13010ac07820SSatish Balay 
13020ac07820SSatish Balay   *newmat       = 0;
13030ac07820SSatish Balay   PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIBAIJ,matin->comm);
13040ac07820SSatish Balay   PLogObjectCreate(mat);
13050ac07820SSatish Balay   mat->data       = (void *) (a = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(a);
13060ac07820SSatish Balay   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
13070ac07820SSatish Balay   mat->destroy    = MatDestroy_MPIBAIJ;
13080ac07820SSatish Balay   mat->view       = MatView_MPIBAIJ;
13090ac07820SSatish Balay   mat->factor     = matin->factor;
13100ac07820SSatish Balay   mat->assembled  = PETSC_TRUE;
13110ac07820SSatish Balay 
13120ac07820SSatish Balay   a->m = mat->m   = oldmat->m;
13130ac07820SSatish Balay   a->n = mat->n   = oldmat->n;
13140ac07820SSatish Balay   a->M = mat->M   = oldmat->M;
13150ac07820SSatish Balay   a->N = mat->N   = oldmat->N;
13160ac07820SSatish Balay 
13170ac07820SSatish Balay   a->bs  = oldmat->bs;
13180ac07820SSatish Balay   a->bs2 = oldmat->bs2;
13190ac07820SSatish Balay   a->mbs = oldmat->mbs;
13200ac07820SSatish Balay   a->nbs = oldmat->nbs;
13210ac07820SSatish Balay   a->Mbs = oldmat->Mbs;
13220ac07820SSatish Balay   a->Nbs = oldmat->Nbs;
13230ac07820SSatish Balay 
13240ac07820SSatish Balay   a->rstart       = oldmat->rstart;
13250ac07820SSatish Balay   a->rend         = oldmat->rend;
13260ac07820SSatish Balay   a->cstart       = oldmat->cstart;
13270ac07820SSatish Balay   a->cend         = oldmat->cend;
13280ac07820SSatish Balay   a->size         = oldmat->size;
13290ac07820SSatish Balay   a->rank         = oldmat->rank;
13300ac07820SSatish Balay   a->insertmode   = NOT_SET_VALUES;
13310ac07820SSatish Balay   a->rowvalues    = 0;
13320ac07820SSatish Balay   a->getrowactive = PETSC_FALSE;
13330ac07820SSatish Balay 
13340ac07820SSatish Balay   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
13350ac07820SSatish Balay   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIBAIJ));
13360ac07820SSatish Balay   a->cowners = a->rowners + a->size + 2;
13370ac07820SSatish Balay   PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
13380ac07820SSatish Balay   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
13390ac07820SSatish Balay   if (oldmat->colmap) {
13400ac07820SSatish Balay     a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap);
13410ac07820SSatish Balay     PLogObjectMemory(mat,(a->Nbs)*sizeof(int));
13420ac07820SSatish Balay     PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));
13430ac07820SSatish Balay   } else a->colmap = 0;
13440ac07820SSatish Balay   if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) {
13450ac07820SSatish Balay     a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray);
13460ac07820SSatish Balay     PLogObjectMemory(mat,len*sizeof(int));
13470ac07820SSatish Balay     PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
13480ac07820SSatish Balay   } else a->garray = 0;
13490ac07820SSatish Balay 
13500ac07820SSatish Balay   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
13510ac07820SSatish Balay   PLogObjectParent(mat,a->lvec);
13520ac07820SSatish Balay   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
13530ac07820SSatish Balay   PLogObjectParent(mat,a->Mvctx);
13540ac07820SSatish Balay   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
13550ac07820SSatish Balay   PLogObjectParent(mat,a->A);
13560ac07820SSatish Balay   ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
13570ac07820SSatish Balay   PLogObjectParent(mat,a->B);
13580ac07820SSatish Balay   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
13590ac07820SSatish Balay   if (flg) {
13600ac07820SSatish Balay     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
13610ac07820SSatish Balay   }
13620ac07820SSatish Balay   *newmat = mat;
13630ac07820SSatish Balay   return 0;
13640ac07820SSatish Balay }
136557b952d6SSatish Balay 
136657b952d6SSatish Balay #include "sys.h"
136757b952d6SSatish Balay 
136857b952d6SSatish Balay int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat)
136957b952d6SSatish Balay {
137057b952d6SSatish Balay   Mat          A;
137157b952d6SSatish Balay   int          i, nz, ierr, j,rstart, rend, fd;
137257b952d6SSatish Balay   Scalar       *vals,*buf;
137357b952d6SSatish Balay   MPI_Comm     comm = ((PetscObject)viewer)->comm;
137457b952d6SSatish Balay   MPI_Status   status;
1375cee3aa6bSSatish Balay   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
137657b952d6SSatish Balay   int          *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf;
137757b952d6SSatish Balay   int          flg,tag = ((PetscObject)viewer)->tag,bs=1,bs2,Mbs,mbs,extra_rows;
137857b952d6SSatish Balay   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
137957b952d6SSatish Balay   int          dcount,kmax,k,nzcount,tmp;
138057b952d6SSatish Balay 
138157b952d6SSatish Balay 
138257b952d6SSatish Balay   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
138357b952d6SSatish Balay   bs2  = bs*bs;
138457b952d6SSatish Balay 
138557b952d6SSatish Balay   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
138657b952d6SSatish Balay   if (!rank) {
138757b952d6SSatish Balay     ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
138857b952d6SSatish Balay     ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr);
1389cee3aa6bSSatish Balay     if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIBAIJ:not matrix object");
139057b952d6SSatish Balay   }
139157b952d6SSatish Balay 
139257b952d6SSatish Balay   MPI_Bcast(header+1,3,MPI_INT,0,comm);
139357b952d6SSatish Balay   M = header[1]; N = header[2];
139457b952d6SSatish Balay 
1395b0267e0aSLois Curfman McInnes   if (M != N) SETERRQ(1,"MatLoad_MPIBAIJ:Can only do square matrices");
139657b952d6SSatish Balay 
139757b952d6SSatish Balay   /*
139857b952d6SSatish Balay      This code adds extra rows to make sure the number of rows is
139957b952d6SSatish Balay      divisible by the blocksize
140057b952d6SSatish Balay   */
140157b952d6SSatish Balay   Mbs        = M/bs;
140257b952d6SSatish Balay   extra_rows = bs - M + bs*(Mbs);
140357b952d6SSatish Balay   if (extra_rows == bs) extra_rows = 0;
140457b952d6SSatish Balay   else                  Mbs++;
140557b952d6SSatish Balay   if (extra_rows &&!rank) {
1406b0267e0aSLois Curfman McInnes     PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
140757b952d6SSatish Balay   }
1408537820f0SBarry Smith 
140957b952d6SSatish Balay   /* determine ownership of all rows */
141057b952d6SSatish Balay   mbs = Mbs/size + ((Mbs % size) > rank);
141157b952d6SSatish Balay   m   = mbs * bs;
1412cee3aa6bSSatish Balay   rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners);
1413cee3aa6bSSatish Balay   browners = rowners + size + 1;
141457b952d6SSatish Balay   MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);
141557b952d6SSatish Balay   rowners[0] = 0;
1416cee3aa6bSSatish Balay   for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1];
1417cee3aa6bSSatish Balay   for ( i=0; i<=size;  i++ ) browners[i] = rowners[i]*bs;
141857b952d6SSatish Balay   rstart = rowners[rank];
141957b952d6SSatish Balay   rend   = rowners[rank+1];
142057b952d6SSatish Balay 
142157b952d6SSatish Balay   /* distribute row lengths to all processors */
142257b952d6SSatish Balay   locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens);
142357b952d6SSatish Balay   if (!rank) {
142457b952d6SSatish Balay     rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths);
142557b952d6SSatish Balay     ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
142657b952d6SSatish Balay     for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
142757b952d6SSatish Balay     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
1428cee3aa6bSSatish Balay     for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i];
1429cee3aa6bSSatish Balay     MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);
143057b952d6SSatish Balay     PetscFree(sndcounts);
143157b952d6SSatish Balay   }
143257b952d6SSatish Balay   else {
143357b952d6SSatish Balay     MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);
143457b952d6SSatish Balay   }
143557b952d6SSatish Balay 
143657b952d6SSatish Balay   if (!rank) {
143757b952d6SSatish Balay     /* calculate the number of nonzeros on each processor */
143857b952d6SSatish Balay     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
143957b952d6SSatish Balay     PetscMemzero(procsnz,size*sizeof(int));
144057b952d6SSatish Balay     for ( i=0; i<size; i++ ) {
144157b952d6SSatish Balay       for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) {
144257b952d6SSatish Balay         procsnz[i] += rowlengths[j];
144357b952d6SSatish Balay       }
144457b952d6SSatish Balay     }
144557b952d6SSatish Balay     PetscFree(rowlengths);
144657b952d6SSatish Balay 
144757b952d6SSatish Balay     /* determine max buffer needed and allocate it */
144857b952d6SSatish Balay     maxnz = 0;
144957b952d6SSatish Balay     for ( i=0; i<size; i++ ) {
145057b952d6SSatish Balay       maxnz = PetscMax(maxnz,procsnz[i]);
145157b952d6SSatish Balay     }
145257b952d6SSatish Balay     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
145357b952d6SSatish Balay 
145457b952d6SSatish Balay     /* read in my part of the matrix column indices  */
145557b952d6SSatish Balay     nz = procsnz[0];
145657b952d6SSatish Balay     ibuf = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
145757b952d6SSatish Balay     mycols = ibuf;
1458cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
145957b952d6SSatish Balay     ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr);
1460cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
1461cee3aa6bSSatish Balay 
146257b952d6SSatish Balay     /* read in every ones (except the last) and ship off */
146357b952d6SSatish Balay     for ( i=1; i<size-1; i++ ) {
146457b952d6SSatish Balay       nz = procsnz[i];
146557b952d6SSatish Balay       ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
146657b952d6SSatish Balay       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
146757b952d6SSatish Balay     }
146857b952d6SSatish Balay     /* read in the stuff for the last proc */
146957b952d6SSatish Balay     if ( size != 1 ) {
147057b952d6SSatish Balay       nz = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
147157b952d6SSatish Balay       ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
147257b952d6SSatish Balay       for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i;
1473cee3aa6bSSatish Balay       MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);
147457b952d6SSatish Balay     }
147557b952d6SSatish Balay     PetscFree(cols);
147657b952d6SSatish Balay   }
147757b952d6SSatish Balay   else {
147857b952d6SSatish Balay     /* determine buffer space needed for message */
147957b952d6SSatish Balay     nz = 0;
148057b952d6SSatish Balay     for ( i=0; i<m; i++ ) {
148157b952d6SSatish Balay       nz += locrowlens[i];
148257b952d6SSatish Balay     }
148357b952d6SSatish Balay     ibuf = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
148457b952d6SSatish Balay     mycols = ibuf;
148557b952d6SSatish Balay     /* receive message of column indices*/
148657b952d6SSatish Balay     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
148757b952d6SSatish Balay     MPI_Get_count(&status,MPI_INT,&maxnz);
1488cee3aa6bSSatish Balay     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIBAIJ:something is wrong with file");
148957b952d6SSatish Balay   }
149057b952d6SSatish Balay 
149157b952d6SSatish Balay   /* loop over local rows, determining number of off diagonal entries */
1492cee3aa6bSSatish Balay   dlens  = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens);
1493cee3aa6bSSatish Balay   odlens = dlens + (rend-rstart);
149457b952d6SSatish Balay   mask   = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask);
1495cee3aa6bSSatish Balay   PetscMemzero(mask,3*Mbs*sizeof(int));
149657b952d6SSatish Balay   masked1 = mask    + Mbs;
149757b952d6SSatish Balay   masked2 = masked1 + Mbs;
149857b952d6SSatish Balay   rowcount = 0; nzcount = 0;
149957b952d6SSatish Balay   for ( i=0; i<mbs; i++ ) {
150057b952d6SSatish Balay     dcount  = 0;
150157b952d6SSatish Balay     odcount = 0;
150257b952d6SSatish Balay     for ( j=0; j<bs; j++ ) {
150357b952d6SSatish Balay       kmax = locrowlens[rowcount];
150457b952d6SSatish Balay       for ( k=0; k<kmax; k++ ) {
150557b952d6SSatish Balay         tmp = mycols[nzcount++]/bs;
150657b952d6SSatish Balay         if (!mask[tmp]) {
150757b952d6SSatish Balay           mask[tmp] = 1;
150857b952d6SSatish Balay           if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp;
150957b952d6SSatish Balay           else masked1[dcount++] = tmp;
151057b952d6SSatish Balay         }
151157b952d6SSatish Balay       }
151257b952d6SSatish Balay       rowcount++;
151357b952d6SSatish Balay     }
1514cee3aa6bSSatish Balay 
151557b952d6SSatish Balay     dlens[i]  = dcount;
151657b952d6SSatish Balay     odlens[i] = odcount;
1517cee3aa6bSSatish Balay 
151857b952d6SSatish Balay     /* zero out the mask elements we set */
151957b952d6SSatish Balay     for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0;
152057b952d6SSatish Balay     for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0;
152157b952d6SSatish Balay   }
1522cee3aa6bSSatish Balay 
152357b952d6SSatish Balay   /* create our matrix */
1524537820f0SBarry Smith   ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);
1525537820f0SBarry Smith          CHKERRQ(ierr);
152657b952d6SSatish Balay   A = *newmat;
15276d4a8577SBarry Smith   MatSetOption(A,MAT_COLUMNS_SORTED);
152857b952d6SSatish Balay 
152957b952d6SSatish Balay   if (!rank) {
153057b952d6SSatish Balay     buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf);
153157b952d6SSatish Balay     /* read in my part of the matrix numerical values  */
153257b952d6SSatish Balay     nz = procsnz[0];
153357b952d6SSatish Balay     vals = buf;
1534cee3aa6bSSatish Balay     mycols = ibuf;
1535cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
153657b952d6SSatish Balay     ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
1537cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
1538537820f0SBarry Smith 
153957b952d6SSatish Balay     /* insert into matrix */
154057b952d6SSatish Balay     jj      = rstart*bs;
154157b952d6SSatish Balay     for ( i=0; i<m; i++ ) {
154257b952d6SSatish Balay       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
154357b952d6SSatish Balay       mycols += locrowlens[i];
154457b952d6SSatish Balay       vals   += locrowlens[i];
154557b952d6SSatish Balay       jj++;
154657b952d6SSatish Balay     }
154757b952d6SSatish Balay     /* read in other processors (except the last one) and ship out */
154857b952d6SSatish Balay     for ( i=1; i<size-1; i++ ) {
154957b952d6SSatish Balay       nz = procsnz[i];
155057b952d6SSatish Balay       vals = buf;
155157b952d6SSatish Balay       ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
155257b952d6SSatish Balay       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
155357b952d6SSatish Balay     }
155457b952d6SSatish Balay     /* the last proc */
155557b952d6SSatish Balay     if ( size != 1 ){
155657b952d6SSatish Balay       nz = procsnz[i] - extra_rows;
1557cee3aa6bSSatish Balay       vals = buf;
155857b952d6SSatish Balay       ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
155957b952d6SSatish Balay       for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0;
1560cee3aa6bSSatish Balay       MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);
156157b952d6SSatish Balay     }
156257b952d6SSatish Balay     PetscFree(procsnz);
156357b952d6SSatish Balay   }
156457b952d6SSatish Balay   else {
156557b952d6SSatish Balay     /* receive numeric values */
156657b952d6SSatish Balay     buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf);
156757b952d6SSatish Balay 
156857b952d6SSatish Balay     /* receive message of values*/
156957b952d6SSatish Balay     vals = buf;
1570cee3aa6bSSatish Balay     mycols = ibuf;
157157b952d6SSatish Balay     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
157257b952d6SSatish Balay     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1573cee3aa6bSSatish Balay     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIBAIJ:something is wrong with file");
157457b952d6SSatish Balay 
157557b952d6SSatish Balay     /* insert into matrix */
157657b952d6SSatish Balay     jj      = rstart*bs;
1577cee3aa6bSSatish Balay     for ( i=0; i<m; i++ ) {
157857b952d6SSatish Balay       ierr    = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
157957b952d6SSatish Balay       mycols += locrowlens[i];
158057b952d6SSatish Balay       vals   += locrowlens[i];
158157b952d6SSatish Balay       jj++;
158257b952d6SSatish Balay     }
158357b952d6SSatish Balay   }
158457b952d6SSatish Balay   PetscFree(locrowlens);
158557b952d6SSatish Balay   PetscFree(buf);
158657b952d6SSatish Balay   PetscFree(ibuf);
158757b952d6SSatish Balay   PetscFree(rowners);
158857b952d6SSatish Balay   PetscFree(dlens);
1589cee3aa6bSSatish Balay   PetscFree(mask);
15906d4a8577SBarry Smith   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
15916d4a8577SBarry Smith   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
159257b952d6SSatish Balay   return 0;
159357b952d6SSatish Balay }
159457b952d6SSatish Balay 
159557b952d6SSatish Balay 
1596