xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 70f55243aafb320636e2a54ff30cab5d1e8d3d7b)
179bdfe76SSatish Balay #ifndef lint
2*70f55243SBarry Smith static char vcid[] = "$Id: mpibaij.c,v 1.20 1996/08/06 16:51:30 balay Exp bsmith $";
379bdfe76SSatish Balay #endif
479bdfe76SSatish Balay 
5*70f55243SBarry Smith #include "src/mat/impls/baij/mpi/mpibaij.h"
6c16cb8f2SBarry Smith #include "src/vec/vecimpl.h"
779bdfe76SSatish Balay 
857b952d6SSatish Balay #include "draw.h"
957b952d6SSatish Balay #include "pinclude/pviewer.h"
1057b952d6SSatish Balay 
1157b952d6SSatish Balay extern int MatSetUpMultiply_MPIBAIJ(Mat);
1257b952d6SSatish Balay extern int DisAssemble_MPIBAIJ(Mat);
13d212a18eSSatish Balay extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int);
14d212a18eSSatish Balay extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatGetSubMatrixCall,Mat **);
1593bc47c4SSatish Balay extern int MatLUFactorSymbolic_MPIBAIJ(Mat,IS,IS,double,Mat *);
1693bc47c4SSatish Balay extern int MatLUFactorNumeric_MPIBAIJ(Mat,Mat *);
1793bc47c4SSatish Balay extern int MatLUFactor_MPIBAIJ(Mat,IS,IS,double);
1893bc47c4SSatish Balay extern int MatSolve_MPIBAIJ(Mat,Vec,Vec);
1993bc47c4SSatish Balay extern int MatSolveAdd_MPIBAIJ(Mat,Vec,Vec,Vec);
2093bc47c4SSatish Balay extern int MatSolveTrans_MPIBAIJ(Mat,Vec,Vec);
2193bc47c4SSatish Balay extern int MatSolveTransAdd_MPIBAIJ(Mat,Vec,Vec,Vec);
2293bc47c4SSatish Balay extern int MatILUFactorSymbolic_MPIBAIJ(Mat,IS,IS,double,int,Mat *);
2357b952d6SSatish Balay 
2457b952d6SSatish Balay /* local utility routine that creates a mapping from the global column
2557b952d6SSatish Balay number to the local number in the off-diagonal part of the local
2657b952d6SSatish Balay storage of the matrix.  This is done in a non scable way since the
2757b952d6SSatish Balay length of colmap equals the global matrix length.
2857b952d6SSatish Balay */
2957b952d6SSatish Balay static int CreateColmap_MPIBAIJ_Private(Mat mat)
3057b952d6SSatish Balay {
3157b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
3257b952d6SSatish Balay   Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data;
3357b952d6SSatish Balay   int        nbs = B->nbs,i;
3457b952d6SSatish Balay 
3557b952d6SSatish Balay   baij->colmap = (int *) PetscMalloc(baij->Nbs*sizeof(int));CHKPTRQ(baij->colmap);
3657b952d6SSatish Balay   PLogObjectMemory(mat,baij->Nbs*sizeof(int));
3757b952d6SSatish Balay   PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));
38905e6a2fSBarry Smith   for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i + 1;
3957b952d6SSatish Balay   return 0;
4057b952d6SSatish Balay }
4157b952d6SSatish Balay 
4257b952d6SSatish Balay 
43acdf5bf4SSatish Balay static int MatGetReordering_MPIBAIJ(Mat mat,MatReordering type,IS *rperm,IS *cperm)
4457b952d6SSatish Balay {
4557b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
4657b952d6SSatish Balay   int         ierr;
4757b952d6SSatish Balay   if (baij->size == 1) {
4857b952d6SSatish Balay     ierr = MatGetReordering(baij->A,type,rperm,cperm); CHKERRQ(ierr);
4957b952d6SSatish Balay   } else SETERRQ(1,"MatGetReordering_MPIBAIJ:not supported in parallel");
5057b952d6SSatish Balay   return 0;
5157b952d6SSatish Balay }
5257b952d6SSatish Balay 
5357b952d6SSatish Balay static int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
5457b952d6SSatish Balay {
5557b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
5657b952d6SSatish Balay   Scalar      value;
5757b952d6SSatish Balay   int         ierr,i,j, rstart = baij->rstart, rend = baij->rend;
5857b952d6SSatish Balay   int         cstart = baij->cstart, cend = baij->cend,row,col;
5957b952d6SSatish Balay   int         roworiented = baij->roworiented,rstart_orig,rend_orig;
6057b952d6SSatish Balay   int         cstart_orig,cend_orig,bs=baij->bs;
6157b952d6SSatish Balay 
6257b952d6SSatish Balay   if (baij->insertmode != NOT_SET_VALUES && baij->insertmode != addv) {
6357b952d6SSatish Balay     SETERRQ(1,"MatSetValues_MPIBAIJ:Cannot mix inserts and adds");
6457b952d6SSatish Balay   }
6557b952d6SSatish Balay   baij->insertmode = addv;
6657b952d6SSatish Balay   rstart_orig = rstart*bs;
6757b952d6SSatish Balay   rend_orig   = rend*bs;
6857b952d6SSatish Balay   cstart_orig = cstart*bs;
6957b952d6SSatish Balay   cend_orig   = cend*bs;
7057b952d6SSatish Balay   for ( i=0; i<m; i++ ) {
7157b952d6SSatish Balay     if (im[i] < 0) SETERRQ(1,"MatSetValues_MPIBAIJ:Negative row");
7257b952d6SSatish Balay     if (im[i] >= baij->M) SETERRQ(1,"MatSetValues_MPIBAIJ:Row too large");
7357b952d6SSatish Balay     if (im[i] >= rstart_orig && im[i] < rend_orig) {
7457b952d6SSatish Balay       row = im[i] - rstart_orig;
7557b952d6SSatish Balay       for ( j=0; j<n; j++ ) {
7657b952d6SSatish Balay         if (in[j] < 0) SETERRQ(1,"MatSetValues_MPIBAIJ:Negative column");
7757b952d6SSatish Balay         if (in[j] >= baij->N) SETERRQ(1,"MatSetValues_MPIBAIJ:Col too large");
7857b952d6SSatish Balay         if (in[j] >= cstart_orig && in[j] < cend_orig){
7957b952d6SSatish Balay           col = in[j] - cstart_orig;
8057b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
8157b952d6SSatish Balay           ierr = MatSetValues(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
8257b952d6SSatish Balay         }
8357b952d6SSatish Balay         else {
8457b952d6SSatish Balay           if (mat->was_assembled) {
85905e6a2fSBarry Smith             if (!baij->colmap) {
86905e6a2fSBarry Smith               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
87905e6a2fSBarry Smith             }
88905e6a2fSBarry Smith             col = baij->colmap[in[j]/bs] - 1 + in[j]%bs;
8957b952d6SSatish Balay             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
9057b952d6SSatish Balay               ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
9157b952d6SSatish Balay               col =  in[j];
9257b952d6SSatish Balay             }
9357b952d6SSatish Balay           }
9457b952d6SSatish Balay           else col = in[j];
9557b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
9657b952d6SSatish Balay           ierr = MatSetValues(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
9757b952d6SSatish Balay         }
9857b952d6SSatish Balay       }
9957b952d6SSatish Balay     }
10057b952d6SSatish Balay     else {
10157b952d6SSatish Balay       if (roworiented) {
10257b952d6SSatish Balay         ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
10357b952d6SSatish Balay       }
10457b952d6SSatish Balay       else {
10557b952d6SSatish Balay         row = im[i];
10657b952d6SSatish Balay         for ( j=0; j<n; j++ ) {
10757b952d6SSatish Balay           ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
10857b952d6SSatish Balay         }
10957b952d6SSatish Balay       }
11057b952d6SSatish Balay     }
11157b952d6SSatish Balay   }
11257b952d6SSatish Balay   return 0;
11357b952d6SSatish Balay }
11457b952d6SSatish Balay 
115d6de1c52SSatish Balay static int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
116d6de1c52SSatish Balay {
117d6de1c52SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
118d6de1c52SSatish Balay   int        bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs;
119d6de1c52SSatish Balay   int        bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col;
120d6de1c52SSatish Balay 
121d6de1c52SSatish Balay   for ( i=0; i<m; i++ ) {
122d6de1c52SSatish Balay     if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIBAIJ:Negative row");
123d6de1c52SSatish Balay     if (idxm[i] >= baij->M) SETERRQ(1,"MatGetValues_MPIBAIJ:Row too large");
124d6de1c52SSatish Balay     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
125d6de1c52SSatish Balay       row = idxm[i] - bsrstart;
126d6de1c52SSatish Balay       for ( j=0; j<n; j++ ) {
127d6de1c52SSatish Balay         if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIBAIJ:Negative column");
128d6de1c52SSatish Balay         if (idxn[j] >= baij->N) SETERRQ(1,"MatGetValues_MPIBAIJ:Col too large");
129d6de1c52SSatish Balay         if (idxn[j] >= bscstart && idxn[j] < bscend){
130d6de1c52SSatish Balay           col = idxn[j] - bscstart;
131d6de1c52SSatish Balay           ierr = MatGetValues(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
132d6de1c52SSatish Balay         }
133d6de1c52SSatish Balay         else {
134905e6a2fSBarry Smith           if (!baij->colmap) {
135905e6a2fSBarry Smith             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
136905e6a2fSBarry Smith           }
137e60e1c95SSatish Balay           if((baij->colmap[idxn[j]/bs]-1 < 0) ||
138e60e1c95SSatish Balay              (baij->garray[baij->colmap[idxn[j]/bs]-1] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
139d9d09a02SSatish Balay           else {
140905e6a2fSBarry Smith             col  = (baij->colmap[idxn[j]/bs]-1)*bs + idxn[j]%bs;
141d6de1c52SSatish Balay             ierr = MatGetValues(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
142d6de1c52SSatish Balay           }
143d6de1c52SSatish Balay         }
144d6de1c52SSatish Balay       }
145d9d09a02SSatish Balay     }
146d6de1c52SSatish Balay     else {
147d6de1c52SSatish Balay       SETERRQ(1,"MatGetValues_MPIBAIJ:Only local values currently supported");
148d6de1c52SSatish Balay     }
149d6de1c52SSatish Balay   }
150d6de1c52SSatish Balay   return 0;
151d6de1c52SSatish Balay }
152d6de1c52SSatish Balay 
153d6de1c52SSatish Balay static int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm)
154d6de1c52SSatish Balay {
155d6de1c52SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
156d6de1c52SSatish Balay   Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data;
157acdf5bf4SSatish Balay   int        ierr, i,bs2=baij->bs2;
158d6de1c52SSatish Balay   double     sum = 0.0;
159d6de1c52SSatish Balay   Scalar     *v;
160d6de1c52SSatish Balay 
161d6de1c52SSatish Balay   if (baij->size == 1) {
162d6de1c52SSatish Balay     ierr =  MatNorm(baij->A,type,norm); CHKERRQ(ierr);
163d6de1c52SSatish Balay   } else {
164d6de1c52SSatish Balay     if (type == NORM_FROBENIUS) {
165d6de1c52SSatish Balay       v = amat->a;
166d6de1c52SSatish Balay       for (i=0; i<amat->nz*bs2; i++ ) {
167d6de1c52SSatish Balay #if defined(PETSC_COMPLEX)
168d6de1c52SSatish Balay         sum += real(conj(*v)*(*v)); v++;
169d6de1c52SSatish Balay #else
170d6de1c52SSatish Balay         sum += (*v)*(*v); v++;
171d6de1c52SSatish Balay #endif
172d6de1c52SSatish Balay       }
173d6de1c52SSatish Balay       v = bmat->a;
174d6de1c52SSatish Balay       for (i=0; i<bmat->nz*bs2; i++ ) {
175d6de1c52SSatish Balay #if defined(PETSC_COMPLEX)
176d6de1c52SSatish Balay         sum += real(conj(*v)*(*v)); v++;
177d6de1c52SSatish Balay #else
178d6de1c52SSatish Balay         sum += (*v)*(*v); v++;
179d6de1c52SSatish Balay #endif
180d6de1c52SSatish Balay       }
181d6de1c52SSatish Balay       MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
182d6de1c52SSatish Balay       *norm = sqrt(*norm);
183d6de1c52SSatish Balay     }
184acdf5bf4SSatish Balay     else
185acdf5bf4SSatish Balay       SETERRQ(1,"MatNorm_SeqBAIJ:No support for this norm yet");
186d6de1c52SSatish Balay   }
187d6de1c52SSatish Balay   return 0;
188d6de1c52SSatish Balay }
18957b952d6SSatish Balay 
19057b952d6SSatish Balay static int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
19157b952d6SSatish Balay {
19257b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
19357b952d6SSatish Balay   MPI_Comm    comm = mat->comm;
19457b952d6SSatish Balay   int         size = baij->size, *owners = baij->rowners,bs=baij->bs;
19557b952d6SSatish Balay   int         rank = baij->rank,tag = mat->tag, *owner,*starts,count,ierr;
19657b952d6SSatish Balay   MPI_Request *send_waits,*recv_waits;
19757b952d6SSatish Balay   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
19857b952d6SSatish Balay   InsertMode  addv;
19957b952d6SSatish Balay   Scalar      *rvalues,*svalues;
20057b952d6SSatish Balay 
20157b952d6SSatish Balay   /* make sure all processors are either in INSERTMODE or ADDMODE */
20257b952d6SSatish Balay   MPI_Allreduce(&baij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
20357b952d6SSatish Balay   if (addv == (ADD_VALUES|INSERT_VALUES)) {
20457b952d6SSatish Balay     SETERRQ(1,"MatAssemblyBegin_MPIBAIJ:Some processors inserted others added");
20557b952d6SSatish Balay   }
20657b952d6SSatish Balay   baij->insertmode = addv; /* in case this processor had no cache */
20757b952d6SSatish Balay 
20857b952d6SSatish Balay   /*  first count number of contributors to each processor */
20957b952d6SSatish Balay   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
21057b952d6SSatish Balay   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
21157b952d6SSatish Balay   owner = (int *) PetscMalloc( (baij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
21257b952d6SSatish Balay   for ( i=0; i<baij->stash.n; i++ ) {
21357b952d6SSatish Balay     idx = baij->stash.idx[i];
21457b952d6SSatish Balay     for ( j=0; j<size; j++ ) {
21557b952d6SSatish Balay       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
21657b952d6SSatish Balay         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
21757b952d6SSatish Balay       }
21857b952d6SSatish Balay     }
21957b952d6SSatish Balay   }
22057b952d6SSatish Balay   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
22157b952d6SSatish Balay 
22257b952d6SSatish Balay   /* inform other processors of number of messages and max length*/
22357b952d6SSatish Balay   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
22457b952d6SSatish Balay   MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);
22557b952d6SSatish Balay   nreceives = work[rank];
22657b952d6SSatish Balay   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
22757b952d6SSatish Balay   nmax = work[rank];
22857b952d6SSatish Balay   PetscFree(work);
22957b952d6SSatish Balay 
23057b952d6SSatish Balay   /* post receives:
23157b952d6SSatish Balay        1) each message will consist of ordered pairs
23257b952d6SSatish Balay      (global index,value) we store the global index as a double
23357b952d6SSatish Balay      to simplify the message passing.
23457b952d6SSatish Balay        2) since we don't know how long each individual message is we
23557b952d6SSatish Balay      allocate the largest needed buffer for each receive. Potentially
23657b952d6SSatish Balay      this is a lot of wasted space.
23757b952d6SSatish Balay 
23857b952d6SSatish Balay 
23957b952d6SSatish Balay        This could be done better.
24057b952d6SSatish Balay   */
24157b952d6SSatish Balay   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
24257b952d6SSatish Balay   CHKPTRQ(rvalues);
24357b952d6SSatish Balay   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
24457b952d6SSatish Balay   CHKPTRQ(recv_waits);
24557b952d6SSatish Balay   for ( i=0; i<nreceives; i++ ) {
24657b952d6SSatish Balay     MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
24757b952d6SSatish Balay               comm,recv_waits+i);
24857b952d6SSatish Balay   }
24957b952d6SSatish Balay 
25057b952d6SSatish Balay   /* do sends:
25157b952d6SSatish Balay       1) starts[i] gives the starting index in svalues for stuff going to
25257b952d6SSatish Balay          the ith processor
25357b952d6SSatish Balay   */
25457b952d6SSatish Balay   svalues = (Scalar *) PetscMalloc(3*(baij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
25557b952d6SSatish Balay   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
25657b952d6SSatish Balay   CHKPTRQ(send_waits);
25757b952d6SSatish Balay   starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
25857b952d6SSatish Balay   starts[0] = 0;
25957b952d6SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
26057b952d6SSatish Balay   for ( i=0; i<baij->stash.n; i++ ) {
26157b952d6SSatish Balay     svalues[3*starts[owner[i]]]       = (Scalar)  baij->stash.idx[i];
26257b952d6SSatish Balay     svalues[3*starts[owner[i]]+1]     = (Scalar)  baij->stash.idy[i];
26357b952d6SSatish Balay     svalues[3*(starts[owner[i]]++)+2] =  baij->stash.array[i];
26457b952d6SSatish Balay   }
26557b952d6SSatish Balay   PetscFree(owner);
26657b952d6SSatish Balay   starts[0] = 0;
26757b952d6SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
26857b952d6SSatish Balay   count = 0;
26957b952d6SSatish Balay   for ( i=0; i<size; i++ ) {
27057b952d6SSatish Balay     if (procs[i]) {
27157b952d6SSatish Balay       MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
27257b952d6SSatish Balay                 comm,send_waits+count++);
27357b952d6SSatish Balay     }
27457b952d6SSatish Balay   }
27557b952d6SSatish Balay   PetscFree(starts); PetscFree(nprocs);
27657b952d6SSatish Balay 
27757b952d6SSatish Balay   /* Free cache space */
27857b952d6SSatish Balay   PLogInfo(0,"[%d]MatAssemblyBegin_MPIBAIJ:Number of off processor values %d\n",rank,baij->stash.n);
27957b952d6SSatish Balay   ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr);
28057b952d6SSatish Balay 
28157b952d6SSatish Balay   baij->svalues    = svalues;    baij->rvalues    = rvalues;
28257b952d6SSatish Balay   baij->nsends     = nsends;     baij->nrecvs     = nreceives;
28357b952d6SSatish Balay   baij->send_waits = send_waits; baij->recv_waits = recv_waits;
28457b952d6SSatish Balay   baij->rmax       = nmax;
28557b952d6SSatish Balay 
28657b952d6SSatish Balay   return 0;
28757b952d6SSatish Balay }
28857b952d6SSatish Balay 
28957b952d6SSatish Balay 
29057b952d6SSatish Balay static int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
29157b952d6SSatish Balay {
29257b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
29357b952d6SSatish Balay   MPI_Status  *send_status,recv_status;
29457b952d6SSatish Balay   int         imdex,nrecvs = baij->nrecvs, count = nrecvs, i, n, ierr;
29557b952d6SSatish Balay   int         bs=baij->bs,row,col,other_disassembled;
29657b952d6SSatish Balay   Scalar      *values,val;
29757b952d6SSatish Balay   InsertMode  addv = baij->insertmode;
29857b952d6SSatish Balay 
29957b952d6SSatish Balay   /*  wait on receives */
30057b952d6SSatish Balay   while (count) {
30157b952d6SSatish Balay     MPI_Waitany(nrecvs,baij->recv_waits,&imdex,&recv_status);
30257b952d6SSatish Balay     /* unpack receives into our local space */
30357b952d6SSatish Balay     values = baij->rvalues + 3*imdex*baij->rmax;
30457b952d6SSatish Balay     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
30557b952d6SSatish Balay     n = n/3;
30657b952d6SSatish Balay     for ( i=0; i<n; i++ ) {
30757b952d6SSatish Balay       row = (int) PetscReal(values[3*i]) - baij->rstart*bs;
30857b952d6SSatish Balay       col = (int) PetscReal(values[3*i+1]);
30957b952d6SSatish Balay       val = values[3*i+2];
31057b952d6SSatish Balay       if (col >= baij->cstart*bs && col < baij->cend*bs) {
31157b952d6SSatish Balay         col -= baij->cstart*bs;
31257b952d6SSatish Balay         MatSetValues(baij->A,1,&row,1,&col,&val,addv);
31357b952d6SSatish Balay       }
31457b952d6SSatish Balay       else {
31557b952d6SSatish Balay         if (mat->was_assembled) {
316905e6a2fSBarry Smith           if (!baij->colmap) {
317905e6a2fSBarry Smith             ierr = CreateColmap_MPIBAIJ_Private(mat); CHKERRQ(ierr);
318905e6a2fSBarry Smith           }
319905e6a2fSBarry Smith           col = (baij->colmap[col/bs]-1)*bs + col%bs;
32057b952d6SSatish Balay           if (col < 0  && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
32157b952d6SSatish Balay             ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
32257b952d6SSatish Balay             col = (int) PetscReal(values[3*i+1]);
32357b952d6SSatish Balay           }
32457b952d6SSatish Balay         }
32557b952d6SSatish Balay         MatSetValues(baij->B,1,&row,1,&col,&val,addv);
32657b952d6SSatish Balay       }
32757b952d6SSatish Balay     }
32857b952d6SSatish Balay     count--;
32957b952d6SSatish Balay   }
33057b952d6SSatish Balay   PetscFree(baij->recv_waits); PetscFree(baij->rvalues);
33157b952d6SSatish Balay 
33257b952d6SSatish Balay   /* wait on sends */
33357b952d6SSatish Balay   if (baij->nsends) {
33457b952d6SSatish Balay     send_status = (MPI_Status *) PetscMalloc(baij->nsends*sizeof(MPI_Status));
33557b952d6SSatish Balay     CHKPTRQ(send_status);
33657b952d6SSatish Balay     MPI_Waitall(baij->nsends,baij->send_waits,send_status);
33757b952d6SSatish Balay     PetscFree(send_status);
33857b952d6SSatish Balay   }
33957b952d6SSatish Balay   PetscFree(baij->send_waits); PetscFree(baij->svalues);
34057b952d6SSatish Balay 
34157b952d6SSatish Balay   baij->insertmode = NOT_SET_VALUES;
34257b952d6SSatish Balay   ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr);
34357b952d6SSatish Balay   ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr);
34457b952d6SSatish Balay 
34557b952d6SSatish Balay   /* determine if any processor has disassembled, if so we must
34657b952d6SSatish Balay      also disassemble ourselfs, in order that we may reassemble. */
34757b952d6SSatish Balay   MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
34857b952d6SSatish Balay   if (mat->was_assembled && !other_disassembled) {
34957b952d6SSatish Balay     ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
35057b952d6SSatish Balay   }
35157b952d6SSatish Balay 
3526d4a8577SBarry Smith   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
35357b952d6SSatish Balay     ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr);
35457b952d6SSatish Balay   }
35557b952d6SSatish Balay   ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr);
35657b952d6SSatish Balay   ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr);
35757b952d6SSatish Balay 
35857b952d6SSatish Balay   if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;}
35957b952d6SSatish Balay   return 0;
36057b952d6SSatish Balay }
36157b952d6SSatish Balay 
36257b952d6SSatish Balay static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer)
36357b952d6SSatish Balay {
36457b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
36557b952d6SSatish Balay   int          ierr;
36657b952d6SSatish Balay 
36757b952d6SSatish Balay   if (baij->size == 1) {
36857b952d6SSatish Balay     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
36957b952d6SSatish Balay   }
37057b952d6SSatish Balay   else SETERRQ(1,"MatView_MPIBAIJ_Binary:Only uniprocessor output supported");
37157b952d6SSatish Balay   return 0;
37257b952d6SSatish Balay }
37357b952d6SSatish Balay 
37457b952d6SSatish Balay static int MatView_MPIBAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
37557b952d6SSatish Balay {
37657b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
377cee3aa6bSSatish Balay   int          ierr, format,rank,bs=baij->bs;
37857b952d6SSatish Balay   FILE         *fd;
37957b952d6SSatish Balay   ViewerType   vtype;
38057b952d6SSatish Balay 
38157b952d6SSatish Balay   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
38257b952d6SSatish Balay   if (vtype  == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
38357b952d6SSatish Balay     ierr = ViewerGetFormat(viewer,&format);
38457b952d6SSatish Balay     if (format == ASCII_FORMAT_INFO_DETAILED) {
38557b952d6SSatish Balay       int nz, nzalloc, mem;
38657b952d6SSatish Balay       MPI_Comm_rank(mat->comm,&rank);
38757b952d6SSatish Balay       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
38857b952d6SSatish Balay       ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem);
38957b952d6SSatish Balay       PetscSequentialPhaseBegin(mat->comm,1);
39057b952d6SSatish Balay       fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
39157b952d6SSatish Balay               rank,baij->m,nz*bs,nzalloc*bs,baij->bs,mem);
39257b952d6SSatish Balay       ierr = MatGetInfo(baij->A,MAT_LOCAL,&nz,&nzalloc,&mem);
39357b952d6SSatish Balay       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,nz*bs);
39457b952d6SSatish Balay       ierr = MatGetInfo(baij->B,MAT_LOCAL,&nz,&nzalloc,&mem);
39557b952d6SSatish Balay       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,nz*bs);
39657b952d6SSatish Balay       fflush(fd);
39757b952d6SSatish Balay       PetscSequentialPhaseEnd(mat->comm,1);
39857b952d6SSatish Balay       ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr);
39957b952d6SSatish Balay       return 0;
40057b952d6SSatish Balay     }
40157b952d6SSatish Balay     else if (format == ASCII_FORMAT_INFO) {
40257b952d6SSatish Balay       return 0;
40357b952d6SSatish Balay     }
40457b952d6SSatish Balay   }
40557b952d6SSatish Balay 
40657b952d6SSatish Balay   if (vtype == DRAW_VIEWER) {
40757b952d6SSatish Balay     Draw       draw;
40857b952d6SSatish Balay     PetscTruth isnull;
40957b952d6SSatish Balay     ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
41057b952d6SSatish Balay     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
41157b952d6SSatish Balay   }
41257b952d6SSatish Balay 
41357b952d6SSatish Balay   if (vtype == ASCII_FILE_VIEWER) {
41457b952d6SSatish Balay     ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
41557b952d6SSatish Balay     PetscSequentialPhaseBegin(mat->comm,1);
41657b952d6SSatish Balay     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
41757b952d6SSatish Balay            baij->rank,baij->m,baij->rstart*bs,baij->rend*bs,baij->n,
41857b952d6SSatish Balay             baij->cstart*bs,baij->cend*bs);
41957b952d6SSatish Balay     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
42057b952d6SSatish Balay     ierr = MatView(baij->B,viewer); CHKERRQ(ierr);
42157b952d6SSatish Balay     fflush(fd);
42257b952d6SSatish Balay     PetscSequentialPhaseEnd(mat->comm,1);
42357b952d6SSatish Balay   }
42457b952d6SSatish Balay   else {
42557b952d6SSatish Balay     int size = baij->size;
42657b952d6SSatish Balay     rank = baij->rank;
42757b952d6SSatish Balay     if (size == 1) {
42857b952d6SSatish Balay       ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
42957b952d6SSatish Balay     }
43057b952d6SSatish Balay     else {
43157b952d6SSatish Balay       /* assemble the entire matrix onto first processor. */
43257b952d6SSatish Balay       Mat         A;
43357b952d6SSatish Balay       Mat_SeqBAIJ *Aloc;
43457b952d6SSatish Balay       int         M = baij->M, N = baij->N,*ai,*aj,row,col,i,j,k,*rvals;
43557b952d6SSatish Balay       int         mbs=baij->mbs;
43657b952d6SSatish Balay       Scalar      *a;
43757b952d6SSatish Balay 
43857b952d6SSatish Balay       if (!rank) {
439cee3aa6bSSatish Balay         ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
44057b952d6SSatish Balay         CHKERRQ(ierr);
44157b952d6SSatish Balay       }
44257b952d6SSatish Balay       else {
443cee3aa6bSSatish Balay         ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
44457b952d6SSatish Balay         CHKERRQ(ierr);
44557b952d6SSatish Balay       }
44657b952d6SSatish Balay       PLogObjectParent(mat,A);
44757b952d6SSatish Balay 
44857b952d6SSatish Balay       /* copy over the A part */
44957b952d6SSatish Balay       Aloc = (Mat_SeqBAIJ*) baij->A->data;
45057b952d6SSatish Balay       ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
45157b952d6SSatish Balay       row = baij->rstart;
45257b952d6SSatish Balay       rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
45357b952d6SSatish Balay 
45457b952d6SSatish Balay       for ( i=0; i<mbs; i++ ) {
45557b952d6SSatish Balay         rvals[0] = bs*(baij->rstart + i);
45657b952d6SSatish Balay         for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
45757b952d6SSatish Balay         for ( j=ai[i]; j<ai[i+1]; j++ ) {
45857b952d6SSatish Balay           col = (baij->cstart+aj[j])*bs;
45957b952d6SSatish Balay           for (k=0; k<bs; k++ ) {
460cee3aa6bSSatish Balay             ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
461cee3aa6bSSatish Balay             col++; a += bs;
46257b952d6SSatish Balay           }
46357b952d6SSatish Balay         }
46457b952d6SSatish Balay       }
46557b952d6SSatish Balay       /* copy over the B part */
46657b952d6SSatish Balay       Aloc = (Mat_SeqBAIJ*) baij->B->data;
46757b952d6SSatish Balay       ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
46857b952d6SSatish Balay       row = baij->rstart*bs;
46957b952d6SSatish Balay       for ( i=0; i<mbs; i++ ) {
47057b952d6SSatish Balay         rvals[0] = bs*(baij->rstart + i);
47157b952d6SSatish Balay         for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
47257b952d6SSatish Balay         for ( j=ai[i]; j<ai[i+1]; j++ ) {
47357b952d6SSatish Balay           col = baij->garray[aj[j]]*bs;
47457b952d6SSatish Balay           for (k=0; k<bs; k++ ) {
475cee3aa6bSSatish Balay             ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
476cee3aa6bSSatish Balay             col++; a += bs;
47757b952d6SSatish Balay           }
47857b952d6SSatish Balay         }
47957b952d6SSatish Balay       }
48057b952d6SSatish Balay       PetscFree(rvals);
4816d4a8577SBarry Smith       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
4826d4a8577SBarry Smith       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
48357b952d6SSatish Balay       if (!rank) {
48457b952d6SSatish Balay         ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
48557b952d6SSatish Balay       }
48657b952d6SSatish Balay       ierr = MatDestroy(A); CHKERRQ(ierr);
48757b952d6SSatish Balay     }
48857b952d6SSatish Balay   }
48957b952d6SSatish Balay   return 0;
49057b952d6SSatish Balay }
49157b952d6SSatish Balay 
49257b952d6SSatish Balay 
49357b952d6SSatish Balay 
49457b952d6SSatish Balay static int MatView_MPIBAIJ(PetscObject obj,Viewer viewer)
49557b952d6SSatish Balay {
49657b952d6SSatish Balay   Mat         mat = (Mat) obj;
49757b952d6SSatish Balay   int         ierr;
49857b952d6SSatish Balay   ViewerType  vtype;
49957b952d6SSatish Balay 
50057b952d6SSatish Balay   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
50157b952d6SSatish Balay   if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
50257b952d6SSatish Balay       vtype == DRAW_VIEWER       || vtype == MATLAB_VIEWER) {
50357b952d6SSatish Balay     ierr = MatView_MPIBAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
50457b952d6SSatish Balay   }
50557b952d6SSatish Balay   else if (vtype == BINARY_FILE_VIEWER) {
50657b952d6SSatish Balay     return MatView_MPIBAIJ_Binary(mat,viewer);
50757b952d6SSatish Balay   }
50857b952d6SSatish Balay   return 0;
50957b952d6SSatish Balay }
51057b952d6SSatish Balay 
51179bdfe76SSatish Balay static int MatDestroy_MPIBAIJ(PetscObject obj)
51279bdfe76SSatish Balay {
51379bdfe76SSatish Balay   Mat         mat = (Mat) obj;
51479bdfe76SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
51579bdfe76SSatish Balay   int         ierr;
51679bdfe76SSatish Balay 
51779bdfe76SSatish Balay #if defined(PETSC_LOG)
51879bdfe76SSatish Balay   PLogObjectState(obj,"Rows=%d, Cols=%d",baij->M,baij->N);
51979bdfe76SSatish Balay #endif
52079bdfe76SSatish Balay 
52179bdfe76SSatish Balay   PetscFree(baij->rowners);
52279bdfe76SSatish Balay   ierr = MatDestroy(baij->A); CHKERRQ(ierr);
52379bdfe76SSatish Balay   ierr = MatDestroy(baij->B); CHKERRQ(ierr);
52479bdfe76SSatish Balay   if (baij->colmap) PetscFree(baij->colmap);
52579bdfe76SSatish Balay   if (baij->garray) PetscFree(baij->garray);
52679bdfe76SSatish Balay   if (baij->lvec)   VecDestroy(baij->lvec);
52779bdfe76SSatish Balay   if (baij->Mvctx)  VecScatterDestroy(baij->Mvctx);
52879bdfe76SSatish Balay   if (baij->rowvalues) PetscFree(baij->rowvalues);
52979bdfe76SSatish Balay   PetscFree(baij);
53079bdfe76SSatish Balay   PLogObjectDestroy(mat);
53179bdfe76SSatish Balay   PetscHeaderDestroy(mat);
53279bdfe76SSatish Balay   return 0;
53379bdfe76SSatish Balay }
53479bdfe76SSatish Balay 
535cee3aa6bSSatish Balay static int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
536cee3aa6bSSatish Balay {
537cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
53847b4a8eaSLois Curfman McInnes   int         ierr, nt;
539cee3aa6bSSatish Balay 
540c16cb8f2SBarry Smith   VecGetLocalSize_Fast(xx,nt);
54147b4a8eaSLois Curfman McInnes   if (nt != a->n) {
5420ac07820SSatish Balay     SETERRQ(1,"MatMult_MPIBAIJ:Incompatible parition of A and xx");
54347b4a8eaSLois Curfman McInnes   }
544c16cb8f2SBarry Smith   VecGetLocalSize_Fast(yy,nt);
54547b4a8eaSLois Curfman McInnes   if (nt != a->m) {
5460ac07820SSatish Balay     SETERRQ(1,"MatMult_MPIBAIJ:Incompatible parition of A and yy");
54747b4a8eaSLois Curfman McInnes   }
548cee3aa6bSSatish Balay   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
549cee3aa6bSSatish Balay   ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr);
550cee3aa6bSSatish Balay   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
551cee3aa6bSSatish Balay   ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
552cee3aa6bSSatish Balay   return 0;
553cee3aa6bSSatish Balay }
554cee3aa6bSSatish Balay 
555cee3aa6bSSatish Balay static int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
556cee3aa6bSSatish Balay {
557cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
558cee3aa6bSSatish Balay   int        ierr;
559cee3aa6bSSatish Balay   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
560cee3aa6bSSatish Balay   ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
561cee3aa6bSSatish Balay   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
562cee3aa6bSSatish Balay   ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
563cee3aa6bSSatish Balay   return 0;
564cee3aa6bSSatish Balay }
565cee3aa6bSSatish Balay 
566cee3aa6bSSatish Balay static int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy)
567cee3aa6bSSatish Balay {
568cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
569cee3aa6bSSatish Balay   int        ierr;
570cee3aa6bSSatish Balay 
571cee3aa6bSSatish Balay   /* do nondiagonal part */
572cee3aa6bSSatish Balay   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
573cee3aa6bSSatish Balay   /* send it on its way */
574cee3aa6bSSatish Balay   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,
575cee3aa6bSSatish Balay                 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
576cee3aa6bSSatish Balay   /* do local part */
577cee3aa6bSSatish Balay   ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr);
578cee3aa6bSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
579cee3aa6bSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
580cee3aa6bSSatish Balay   /* but is not perhaps always true. */
581cee3aa6bSSatish Balay   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,
582cee3aa6bSSatish Balay                   (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
583cee3aa6bSSatish Balay   return 0;
584cee3aa6bSSatish Balay }
585cee3aa6bSSatish Balay 
586cee3aa6bSSatish Balay static int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
587cee3aa6bSSatish Balay {
588cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
589cee3aa6bSSatish Balay   int        ierr;
590cee3aa6bSSatish Balay 
591cee3aa6bSSatish Balay   /* do nondiagonal part */
592cee3aa6bSSatish Balay   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
593cee3aa6bSSatish Balay   /* send it on its way */
594cee3aa6bSSatish Balay   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,
595cee3aa6bSSatish Balay                  (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
596cee3aa6bSSatish Balay   /* do local part */
597cee3aa6bSSatish Balay   ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
598cee3aa6bSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
599cee3aa6bSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
600cee3aa6bSSatish Balay   /* but is not perhaps always true. */
601cee3aa6bSSatish Balay   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,
602cee3aa6bSSatish Balay                   (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
603cee3aa6bSSatish Balay   return 0;
604cee3aa6bSSatish Balay }
605cee3aa6bSSatish Balay 
606cee3aa6bSSatish Balay /*
607cee3aa6bSSatish Balay   This only works correctly for square matrices where the subblock A->A is the
608cee3aa6bSSatish Balay    diagonal block
609cee3aa6bSSatish Balay */
610cee3aa6bSSatish Balay static int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
611cee3aa6bSSatish Balay {
612cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
613cee3aa6bSSatish Balay   if (a->M != a->N)
614cee3aa6bSSatish Balay     SETERRQ(1,"MatGetDiagonal_MPIBAIJ:Supports only square matrix where A->A is diag block");
615cee3aa6bSSatish Balay   return MatGetDiagonal(a->A,v);
616cee3aa6bSSatish Balay }
617cee3aa6bSSatish Balay 
618cee3aa6bSSatish Balay static int MatScale_MPIBAIJ(Scalar *aa,Mat A)
619cee3aa6bSSatish Balay {
620cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
621cee3aa6bSSatish Balay   int        ierr;
622cee3aa6bSSatish Balay   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
623cee3aa6bSSatish Balay   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
624cee3aa6bSSatish Balay   return 0;
625cee3aa6bSSatish Balay }
62657b952d6SSatish Balay static int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n)
62757b952d6SSatish Balay {
62857b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
62957b952d6SSatish Balay   *m = mat->M; *n = mat->N;
63057b952d6SSatish Balay   return 0;
63157b952d6SSatish Balay }
63257b952d6SSatish Balay 
63357b952d6SSatish Balay static int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n)
63457b952d6SSatish Balay {
63557b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
63657b952d6SSatish Balay   *m = mat->m; *n = mat->N;
63757b952d6SSatish Balay   return 0;
63857b952d6SSatish Balay }
63957b952d6SSatish Balay 
64057b952d6SSatish Balay static int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n)
64157b952d6SSatish Balay {
64257b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
64357b952d6SSatish Balay   *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs;
64457b952d6SSatish Balay   return 0;
64557b952d6SSatish Balay }
64657b952d6SSatish Balay 
647acdf5bf4SSatish Balay extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
648acdf5bf4SSatish Balay extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
649acdf5bf4SSatish Balay 
650acdf5bf4SSatish Balay int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
651acdf5bf4SSatish Balay {
652acdf5bf4SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
653acdf5bf4SSatish Balay   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
654acdf5bf4SSatish Balay   int        bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB;
655d9d09a02SSatish Balay   int        nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs;
656d9d09a02SSatish Balay   int        *cmap, *idx_p,cstart = mat->cstart;
657acdf5bf4SSatish Balay 
658acdf5bf4SSatish Balay   if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,"MatGetRow_MPIBAIJ:Already active");
659acdf5bf4SSatish Balay   mat->getrowactive = PETSC_TRUE;
660acdf5bf4SSatish Balay 
661acdf5bf4SSatish Balay   if (!mat->rowvalues && (idx || v)) {
662acdf5bf4SSatish Balay     /*
663acdf5bf4SSatish Balay         allocate enough space to hold information from the longest row.
664acdf5bf4SSatish Balay     */
665acdf5bf4SSatish Balay     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data;
666bd16c2feSSatish Balay     int     max = 1,mbs = mat->mbs,tmp;
667bd16c2feSSatish Balay     for ( i=0; i<mbs; i++ ) {
668acdf5bf4SSatish Balay       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
669acdf5bf4SSatish Balay       if (max < tmp) { max = tmp; }
670acdf5bf4SSatish Balay     }
671acdf5bf4SSatish Balay     mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar)));
672acdf5bf4SSatish Balay     CHKPTRQ(mat->rowvalues);
673acdf5bf4SSatish Balay     mat->rowindices = (int *) (mat->rowvalues + max*bs2);
674acdf5bf4SSatish Balay   }
675acdf5bf4SSatish Balay 
676acdf5bf4SSatish Balay 
677d9d09a02SSatish Balay   if (row < brstart || row >= brend) SETERRQ(1,"MatGetRow_MPIBAIJ:Only local rows")
678d9d09a02SSatish Balay   lrow = row - brstart;
679acdf5bf4SSatish Balay 
680acdf5bf4SSatish Balay   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
681acdf5bf4SSatish Balay   if (!v)   {pvA = 0; pvB = 0;}
682acdf5bf4SSatish Balay   if (!idx) {pcA = 0; if (!v) pcB = 0;}
683acdf5bf4SSatish Balay   ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
684acdf5bf4SSatish Balay   ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
685acdf5bf4SSatish Balay   nztot = nzA + nzB;
686acdf5bf4SSatish Balay 
687acdf5bf4SSatish Balay   cmap  = mat->garray;
688acdf5bf4SSatish Balay   if (v  || idx) {
689acdf5bf4SSatish Balay     if (nztot) {
690acdf5bf4SSatish Balay       /* Sort by increasing column numbers, assuming A and B already sorted */
691acdf5bf4SSatish Balay       int imark = -1;
692acdf5bf4SSatish Balay       if (v) {
693acdf5bf4SSatish Balay         *v = v_p = mat->rowvalues;
694acdf5bf4SSatish Balay         for ( i=0; i<nzB; i++ ) {
695d9d09a02SSatish Balay           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
696acdf5bf4SSatish Balay           else break;
697acdf5bf4SSatish Balay         }
698acdf5bf4SSatish Balay         imark = i;
699acdf5bf4SSatish Balay         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
700acdf5bf4SSatish Balay         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
701acdf5bf4SSatish Balay       }
702acdf5bf4SSatish Balay       if (idx) {
703acdf5bf4SSatish Balay         *idx = idx_p = mat->rowindices;
704acdf5bf4SSatish Balay         if (imark > -1) {
705acdf5bf4SSatish Balay           for ( i=0; i<imark; i++ ) {
706bd16c2feSSatish Balay             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
707acdf5bf4SSatish Balay           }
708acdf5bf4SSatish Balay         } else {
709acdf5bf4SSatish Balay           for ( i=0; i<nzB; i++ ) {
710d9d09a02SSatish Balay             if (cmap[cworkB[i]/bs] < cstart)
711d9d09a02SSatish Balay               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
712acdf5bf4SSatish Balay             else break;
713acdf5bf4SSatish Balay           }
714acdf5bf4SSatish Balay           imark = i;
715acdf5bf4SSatish Balay         }
716d9d09a02SSatish Balay         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart*bs + cworkA[i];
717d9d09a02SSatish Balay         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
718acdf5bf4SSatish Balay       }
719acdf5bf4SSatish Balay     }
720d212a18eSSatish Balay     else {
721d212a18eSSatish Balay       if (idx) *idx = 0;
722d212a18eSSatish Balay       if (v)   *v   = 0;
723d212a18eSSatish Balay     }
724acdf5bf4SSatish Balay   }
725acdf5bf4SSatish Balay   *nz = nztot;
726acdf5bf4SSatish Balay   ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
727acdf5bf4SSatish Balay   ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
728acdf5bf4SSatish Balay   return 0;
729acdf5bf4SSatish Balay }
730acdf5bf4SSatish Balay 
731acdf5bf4SSatish Balay int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
732acdf5bf4SSatish Balay {
733acdf5bf4SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
734acdf5bf4SSatish Balay   if (baij->getrowactive == PETSC_FALSE) {
735acdf5bf4SSatish Balay     SETERRQ(1,"MatRestoreRow_MPIBAIJ:MatGetRow not called");
736acdf5bf4SSatish Balay   }
737acdf5bf4SSatish Balay   baij->getrowactive = PETSC_FALSE;
738acdf5bf4SSatish Balay   return 0;
739acdf5bf4SSatish Balay }
740acdf5bf4SSatish Balay 
7415a838052SSatish Balay static int MatGetBlockSize_MPIBAIJ(Mat mat, int *bs)
7425a838052SSatish Balay {
7435a838052SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
7445a838052SSatish Balay   *bs = baij->bs;
7455a838052SSatish Balay   return 0;
7465a838052SSatish Balay }
7475a838052SSatish Balay 
74858667388SSatish Balay static int MatZeroEntries_MPIBAIJ(Mat A)
74958667388SSatish Balay {
75058667388SSatish Balay   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data;
75158667388SSatish Balay   int         ierr;
75258667388SSatish Balay   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
75358667388SSatish Balay   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
75458667388SSatish Balay   return 0;
75558667388SSatish Balay }
7560ac07820SSatish Balay 
7570ac07820SSatish Balay static int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,int *nz,
7580ac07820SSatish Balay                              int *nzalloc,int *mem)
7590ac07820SSatish Balay {
7600ac07820SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
7610ac07820SSatish Balay   Mat         A = mat->A, B = mat->B;
7620ac07820SSatish Balay   int         ierr, isend[3], irecv[3], nzA, nzallocA, memA;
7630ac07820SSatish Balay 
7640ac07820SSatish Balay   ierr = MatGetInfo(A,MAT_LOCAL,&nzA,&nzallocA,&memA); CHKERRQ(ierr);
7650ac07820SSatish Balay   ierr = MatGetInfo(B,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr);
7660ac07820SSatish Balay   isend[0] += nzA; isend[1] += nzallocA; isend[2] += memA;
7670ac07820SSatish Balay   if (flag == MAT_LOCAL) {
7680ac07820SSatish Balay     if (nz)       *nz      = isend[0];
7690ac07820SSatish Balay     if (nzalloc)  *nzalloc = isend[1];
7700ac07820SSatish Balay     if (mem)      *mem     = isend[2];
7710ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_MAX) {
7720ac07820SSatish Balay     MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_MAX,matin->comm);
7730ac07820SSatish Balay     if (nz)      *nz      = irecv[0];
7740ac07820SSatish Balay     if (nzalloc) *nzalloc = irecv[1];
7750ac07820SSatish Balay     if (mem)     *mem     = irecv[2];
7760ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_SUM) {
7770ac07820SSatish Balay     MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_SUM,matin->comm);
7780ac07820SSatish Balay     if (nz)      *nz      = irecv[0];
7790ac07820SSatish Balay     if (nzalloc) *nzalloc = irecv[1];
7800ac07820SSatish Balay     if (mem)     *mem     = irecv[2];
7810ac07820SSatish Balay   }
7820ac07820SSatish Balay   return 0;
7830ac07820SSatish Balay }
7840ac07820SSatish Balay 
78558667388SSatish Balay static int MatSetOption_MPIBAIJ(Mat A,MatOption op)
78658667388SSatish Balay {
78758667388SSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
78858667388SSatish Balay 
78958667388SSatish Balay   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
79058667388SSatish Balay       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
79158667388SSatish Balay       op == MAT_COLUMNS_SORTED ||
79258667388SSatish Balay       op == MAT_ROW_ORIENTED) {
79358667388SSatish Balay         MatSetOption(a->A,op);
79458667388SSatish Balay         MatSetOption(a->B,op);
79558667388SSatish Balay   }
79658667388SSatish Balay   else if (op == MAT_ROWS_SORTED ||
79758667388SSatish Balay            op == MAT_SYMMETRIC ||
79858667388SSatish Balay            op == MAT_STRUCTURALLY_SYMMETRIC ||
79958667388SSatish Balay            op == MAT_YES_NEW_DIAGONALS)
80058667388SSatish Balay     PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
80158667388SSatish Balay   else if (op == MAT_COLUMN_ORIENTED) {
80258667388SSatish Balay     a->roworiented = 0;
80358667388SSatish Balay     MatSetOption(a->A,op);
80458667388SSatish Balay     MatSetOption(a->B,op);
80558667388SSatish Balay   }
80658667388SSatish Balay   else if (op == MAT_NO_NEW_DIAGONALS)
8070ac07820SSatish Balay     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIBAIJ:MAT_NO_NEW_DIAGONALS");}
80858667388SSatish Balay   else
8090ac07820SSatish Balay     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIBAIJ:unknown option");}
81058667388SSatish Balay   return 0;
81158667388SSatish Balay }
81258667388SSatish Balay 
8130ac07820SSatish Balay static int MatTranspose_MPIBAIJ(Mat A,Mat *matout)
8140ac07820SSatish Balay {
8150ac07820SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data;
8160ac07820SSatish Balay   Mat_SeqBAIJ *Aloc;
8170ac07820SSatish Balay   Mat        B;
8180ac07820SSatish Balay   int        ierr,M=baij->M,N=baij->N,*ai,*aj,row,i,*rvals,j,k,col;
8190ac07820SSatish Balay   int        bs=baij->bs,mbs=baij->mbs;
8200ac07820SSatish Balay   Scalar     *a;
8210ac07820SSatish Balay 
8220ac07820SSatish Balay   if (matout == PETSC_NULL && M != N)
8230ac07820SSatish Balay     SETERRQ(1,"MatTranspose_MPIBAIJ:Square matrix only for in-place");
8240ac07820SSatish Balay   ierr = MatCreateMPIBAIJ(A->comm,baij->bs,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);
8250ac07820SSatish Balay   CHKERRQ(ierr);
8260ac07820SSatish Balay 
8270ac07820SSatish Balay   /* copy over the A part */
8280ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*) baij->A->data;
8290ac07820SSatish Balay   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
8300ac07820SSatish Balay   row = baij->rstart;
8310ac07820SSatish Balay   rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
8320ac07820SSatish Balay 
8330ac07820SSatish Balay   for ( i=0; i<mbs; i++ ) {
8340ac07820SSatish Balay     rvals[0] = bs*(baij->rstart + i);
8350ac07820SSatish Balay     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
8360ac07820SSatish Balay     for ( j=ai[i]; j<ai[i+1]; j++ ) {
8370ac07820SSatish Balay       col = (baij->cstart+aj[j])*bs;
8380ac07820SSatish Balay       for (k=0; k<bs; k++ ) {
8390ac07820SSatish Balay         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
8400ac07820SSatish Balay         col++; a += bs;
8410ac07820SSatish Balay       }
8420ac07820SSatish Balay     }
8430ac07820SSatish Balay   }
8440ac07820SSatish Balay   /* copy over the B part */
8450ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*) baij->B->data;
8460ac07820SSatish Balay   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
8470ac07820SSatish Balay   row = baij->rstart*bs;
8480ac07820SSatish Balay   for ( i=0; i<mbs; i++ ) {
8490ac07820SSatish Balay     rvals[0] = bs*(baij->rstart + i);
8500ac07820SSatish Balay     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
8510ac07820SSatish Balay     for ( j=ai[i]; j<ai[i+1]; j++ ) {
8520ac07820SSatish Balay       col = baij->garray[aj[j]]*bs;
8530ac07820SSatish Balay       for (k=0; k<bs; k++ ) {
8540ac07820SSatish Balay         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
8550ac07820SSatish Balay         col++; a += bs;
8560ac07820SSatish Balay       }
8570ac07820SSatish Balay     }
8580ac07820SSatish Balay   }
8590ac07820SSatish Balay   PetscFree(rvals);
8600ac07820SSatish Balay   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
8610ac07820SSatish Balay   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
8620ac07820SSatish Balay 
8630ac07820SSatish Balay   if (matout != PETSC_NULL) {
8640ac07820SSatish Balay     *matout = B;
8650ac07820SSatish Balay   } else {
8660ac07820SSatish Balay     /* This isn't really an in-place transpose .... but free data structures from baij */
8670ac07820SSatish Balay     PetscFree(baij->rowners);
8680ac07820SSatish Balay     ierr = MatDestroy(baij->A); CHKERRQ(ierr);
8690ac07820SSatish Balay     ierr = MatDestroy(baij->B); CHKERRQ(ierr);
8700ac07820SSatish Balay     if (baij->colmap) PetscFree(baij->colmap);
8710ac07820SSatish Balay     if (baij->garray) PetscFree(baij->garray);
8720ac07820SSatish Balay     if (baij->lvec) VecDestroy(baij->lvec);
8730ac07820SSatish Balay     if (baij->Mvctx) VecScatterDestroy(baij->Mvctx);
8740ac07820SSatish Balay     PetscFree(baij);
8750ac07820SSatish Balay     PetscMemcpy(A,B,sizeof(struct _Mat));
8760ac07820SSatish Balay     PetscHeaderDestroy(B);
8770ac07820SSatish Balay   }
8780ac07820SSatish Balay   return 0;
8790ac07820SSatish Balay }
8800ac07820SSatish Balay /* the code does not do the diagonal entries correctly unless the
8810ac07820SSatish Balay    matrix is square and the column and row owerships are identical.
8820ac07820SSatish Balay    This is a BUG. The only way to fix it seems to be to access
8830ac07820SSatish Balay    baij->A and baij->B directly and not through the MatZeroRows()
8840ac07820SSatish Balay    routine.
8850ac07820SSatish Balay */
8860ac07820SSatish Balay static int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag)
8870ac07820SSatish Balay {
8880ac07820SSatish Balay   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ *) A->data;
8890ac07820SSatish Balay   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
8900ac07820SSatish Balay   int            *procs,*nprocs,j,found,idx,nsends,*work;
8910ac07820SSatish Balay   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
8920ac07820SSatish Balay   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
8930ac07820SSatish Balay   int            *lens,imdex,*lrows,*values,bs=l->bs;
8940ac07820SSatish Balay   MPI_Comm       comm = A->comm;
8950ac07820SSatish Balay   MPI_Request    *send_waits,*recv_waits;
8960ac07820SSatish Balay   MPI_Status     recv_status,*send_status;
8970ac07820SSatish Balay   IS             istmp;
8980ac07820SSatish Balay 
8990ac07820SSatish Balay   ierr = ISGetSize(is,&N); CHKERRQ(ierr);
9000ac07820SSatish Balay   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
9010ac07820SSatish Balay 
9020ac07820SSatish Balay   /*  first count number of contributors to each processor */
9030ac07820SSatish Balay   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
9040ac07820SSatish Balay   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
9050ac07820SSatish Balay   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
9060ac07820SSatish Balay   for ( i=0; i<N; i++ ) {
9070ac07820SSatish Balay     idx = rows[i];
9080ac07820SSatish Balay     found = 0;
9090ac07820SSatish Balay     for ( j=0; j<size; j++ ) {
9100ac07820SSatish Balay       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
9110ac07820SSatish Balay         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
9120ac07820SSatish Balay       }
9130ac07820SSatish Balay     }
9140ac07820SSatish Balay     if (!found) SETERRQ(1,"MatZeroRows_MPIBAIJ:Index out of range");
9150ac07820SSatish Balay   }
9160ac07820SSatish Balay   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
9170ac07820SSatish Balay 
9180ac07820SSatish Balay   /* inform other processors of number of messages and max length*/
9190ac07820SSatish Balay   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
9200ac07820SSatish Balay   MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
9210ac07820SSatish Balay   nrecvs = work[rank];
9220ac07820SSatish Balay   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
9230ac07820SSatish Balay   nmax = work[rank];
9240ac07820SSatish Balay   PetscFree(work);
9250ac07820SSatish Balay 
9260ac07820SSatish Balay   /* post receives:   */
9270ac07820SSatish Balay   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
9280ac07820SSatish Balay   CHKPTRQ(rvalues);
9290ac07820SSatish Balay   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
9300ac07820SSatish Balay   CHKPTRQ(recv_waits);
9310ac07820SSatish Balay   for ( i=0; i<nrecvs; i++ ) {
9320ac07820SSatish Balay     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
9330ac07820SSatish Balay   }
9340ac07820SSatish Balay 
9350ac07820SSatish Balay   /* do sends:
9360ac07820SSatish Balay       1) starts[i] gives the starting index in svalues for stuff going to
9370ac07820SSatish Balay          the ith processor
9380ac07820SSatish Balay   */
9390ac07820SSatish Balay   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
9400ac07820SSatish Balay   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
9410ac07820SSatish Balay   CHKPTRQ(send_waits);
9420ac07820SSatish Balay   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
9430ac07820SSatish Balay   starts[0] = 0;
9440ac07820SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
9450ac07820SSatish Balay   for ( i=0; i<N; i++ ) {
9460ac07820SSatish Balay     svalues[starts[owner[i]]++] = rows[i];
9470ac07820SSatish Balay   }
9480ac07820SSatish Balay   ISRestoreIndices(is,&rows);
9490ac07820SSatish Balay 
9500ac07820SSatish Balay   starts[0] = 0;
9510ac07820SSatish Balay   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
9520ac07820SSatish Balay   count = 0;
9530ac07820SSatish Balay   for ( i=0; i<size; i++ ) {
9540ac07820SSatish Balay     if (procs[i]) {
9550ac07820SSatish Balay       MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
9560ac07820SSatish Balay     }
9570ac07820SSatish Balay   }
9580ac07820SSatish Balay   PetscFree(starts);
9590ac07820SSatish Balay 
9600ac07820SSatish Balay   base = owners[rank]*bs;
9610ac07820SSatish Balay 
9620ac07820SSatish Balay   /*  wait on receives */
9630ac07820SSatish Balay   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
9640ac07820SSatish Balay   source = lens + nrecvs;
9650ac07820SSatish Balay   count  = nrecvs; slen = 0;
9660ac07820SSatish Balay   while (count) {
9670ac07820SSatish Balay     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
9680ac07820SSatish Balay     /* unpack receives into our local space */
9690ac07820SSatish Balay     MPI_Get_count(&recv_status,MPI_INT,&n);
9700ac07820SSatish Balay     source[imdex]  = recv_status.MPI_SOURCE;
9710ac07820SSatish Balay     lens[imdex]  = n;
9720ac07820SSatish Balay     slen += n;
9730ac07820SSatish Balay     count--;
9740ac07820SSatish Balay   }
9750ac07820SSatish Balay   PetscFree(recv_waits);
9760ac07820SSatish Balay 
9770ac07820SSatish Balay   /* move the data into the send scatter */
9780ac07820SSatish Balay   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
9790ac07820SSatish Balay   count = 0;
9800ac07820SSatish Balay   for ( i=0; i<nrecvs; i++ ) {
9810ac07820SSatish Balay     values = rvalues + i*nmax;
9820ac07820SSatish Balay     for ( j=0; j<lens[i]; j++ ) {
9830ac07820SSatish Balay       lrows[count++] = values[j] - base;
9840ac07820SSatish Balay     }
9850ac07820SSatish Balay   }
9860ac07820SSatish Balay   PetscFree(rvalues); PetscFree(lens);
9870ac07820SSatish Balay   PetscFree(owner); PetscFree(nprocs);
9880ac07820SSatish Balay 
9890ac07820SSatish Balay   /* actually zap the local rows */
9900ac07820SSatish Balay   ierr = ISCreateSeq(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
9910ac07820SSatish Balay   PLogObjectParent(A,istmp);
9920ac07820SSatish Balay   PetscFree(lrows);
9930ac07820SSatish Balay   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
9940ac07820SSatish Balay   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
9950ac07820SSatish Balay   ierr = ISDestroy(istmp); CHKERRQ(ierr);
9960ac07820SSatish Balay 
9970ac07820SSatish Balay   /* wait on sends */
9980ac07820SSatish Balay   if (nsends) {
9990ac07820SSatish Balay     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
10000ac07820SSatish Balay     CHKPTRQ(send_status);
10010ac07820SSatish Balay     MPI_Waitall(nsends,send_waits,send_status);
10020ac07820SSatish Balay     PetscFree(send_status);
10030ac07820SSatish Balay   }
10040ac07820SSatish Balay   PetscFree(send_waits); PetscFree(svalues);
10050ac07820SSatish Balay 
10060ac07820SSatish Balay   return 0;
10070ac07820SSatish Balay }
1008ba4ca20aSSatish Balay extern int MatPrintHelp_SeqBAIJ(Mat);
1009ba4ca20aSSatish Balay static int MatPrintHelp_MPIBAIJ(Mat A)
1010ba4ca20aSSatish Balay {
1011ba4ca20aSSatish Balay   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
1012ba4ca20aSSatish Balay 
1013ba4ca20aSSatish Balay   if (!a->rank) return MatPrintHelp_SeqBAIJ(a->A);
1014ba4ca20aSSatish Balay   else return 0;
1015ba4ca20aSSatish Balay }
10160ac07820SSatish Balay 
10170ac07820SSatish Balay static int MatConvertSameType_MPIBAIJ(Mat,Mat *,int);
10180ac07820SSatish Balay 
101979bdfe76SSatish Balay /* -------------------------------------------------------------------*/
102079bdfe76SSatish Balay static struct _MatOps MatOps = {
1021bd16c2feSSatish Balay   MatSetValues_MPIBAIJ,MatGetRow_MPIBAIJ,MatRestoreRow_MPIBAIJ,MatMult_MPIBAIJ,
102293bc47c4SSatish Balay   MatMultAdd_MPIBAIJ,MatMultTrans_MPIBAIJ,MatMultTransAdd_MPIBAIJ,MatSolve_MPIBAIJ,
102393bc47c4SSatish Balay   MatSolveAdd_MPIBAIJ,MatSolveTrans_MPIBAIJ,MatSolveTransAdd_MPIBAIJ,MatLUFactor_MPIBAIJ,
10240ac07820SSatish Balay   0,0,MatTranspose_MPIBAIJ,MatGetInfo_MPIBAIJ,
1025acdf5bf4SSatish Balay   0,MatGetDiagonal_MPIBAIJ,0,MatNorm_MPIBAIJ,
102658667388SSatish Balay   MatAssemblyBegin_MPIBAIJ,MatAssemblyEnd_MPIBAIJ,0,MatSetOption_MPIBAIJ,
10270ac07820SSatish Balay   MatZeroEntries_MPIBAIJ,MatZeroRows_MPIBAIJ,MatGetReordering_MPIBAIJ,MatLUFactorSymbolic_MPIBAIJ,
102893bc47c4SSatish Balay   MatLUFactorNumeric_MPIBAIJ,0,0,MatGetSize_MPIBAIJ,
102993bc47c4SSatish Balay   MatGetLocalSize_MPIBAIJ,MatGetOwnershipRange_MPIBAIJ,MatILUFactorSymbolic_MPIBAIJ,0,
1030905e6a2fSBarry Smith   0,0,0,MatConvertSameType_MPIBAIJ,0,0,
1031d212a18eSSatish Balay   0,0,0,MatGetSubMatrices_MPIBAIJ,
1032ba4ca20aSSatish Balay   MatIncreaseOverlap_MPIBAIJ,MatGetValues_MPIBAIJ,0,MatPrintHelp_MPIBAIJ,
10335a838052SSatish Balay   MatScale_MPIBAIJ,0,0,0,MatGetBlockSize_MPIBAIJ};
103479bdfe76SSatish Balay 
103579bdfe76SSatish Balay 
103679bdfe76SSatish Balay /*@C
103779bdfe76SSatish Balay    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
103879bdfe76SSatish Balay    (block compressed row).  For good matrix assembly performance
103979bdfe76SSatish Balay    the user should preallocate the matrix storage by setting the parameters
104079bdfe76SSatish Balay    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
104179bdfe76SSatish Balay    performance can be increased by more than a factor of 50.
104279bdfe76SSatish Balay 
104379bdfe76SSatish Balay    Input Parameters:
104479bdfe76SSatish Balay .  comm - MPI communicator
104579bdfe76SSatish Balay .  bs   - size of blockk
104679bdfe76SSatish Balay .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
104792e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
104892e8d321SLois Curfman McInnes            y vector for the matrix-vector product y = Ax.
104992e8d321SLois Curfman McInnes .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
105092e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
105192e8d321SLois Curfman McInnes            x vector for the matrix-vector product y = Ax.
105279bdfe76SSatish Balay .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
105392e8d321SLois Curfman McInnes .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
105479bdfe76SSatish Balay .  d_nz  - number of block nonzeros per block row in diagonal portion of local
105579bdfe76SSatish Balay            submatrix  (same for all local rows)
105692e8d321SLois Curfman McInnes .  d_nzz - array containing the number of block nonzeros in the various block rows
105792e8d321SLois Curfman McInnes            of the in diagonal portion of the local (possibly different for each block
105892e8d321SLois Curfman McInnes            row) or PETSC_NULL.  You must leave room for the diagonal entry even if
105992e8d321SLois Curfman McInnes            it is zero.
106092e8d321SLois Curfman McInnes .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
106179bdfe76SSatish Balay            submatrix (same for all local rows).
106292e8d321SLois Curfman McInnes .  o_nzz - array containing the number of nonzeros in the various block rows of the
106392e8d321SLois Curfman McInnes            off-diagonal portion of the local submatrix (possibly different for
106492e8d321SLois Curfman McInnes            each block row) or PETSC_NULL.
106579bdfe76SSatish Balay 
106679bdfe76SSatish Balay    Output Parameter:
106779bdfe76SSatish Balay .  A - the matrix
106879bdfe76SSatish Balay 
106979bdfe76SSatish Balay    Notes:
107079bdfe76SSatish Balay    The user MUST specify either the local or global matrix dimensions
107179bdfe76SSatish Balay    (possibly both).
107279bdfe76SSatish Balay 
107379bdfe76SSatish Balay    Storage Information:
107479bdfe76SSatish Balay    For a square global matrix we define each processor's diagonal portion
107579bdfe76SSatish Balay    to be its local rows and the corresponding columns (a square submatrix);
107679bdfe76SSatish Balay    each processor's off-diagonal portion encompasses the remainder of the
107779bdfe76SSatish Balay    local matrix (a rectangular submatrix).
107879bdfe76SSatish Balay 
107979bdfe76SSatish Balay    The user can specify preallocated storage for the diagonal part of
108079bdfe76SSatish Balay    the local submatrix with either d_nz or d_nnz (not both).  Set
108179bdfe76SSatish Balay    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
108279bdfe76SSatish Balay    memory allocation.  Likewise, specify preallocated storage for the
108379bdfe76SSatish Balay    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
108479bdfe76SSatish Balay 
108579bdfe76SSatish Balay    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
108679bdfe76SSatish Balay    the figure below we depict these three local rows and all columns (0-11).
108779bdfe76SSatish Balay 
108879bdfe76SSatish Balay $          0 1 2 3 4 5 6 7 8 9 10 11
108979bdfe76SSatish Balay $         -------------------
109079bdfe76SSatish Balay $  row 3  |  o o o d d d o o o o o o
109179bdfe76SSatish Balay $  row 4  |  o o o d d d o o o o o o
109279bdfe76SSatish Balay $  row 5  |  o o o d d d o o o o o o
109379bdfe76SSatish Balay $         -------------------
109479bdfe76SSatish Balay $
109579bdfe76SSatish Balay 
109679bdfe76SSatish Balay    Thus, any entries in the d locations are stored in the d (diagonal)
109779bdfe76SSatish Balay    submatrix, and any entries in the o locations are stored in the
109879bdfe76SSatish Balay    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
109957b952d6SSatish Balay    stored simply in the MATSEQBAIJ format for compressed row storage.
110079bdfe76SSatish Balay 
110179bdfe76SSatish Balay    Now d_nz should indicate the number of nonzeros per row in the d matrix,
110279bdfe76SSatish Balay    and o_nz should indicate the number of nonzeros per row in the o matrix.
110379bdfe76SSatish Balay    In general, for PDE problems in which most nonzeros are near the diagonal,
110492e8d321SLois Curfman McInnes    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
110592e8d321SLois Curfman McInnes    or you will get TERRIBLE performance; see the users' manual chapter on
110692e8d321SLois Curfman McInnes    matrices and the file $(PETSC_DIR)/Performance.
110779bdfe76SSatish Balay 
110892e8d321SLois Curfman McInnes .keywords: matrix, block, aij, compressed row, sparse, parallel
110979bdfe76SSatish Balay 
111079bdfe76SSatish Balay .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues()
111179bdfe76SSatish Balay @*/
111279bdfe76SSatish Balay int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,
111379bdfe76SSatish Balay                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
111479bdfe76SSatish Balay {
111579bdfe76SSatish Balay   Mat          B;
111679bdfe76SSatish Balay   Mat_MPIBAIJ  *b;
1117cee3aa6bSSatish Balay   int          ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE;
111879bdfe76SSatish Balay 
111979bdfe76SSatish Balay   if (bs < 1) SETERRQ(1,"MatCreateMPIBAIJ: invalid block size specified");
112079bdfe76SSatish Balay   *A = 0;
112179bdfe76SSatish Balay   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIBAIJ,comm);
112279bdfe76SSatish Balay   PLogObjectCreate(B);
112379bdfe76SSatish Balay   B->data       = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b);
112479bdfe76SSatish Balay   PetscMemzero(b,sizeof(Mat_MPIBAIJ));
112579bdfe76SSatish Balay   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
112679bdfe76SSatish Balay   B->destroy    = MatDestroy_MPIBAIJ;
112779bdfe76SSatish Balay   B->view       = MatView_MPIBAIJ;
112857b952d6SSatish Balay 
112979bdfe76SSatish Balay   B->factor     = 0;
113079bdfe76SSatish Balay   B->assembled  = PETSC_FALSE;
113179bdfe76SSatish Balay 
113279bdfe76SSatish Balay   b->insertmode = NOT_SET_VALUES;
113379bdfe76SSatish Balay   MPI_Comm_rank(comm,&b->rank);
113479bdfe76SSatish Balay   MPI_Comm_size(comm,&b->size);
113579bdfe76SSatish Balay 
113679bdfe76SSatish Balay   if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
113757b952d6SSatish Balay     SETERRQ(1,"MatCreateMPIBAIJ:Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
1138cee3aa6bSSatish Balay   if ( M == PETSC_DECIDE && m == PETSC_DECIDE) SETERRQ(1,"MatCreateMPIBAIJ: either M or m should be specified");
1139cee3aa6bSSatish Balay   if ( M == PETSC_DECIDE && n == PETSC_DECIDE)SETERRQ(1,"MatCreateMPIBAIJ: either N or n should be specified");
1140cee3aa6bSSatish Balay   if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE;
1141cee3aa6bSSatish Balay   if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE;
114279bdfe76SSatish Balay 
114379bdfe76SSatish Balay   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
114479bdfe76SSatish Balay     work[0] = m; work[1] = n;
114579bdfe76SSatish Balay     mbs = m/bs; nbs = n/bs;
114679bdfe76SSatish Balay     MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
114779bdfe76SSatish Balay     if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;}
114879bdfe76SSatish Balay     if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;}
114979bdfe76SSatish Balay   }
115079bdfe76SSatish Balay   if (m == PETSC_DECIDE) {
115179bdfe76SSatish Balay     Mbs = M/bs;
115279bdfe76SSatish Balay     if (Mbs*bs != M) SETERRQ(1,"MatCreateMPIBAIJ: No of global rows must be divisible by blocksize");
115379bdfe76SSatish Balay     mbs = Mbs/b->size + ((Mbs % b->size) > b->rank);
115479bdfe76SSatish Balay     m   = mbs*bs;
115579bdfe76SSatish Balay   }
115679bdfe76SSatish Balay   if (n == PETSC_DECIDE) {
115779bdfe76SSatish Balay     Nbs = N/bs;
115879bdfe76SSatish Balay     if (Nbs*bs != N) SETERRQ(1,"MatCreateMPIBAIJ: No of global cols must be divisible by blocksize");
115979bdfe76SSatish Balay     nbs = Nbs/b->size + ((Nbs % b->size) > b->rank);
116079bdfe76SSatish Balay     n   = nbs*bs;
116179bdfe76SSatish Balay   }
1162cee3aa6bSSatish Balay   if (mbs*bs != m || nbs*bs != n) SETERRQ(1,"MatCreateMPIBAIJ: No of local rows, cols must be divisible by blocksize");
116379bdfe76SSatish Balay 
116479bdfe76SSatish Balay   b->m = m; B->m = m;
116579bdfe76SSatish Balay   b->n = n; B->n = n;
116679bdfe76SSatish Balay   b->N = N; B->N = N;
116779bdfe76SSatish Balay   b->M = M; B->M = M;
116879bdfe76SSatish Balay   b->bs  = bs;
116979bdfe76SSatish Balay   b->bs2 = bs*bs;
117079bdfe76SSatish Balay   b->mbs = mbs;
117179bdfe76SSatish Balay   b->nbs = nbs;
117279bdfe76SSatish Balay   b->Mbs = Mbs;
117379bdfe76SSatish Balay   b->Nbs = Nbs;
117479bdfe76SSatish Balay 
117579bdfe76SSatish Balay   /* build local table of row and column ownerships */
117679bdfe76SSatish Balay   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
117779bdfe76SSatish Balay   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIBAIJ));
11780ac07820SSatish Balay   b->cowners = b->rowners + b->size + 2;
117979bdfe76SSatish Balay   MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);
118079bdfe76SSatish Balay   b->rowners[0] = 0;
118179bdfe76SSatish Balay   for ( i=2; i<=b->size; i++ ) {
118279bdfe76SSatish Balay     b->rowners[i] += b->rowners[i-1];
118379bdfe76SSatish Balay   }
118479bdfe76SSatish Balay   b->rstart = b->rowners[b->rank];
118579bdfe76SSatish Balay   b->rend   = b->rowners[b->rank+1];
118679bdfe76SSatish Balay   MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);
118779bdfe76SSatish Balay   b->cowners[0] = 0;
118879bdfe76SSatish Balay   for ( i=2; i<=b->size; i++ ) {
118979bdfe76SSatish Balay     b->cowners[i] += b->cowners[i-1];
119079bdfe76SSatish Balay   }
119179bdfe76SSatish Balay   b->cstart = b->cowners[b->rank];
119279bdfe76SSatish Balay   b->cend   = b->cowners[b->rank+1];
119379bdfe76SSatish Balay 
119479bdfe76SSatish Balay   if (d_nz == PETSC_DEFAULT) d_nz = 5;
119579bdfe76SSatish Balay   ierr = MatCreateSeqBAIJ(MPI_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
119679bdfe76SSatish Balay   PLogObjectParent(B,b->A);
119779bdfe76SSatish Balay   if (o_nz == PETSC_DEFAULT) o_nz = 0;
119879bdfe76SSatish Balay   ierr = MatCreateSeqBAIJ(MPI_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
119979bdfe76SSatish Balay   PLogObjectParent(B,b->B);
120079bdfe76SSatish Balay 
120179bdfe76SSatish Balay   /* build cache for off array entries formed */
120279bdfe76SSatish Balay   ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
120379bdfe76SSatish Balay   b->colmap      = 0;
120479bdfe76SSatish Balay   b->garray      = 0;
120579bdfe76SSatish Balay   b->roworiented = 1;
120679bdfe76SSatish Balay 
120779bdfe76SSatish Balay   /* stuff used for matrix vector multiply */
120879bdfe76SSatish Balay   b->lvec      = 0;
120979bdfe76SSatish Balay   b->Mvctx     = 0;
121079bdfe76SSatish Balay 
121179bdfe76SSatish Balay   /* stuff for MatGetRow() */
121279bdfe76SSatish Balay   b->rowindices   = 0;
121379bdfe76SSatish Balay   b->rowvalues    = 0;
121479bdfe76SSatish Balay   b->getrowactive = PETSC_FALSE;
121579bdfe76SSatish Balay 
121679bdfe76SSatish Balay   *A = B;
121779bdfe76SSatish Balay   return 0;
121879bdfe76SSatish Balay }
12190ac07820SSatish Balay static int MatConvertSameType_MPIBAIJ(Mat matin,Mat *newmat,int cpvalues)
12200ac07820SSatish Balay {
12210ac07820SSatish Balay   Mat        mat;
12220ac07820SSatish Balay   Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data;
12230ac07820SSatish Balay   int        ierr, len=0, flg;
12240ac07820SSatish Balay 
12250ac07820SSatish Balay   *newmat       = 0;
12260ac07820SSatish Balay   PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIBAIJ,matin->comm);
12270ac07820SSatish Balay   PLogObjectCreate(mat);
12280ac07820SSatish Balay   mat->data       = (void *) (a = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(a);
12290ac07820SSatish Balay   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
12300ac07820SSatish Balay   mat->destroy    = MatDestroy_MPIBAIJ;
12310ac07820SSatish Balay   mat->view       = MatView_MPIBAIJ;
12320ac07820SSatish Balay   mat->factor     = matin->factor;
12330ac07820SSatish Balay   mat->assembled  = PETSC_TRUE;
12340ac07820SSatish Balay 
12350ac07820SSatish Balay   a->m = mat->m   = oldmat->m;
12360ac07820SSatish Balay   a->n = mat->n   = oldmat->n;
12370ac07820SSatish Balay   a->M = mat->M   = oldmat->M;
12380ac07820SSatish Balay   a->N = mat->N   = oldmat->N;
12390ac07820SSatish Balay 
12400ac07820SSatish Balay   a->bs  = oldmat->bs;
12410ac07820SSatish Balay   a->bs2 = oldmat->bs2;
12420ac07820SSatish Balay   a->mbs = oldmat->mbs;
12430ac07820SSatish Balay   a->nbs = oldmat->nbs;
12440ac07820SSatish Balay   a->Mbs = oldmat->Mbs;
12450ac07820SSatish Balay   a->Nbs = oldmat->Nbs;
12460ac07820SSatish Balay 
12470ac07820SSatish Balay   a->rstart       = oldmat->rstart;
12480ac07820SSatish Balay   a->rend         = oldmat->rend;
12490ac07820SSatish Balay   a->cstart       = oldmat->cstart;
12500ac07820SSatish Balay   a->cend         = oldmat->cend;
12510ac07820SSatish Balay   a->size         = oldmat->size;
12520ac07820SSatish Balay   a->rank         = oldmat->rank;
12530ac07820SSatish Balay   a->insertmode   = NOT_SET_VALUES;
12540ac07820SSatish Balay   a->rowvalues    = 0;
12550ac07820SSatish Balay   a->getrowactive = PETSC_FALSE;
12560ac07820SSatish Balay 
12570ac07820SSatish Balay   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
12580ac07820SSatish Balay   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIBAIJ));
12590ac07820SSatish Balay   a->cowners = a->rowners + a->size + 2;
12600ac07820SSatish Balay   PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
12610ac07820SSatish Balay   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
12620ac07820SSatish Balay   if (oldmat->colmap) {
12630ac07820SSatish Balay     a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap);
12640ac07820SSatish Balay     PLogObjectMemory(mat,(a->Nbs)*sizeof(int));
12650ac07820SSatish Balay     PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));
12660ac07820SSatish Balay   } else a->colmap = 0;
12670ac07820SSatish Balay   if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) {
12680ac07820SSatish Balay     a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray);
12690ac07820SSatish Balay     PLogObjectMemory(mat,len*sizeof(int));
12700ac07820SSatish Balay     PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
12710ac07820SSatish Balay   } else a->garray = 0;
12720ac07820SSatish Balay 
12730ac07820SSatish Balay   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
12740ac07820SSatish Balay   PLogObjectParent(mat,a->lvec);
12750ac07820SSatish Balay   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
12760ac07820SSatish Balay   PLogObjectParent(mat,a->Mvctx);
12770ac07820SSatish Balay   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
12780ac07820SSatish Balay   PLogObjectParent(mat,a->A);
12790ac07820SSatish Balay   ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
12800ac07820SSatish Balay   PLogObjectParent(mat,a->B);
12810ac07820SSatish Balay   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
12820ac07820SSatish Balay   if (flg) {
12830ac07820SSatish Balay     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
12840ac07820SSatish Balay   }
12850ac07820SSatish Balay   *newmat = mat;
12860ac07820SSatish Balay   return 0;
12870ac07820SSatish Balay }
128857b952d6SSatish Balay 
128957b952d6SSatish Balay #include "sys.h"
129057b952d6SSatish Balay 
129157b952d6SSatish Balay int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat)
129257b952d6SSatish Balay {
129357b952d6SSatish Balay   Mat          A;
129457b952d6SSatish Balay   int          i, nz, ierr, j,rstart, rend, fd;
129557b952d6SSatish Balay   Scalar       *vals,*buf;
129657b952d6SSatish Balay   MPI_Comm     comm = ((PetscObject)viewer)->comm;
129757b952d6SSatish Balay   MPI_Status   status;
1298cee3aa6bSSatish Balay   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
129957b952d6SSatish Balay   int          *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf;
130057b952d6SSatish Balay   int          flg,tag = ((PetscObject)viewer)->tag,bs=1,bs2,Mbs,mbs,extra_rows;
130157b952d6SSatish Balay   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
130257b952d6SSatish Balay   int          dcount,kmax,k,nzcount,tmp;
130357b952d6SSatish Balay 
130457b952d6SSatish Balay 
130557b952d6SSatish Balay   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
130657b952d6SSatish Balay   bs2  = bs*bs;
130757b952d6SSatish Balay 
130857b952d6SSatish Balay   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
130957b952d6SSatish Balay   if (!rank) {
131057b952d6SSatish Balay     ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
131157b952d6SSatish Balay     ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr);
1312cee3aa6bSSatish Balay     if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIBAIJ:not matrix object");
131357b952d6SSatish Balay   }
131457b952d6SSatish Balay 
131557b952d6SSatish Balay   MPI_Bcast(header+1,3,MPI_INT,0,comm);
131657b952d6SSatish Balay   M = header[1]; N = header[2];
131757b952d6SSatish Balay 
131857b952d6SSatish Balay 
131957b952d6SSatish Balay   if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices");
132057b952d6SSatish Balay 
132157b952d6SSatish Balay   /*
132257b952d6SSatish Balay      This code adds extra rows to make sure the number of rows is
132357b952d6SSatish Balay      divisible by the blocksize
132457b952d6SSatish Balay   */
132557b952d6SSatish Balay   Mbs        = M/bs;
132657b952d6SSatish Balay   extra_rows = bs - M + bs*(Mbs);
132757b952d6SSatish Balay   if (extra_rows == bs) extra_rows = 0;
132857b952d6SSatish Balay   else                  Mbs++;
132957b952d6SSatish Balay   if (extra_rows &&!rank) {
133057b952d6SSatish Balay     PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize");
133157b952d6SSatish Balay   }
133257b952d6SSatish Balay   /* determine ownership of all rows */
133357b952d6SSatish Balay   mbs = Mbs/size + ((Mbs % size) > rank);
133457b952d6SSatish Balay   m   = mbs * bs;
1335cee3aa6bSSatish Balay   rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners);
1336cee3aa6bSSatish Balay   browners = rowners + size + 1;
133757b952d6SSatish Balay   MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);
133857b952d6SSatish Balay   rowners[0] = 0;
1339cee3aa6bSSatish Balay   for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1];
1340cee3aa6bSSatish Balay   for ( i=0; i<=size;  i++ ) browners[i] = rowners[i]*bs;
134157b952d6SSatish Balay   rstart = rowners[rank];
134257b952d6SSatish Balay   rend   = rowners[rank+1];
134357b952d6SSatish Balay 
134457b952d6SSatish Balay   /* distribute row lengths to all processors */
134557b952d6SSatish Balay   locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens);
134657b952d6SSatish Balay   if (!rank) {
134757b952d6SSatish Balay     rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths);
134857b952d6SSatish Balay     ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
134957b952d6SSatish Balay     for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
135057b952d6SSatish Balay     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
1351cee3aa6bSSatish Balay     for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i];
1352cee3aa6bSSatish Balay     MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);
135357b952d6SSatish Balay     PetscFree(sndcounts);
135457b952d6SSatish Balay   }
135557b952d6SSatish Balay   else {
135657b952d6SSatish Balay     MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);
135757b952d6SSatish Balay   }
135857b952d6SSatish Balay 
135957b952d6SSatish Balay   if (!rank) {
136057b952d6SSatish Balay     /* calculate the number of nonzeros on each processor */
136157b952d6SSatish Balay     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
136257b952d6SSatish Balay     PetscMemzero(procsnz,size*sizeof(int));
136357b952d6SSatish Balay     for ( i=0; i<size; i++ ) {
136457b952d6SSatish Balay       for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) {
136557b952d6SSatish Balay         procsnz[i] += rowlengths[j];
136657b952d6SSatish Balay       }
136757b952d6SSatish Balay     }
136857b952d6SSatish Balay     PetscFree(rowlengths);
136957b952d6SSatish Balay 
137057b952d6SSatish Balay     /* determine max buffer needed and allocate it */
137157b952d6SSatish Balay     maxnz = 0;
137257b952d6SSatish Balay     for ( i=0; i<size; i++ ) {
137357b952d6SSatish Balay       maxnz = PetscMax(maxnz,procsnz[i]);
137457b952d6SSatish Balay     }
137557b952d6SSatish Balay     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
137657b952d6SSatish Balay 
137757b952d6SSatish Balay     /* read in my part of the matrix column indices  */
137857b952d6SSatish Balay     nz = procsnz[0];
137957b952d6SSatish Balay     ibuf = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
138057b952d6SSatish Balay     mycols = ibuf;
1381cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
138257b952d6SSatish Balay     ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr);
1383cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
1384cee3aa6bSSatish Balay 
138557b952d6SSatish Balay     /* read in every ones (except the last) and ship off */
138657b952d6SSatish Balay     for ( i=1; i<size-1; i++ ) {
138757b952d6SSatish Balay       nz = procsnz[i];
138857b952d6SSatish Balay       ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
138957b952d6SSatish Balay       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
139057b952d6SSatish Balay     }
139157b952d6SSatish Balay     /* read in the stuff for the last proc */
139257b952d6SSatish Balay     if ( size != 1 ) {
139357b952d6SSatish Balay       nz = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
139457b952d6SSatish Balay       ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
139557b952d6SSatish Balay       for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i;
1396cee3aa6bSSatish Balay       MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);
139757b952d6SSatish Balay     }
139857b952d6SSatish Balay     PetscFree(cols);
139957b952d6SSatish Balay   }
140057b952d6SSatish Balay   else {
140157b952d6SSatish Balay     /* determine buffer space needed for message */
140257b952d6SSatish Balay     nz = 0;
140357b952d6SSatish Balay     for ( i=0; i<m; i++ ) {
140457b952d6SSatish Balay       nz += locrowlens[i];
140557b952d6SSatish Balay     }
140657b952d6SSatish Balay     ibuf = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
140757b952d6SSatish Balay     mycols = ibuf;
140857b952d6SSatish Balay     /* receive message of column indices*/
140957b952d6SSatish Balay     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
141057b952d6SSatish Balay     MPI_Get_count(&status,MPI_INT,&maxnz);
1411cee3aa6bSSatish Balay     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIBAIJ:something is wrong with file");
141257b952d6SSatish Balay   }
141357b952d6SSatish Balay 
141457b952d6SSatish Balay   /* loop over local rows, determining number of off diagonal entries */
1415cee3aa6bSSatish Balay   dlens  = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens);
1416cee3aa6bSSatish Balay   odlens = dlens + (rend-rstart);
141757b952d6SSatish Balay   mask   = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask);
1418cee3aa6bSSatish Balay   PetscMemzero(mask,3*Mbs*sizeof(int));
141957b952d6SSatish Balay   masked1 = mask    + Mbs;
142057b952d6SSatish Balay   masked2 = masked1 + Mbs;
142157b952d6SSatish Balay   rowcount = 0; nzcount = 0;
142257b952d6SSatish Balay   for ( i=0; i<mbs; i++ ) {
142357b952d6SSatish Balay     dcount  = 0;
142457b952d6SSatish Balay     odcount = 0;
142557b952d6SSatish Balay     for ( j=0; j<bs; j++ ) {
142657b952d6SSatish Balay       kmax = locrowlens[rowcount];
142757b952d6SSatish Balay       for ( k=0; k<kmax; k++ ) {
142857b952d6SSatish Balay         tmp = mycols[nzcount++]/bs;
142957b952d6SSatish Balay         if (!mask[tmp]) {
143057b952d6SSatish Balay           mask[tmp] = 1;
143157b952d6SSatish Balay           if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp;
143257b952d6SSatish Balay           else masked1[dcount++] = tmp;
143357b952d6SSatish Balay         }
143457b952d6SSatish Balay       }
143557b952d6SSatish Balay       rowcount++;
143657b952d6SSatish Balay     }
1437cee3aa6bSSatish Balay 
143857b952d6SSatish Balay     dlens[i]  = dcount;
143957b952d6SSatish Balay     odlens[i] = odcount;
1440cee3aa6bSSatish Balay 
144157b952d6SSatish Balay     /* zero out the mask elements we set */
144257b952d6SSatish Balay     for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0;
144357b952d6SSatish Balay     for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0;
144457b952d6SSatish Balay   }
1445cee3aa6bSSatish Balay 
144657b952d6SSatish Balay   /* create our matrix */
144757b952d6SSatish Balay   ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);CHKERRQ(ierr);
144857b952d6SSatish Balay   A = *newmat;
14496d4a8577SBarry Smith   MatSetOption(A,MAT_COLUMNS_SORTED);
145057b952d6SSatish Balay 
145157b952d6SSatish Balay   if (!rank) {
145257b952d6SSatish Balay     buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf);
145357b952d6SSatish Balay     /* read in my part of the matrix numerical values  */
145457b952d6SSatish Balay     nz = procsnz[0];
145557b952d6SSatish Balay     vals = buf;
1456cee3aa6bSSatish Balay     mycols = ibuf;
1457cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
145857b952d6SSatish Balay     ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
1459cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
146057b952d6SSatish Balay     /* insert into matrix */
146157b952d6SSatish Balay     jj      = rstart*bs;
146257b952d6SSatish Balay     for ( i=0; i<m; i++ ) {
146357b952d6SSatish Balay       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
146457b952d6SSatish Balay       mycols += locrowlens[i];
146557b952d6SSatish Balay       vals   += locrowlens[i];
146657b952d6SSatish Balay       jj++;
146757b952d6SSatish Balay     }
146857b952d6SSatish Balay     /* read in other processors( except the last one) and ship out */
146957b952d6SSatish Balay     for ( i=1; i<size-1; i++ ) {
147057b952d6SSatish Balay       nz = procsnz[i];
147157b952d6SSatish Balay       vals = buf;
147257b952d6SSatish Balay       ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
147357b952d6SSatish Balay       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
147457b952d6SSatish Balay     }
147557b952d6SSatish Balay     /* the last proc */
147657b952d6SSatish Balay     if ( size != 1 ){
147757b952d6SSatish Balay       nz = procsnz[i] - extra_rows;
1478cee3aa6bSSatish Balay       vals = buf;
147957b952d6SSatish Balay       ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
148057b952d6SSatish Balay       for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0;
1481cee3aa6bSSatish Balay       MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);
148257b952d6SSatish Balay     }
148357b952d6SSatish Balay     PetscFree(procsnz);
148457b952d6SSatish Balay   }
148557b952d6SSatish Balay   else {
148657b952d6SSatish Balay     /* receive numeric values */
148757b952d6SSatish Balay     buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf);
148857b952d6SSatish Balay 
148957b952d6SSatish Balay     /* receive message of values*/
149057b952d6SSatish Balay     vals = buf;
1491cee3aa6bSSatish Balay     mycols = ibuf;
149257b952d6SSatish Balay     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
149357b952d6SSatish Balay     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1494cee3aa6bSSatish Balay     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIBAIJ:something is wrong with file");
149557b952d6SSatish Balay 
149657b952d6SSatish Balay     /* insert into matrix */
149757b952d6SSatish Balay     jj      = rstart*bs;
1498cee3aa6bSSatish Balay     for ( i=0; i<m; i++ ) {
149957b952d6SSatish Balay       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
150057b952d6SSatish Balay       mycols += locrowlens[i];
150157b952d6SSatish Balay       vals   += locrowlens[i];
150257b952d6SSatish Balay       jj++;
150357b952d6SSatish Balay     }
150457b952d6SSatish Balay   }
150557b952d6SSatish Balay   PetscFree(locrowlens);
150657b952d6SSatish Balay   PetscFree(buf);
150757b952d6SSatish Balay   PetscFree(ibuf);
150857b952d6SSatish Balay   PetscFree(rowners);
150957b952d6SSatish Balay   PetscFree(dlens);
1510cee3aa6bSSatish Balay   PetscFree(mask);
15116d4a8577SBarry Smith   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
15126d4a8577SBarry Smith   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
151357b952d6SSatish Balay   return 0;
151457b952d6SSatish Balay }
151557b952d6SSatish Balay 
151657b952d6SSatish Balay 
1517