xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 0ac07820caad201d9f56837b0224a85c76e3c049)
179bdfe76SSatish Balay #ifndef lint
2*0ac07820SSatish Balay static char vcid[] = "$Id: mpibaij.c,v 1.15 1996/07/12 19:09:33 balay Exp balay $";
379bdfe76SSatish Balay #endif
479bdfe76SSatish Balay 
579bdfe76SSatish Balay #include "mpibaij.h"
679bdfe76SSatish Balay 
779bdfe76SSatish Balay 
857b952d6SSatish Balay #include "draw.h"
957b952d6SSatish Balay #include "pinclude/pviewer.h"
1057b952d6SSatish Balay 
1157b952d6SSatish Balay extern int MatSetUpMultiply_MPIBAIJ(Mat);
1257b952d6SSatish Balay extern int DisAssemble_MPIBAIJ(Mat);
13d212a18eSSatish Balay extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int);
14d212a18eSSatish Balay extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatGetSubMatrixCall,Mat **);
1593bc47c4SSatish Balay extern int MatLUFactorSymbolic_MPIBAIJ(Mat,IS,IS,double,Mat *);
1693bc47c4SSatish Balay extern int MatLUFactorNumeric_MPIBAIJ(Mat,Mat *);
1793bc47c4SSatish Balay extern int MatLUFactor_MPIBAIJ(Mat,IS,IS,double);
1893bc47c4SSatish Balay extern int MatSolve_MPIBAIJ(Mat,Vec,Vec);
1993bc47c4SSatish Balay extern int MatSolveAdd_MPIBAIJ(Mat,Vec,Vec,Vec);
2093bc47c4SSatish Balay extern int MatSolveTrans_MPIBAIJ(Mat,Vec,Vec);
2193bc47c4SSatish Balay extern int MatSolveTransAdd_MPIBAIJ(Mat,Vec,Vec,Vec);
2293bc47c4SSatish Balay extern int MatILUFactorSymbolic_MPIBAIJ(Mat,IS,IS,double,int,Mat *);
2357b952d6SSatish Balay 
2457b952d6SSatish Balay /* local utility routine that creates a mapping from the global column
2557b952d6SSatish Balay number to the local number in the off-diagonal part of the local
2657b952d6SSatish Balay storage of the matrix.  This is done in a non scable way since the
2757b952d6SSatish Balay length of colmap equals the global matrix length.
2857b952d6SSatish Balay */
2957b952d6SSatish Balay static int CreateColmap_MPIBAIJ_Private(Mat mat)
3057b952d6SSatish Balay {
3157b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
3257b952d6SSatish Balay   Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data;
3357b952d6SSatish Balay   int        nbs = B->nbs,i;
3457b952d6SSatish Balay 
3557b952d6SSatish Balay   baij->colmap = (int *) PetscMalloc(baij->Nbs*sizeof(int));CHKPTRQ(baij->colmap);
3657b952d6SSatish Balay   PLogObjectMemory(mat,baij->Nbs*sizeof(int));
3757b952d6SSatish Balay   PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));
3857b952d6SSatish Balay   for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i;
3957b952d6SSatish Balay   return 0;
4057b952d6SSatish Balay }
4157b952d6SSatish Balay 
4257b952d6SSatish Balay 
43acdf5bf4SSatish Balay static int MatGetReordering_MPIBAIJ(Mat mat,MatReordering type,IS *rperm,IS *cperm)
4457b952d6SSatish Balay {
4557b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
4657b952d6SSatish Balay   int         ierr;
4757b952d6SSatish Balay   if (baij->size == 1) {
4857b952d6SSatish Balay     ierr = MatGetReordering(baij->A,type,rperm,cperm); CHKERRQ(ierr);
4957b952d6SSatish Balay   } else SETERRQ(1,"MatGetReordering_MPIBAIJ:not supported in parallel");
5057b952d6SSatish Balay   return 0;
5157b952d6SSatish Balay }
5257b952d6SSatish Balay 
5357b952d6SSatish Balay static int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
5457b952d6SSatish Balay {
5557b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
5657b952d6SSatish Balay   Scalar      value;
5757b952d6SSatish Balay   int         ierr,i,j, rstart = baij->rstart, rend = baij->rend;
5857b952d6SSatish Balay   int         cstart = baij->cstart, cend = baij->cend,row,col;
5957b952d6SSatish Balay   int         roworiented = baij->roworiented,rstart_orig,rend_orig;
6057b952d6SSatish Balay   int         cstart_orig,cend_orig,bs=baij->bs;
6157b952d6SSatish Balay 
6257b952d6SSatish Balay   if (baij->insertmode != NOT_SET_VALUES && baij->insertmode != addv) {
6357b952d6SSatish Balay     SETERRQ(1,"MatSetValues_MPIBAIJ:Cannot mix inserts and adds");
6457b952d6SSatish Balay   }
6557b952d6SSatish Balay   baij->insertmode = addv;
6657b952d6SSatish Balay   rstart_orig = rstart*bs;
6757b952d6SSatish Balay   rend_orig   = rend*bs;
6857b952d6SSatish Balay   cstart_orig = cstart*bs;
6957b952d6SSatish Balay   cend_orig   = cend*bs;
7057b952d6SSatish Balay   for ( i=0; i<m; i++ ) {
7157b952d6SSatish Balay     if (im[i] < 0) SETERRQ(1,"MatSetValues_MPIBAIJ:Negative row");
7257b952d6SSatish Balay     if (im[i] >= baij->M) SETERRQ(1,"MatSetValues_MPIBAIJ:Row too large");
7357b952d6SSatish Balay     if (im[i] >= rstart_orig && im[i] < rend_orig) {
7457b952d6SSatish Balay       row = im[i] - rstart_orig;
7557b952d6SSatish Balay       for ( j=0; j<n; j++ ) {
7657b952d6SSatish Balay         if (in[j] < 0) SETERRQ(1,"MatSetValues_MPIBAIJ:Negative column");
7757b952d6SSatish Balay         if (in[j] >= baij->N) SETERRQ(1,"MatSetValues_MPIBAIJ:Col too large");
7857b952d6SSatish Balay         if (in[j] >= cstart_orig && in[j] < cend_orig){
7957b952d6SSatish Balay           col = in[j] - cstart_orig;
8057b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
8157b952d6SSatish Balay           ierr = MatSetValues(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
8257b952d6SSatish Balay         }
8357b952d6SSatish Balay         else {
8457b952d6SSatish Balay           if (mat->was_assembled) {
8557b952d6SSatish Balay             if (!baij->colmap) {ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);}
8658667388SSatish Balay             col = baij->colmap[in[j]/bs] +in[j]%bs;
8757b952d6SSatish Balay             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
8857b952d6SSatish Balay               ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
8957b952d6SSatish Balay               col =  in[j];
9057b952d6SSatish Balay             }
9157b952d6SSatish Balay           }
9257b952d6SSatish Balay           else col = in[j];
9357b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
9457b952d6SSatish Balay           ierr = MatSetValues(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
9557b952d6SSatish Balay         }
9657b952d6SSatish Balay       }
9757b952d6SSatish Balay     }
9857b952d6SSatish Balay     else {
9957b952d6SSatish Balay       if (roworiented) {
10057b952d6SSatish Balay         ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
10157b952d6SSatish Balay       }
10257b952d6SSatish Balay       else {
10357b952d6SSatish Balay         row = im[i];
10457b952d6SSatish Balay         for ( j=0; j<n; j++ ) {
10557b952d6SSatish Balay           ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
10657b952d6SSatish Balay         }
10757b952d6SSatish Balay       }
10857b952d6SSatish Balay     }
10957b952d6SSatish Balay   }
11057b952d6SSatish Balay   return 0;
11157b952d6SSatish Balay }
11257b952d6SSatish Balay 
113d6de1c52SSatish Balay static int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
114d6de1c52SSatish Balay {
115d6de1c52SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
116d6de1c52SSatish Balay   int        bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs;
117d6de1c52SSatish Balay   int        bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col;
118d6de1c52SSatish Balay 
119d6de1c52SSatish Balay   for ( i=0; i<m; i++ ) {
120d6de1c52SSatish Balay     if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIBAIJ:Negative row");
121d6de1c52SSatish Balay     if (idxm[i] >= baij->M) SETERRQ(1,"MatGetValues_MPIBAIJ:Row too large");
122d6de1c52SSatish Balay     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
123d6de1c52SSatish Balay       row = idxm[i] - bsrstart;
124d6de1c52SSatish Balay       for ( j=0; j<n; j++ ) {
125d6de1c52SSatish Balay         if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIBAIJ:Negative column");
126d6de1c52SSatish Balay         if (idxn[j] >= baij->N) SETERRQ(1,"MatGetValues_MPIBAIJ:Col too large");
127d6de1c52SSatish Balay         if (idxn[j] >= bscstart && idxn[j] < bscend){
128d6de1c52SSatish Balay           col = idxn[j] - bscstart;
129d6de1c52SSatish Balay           ierr = MatGetValues(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
130d6de1c52SSatish Balay         }
131d6de1c52SSatish Balay         else {
132acdf5bf4SSatish Balay           if (!baij->colmap) {ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);}
133d9d09a02SSatish Balay           if ( baij->garray[baij->colmap[idxn[j]/bs]] != idxn[j]/bs ) *(v+i*n+j) = 0.0;
134d9d09a02SSatish Balay           else {
135d9d09a02SSatish Balay             col = baij->colmap[idxn[j]/bs]*bs + idxn[j]%bs;
136d6de1c52SSatish Balay             ierr = MatGetValues(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
137d6de1c52SSatish Balay           }
138d6de1c52SSatish Balay         }
139d6de1c52SSatish Balay       }
140d9d09a02SSatish Balay     }
141d6de1c52SSatish Balay     else {
142d6de1c52SSatish Balay       SETERRQ(1,"MatGetValues_MPIBAIJ:Only local values currently supported");
143d6de1c52SSatish Balay     }
144d6de1c52SSatish Balay   }
145d6de1c52SSatish Balay   return 0;
146d6de1c52SSatish Balay }
147d6de1c52SSatish Balay 
148d6de1c52SSatish Balay static int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm)
149d6de1c52SSatish Balay {
150d6de1c52SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
151d6de1c52SSatish Balay   Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data;
152acdf5bf4SSatish Balay   int        ierr, i,bs2=baij->bs2;
153d6de1c52SSatish Balay   double     sum = 0.0;
154d6de1c52SSatish Balay   Scalar     *v;
155d6de1c52SSatish Balay 
156d6de1c52SSatish Balay   if (baij->size == 1) {
157d6de1c52SSatish Balay     ierr =  MatNorm(baij->A,type,norm); CHKERRQ(ierr);
158d6de1c52SSatish Balay   } else {
159d6de1c52SSatish Balay     if (type == NORM_FROBENIUS) {
160d6de1c52SSatish Balay       v = amat->a;
161d6de1c52SSatish Balay       for (i=0; i<amat->nz*bs2; i++ ) {
162d6de1c52SSatish Balay #if defined(PETSC_COMPLEX)
163d6de1c52SSatish Balay         sum += real(conj(*v)*(*v)); v++;
164d6de1c52SSatish Balay #else
165d6de1c52SSatish Balay         sum += (*v)*(*v); v++;
166d6de1c52SSatish Balay #endif
167d6de1c52SSatish Balay       }
168d6de1c52SSatish Balay       v = bmat->a;
169d6de1c52SSatish Balay       for (i=0; i<bmat->nz*bs2; i++ ) {
170d6de1c52SSatish Balay #if defined(PETSC_COMPLEX)
171d6de1c52SSatish Balay         sum += real(conj(*v)*(*v)); v++;
172d6de1c52SSatish Balay #else
173d6de1c52SSatish Balay         sum += (*v)*(*v); v++;
174d6de1c52SSatish Balay #endif
175d6de1c52SSatish Balay       }
176d6de1c52SSatish Balay       MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
177d6de1c52SSatish Balay       *norm = sqrt(*norm);
178d6de1c52SSatish Balay     }
179acdf5bf4SSatish Balay     else
180acdf5bf4SSatish Balay       SETERRQ(1,"MatNorm_SeqBAIJ:No support for this norm yet");
181d6de1c52SSatish Balay   }
182d6de1c52SSatish Balay   return 0;
183d6de1c52SSatish Balay }
18457b952d6SSatish Balay 
18557b952d6SSatish Balay static int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
18657b952d6SSatish Balay {
18757b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
18857b952d6SSatish Balay   MPI_Comm    comm = mat->comm;
18957b952d6SSatish Balay   int         size = baij->size, *owners = baij->rowners,bs=baij->bs;
19057b952d6SSatish Balay   int         rank = baij->rank,tag = mat->tag, *owner,*starts,count,ierr;
19157b952d6SSatish Balay   MPI_Request *send_waits,*recv_waits;
19257b952d6SSatish Balay   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
19357b952d6SSatish Balay   InsertMode  addv;
19457b952d6SSatish Balay   Scalar      *rvalues,*svalues;
19557b952d6SSatish Balay 
19657b952d6SSatish Balay   /* make sure all processors are either in INSERTMODE or ADDMODE */
19757b952d6SSatish Balay   MPI_Allreduce(&baij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
19857b952d6SSatish Balay   if (addv == (ADD_VALUES|INSERT_VALUES)) {
19957b952d6SSatish Balay     SETERRQ(1,"MatAssemblyBegin_MPIBAIJ:Some processors inserted others added");
20057b952d6SSatish Balay   }
20157b952d6SSatish Balay   baij->insertmode = addv; /* in case this processor had no cache */
20257b952d6SSatish Balay 
20357b952d6SSatish Balay   /*  first count number of contributors to each processor */
20457b952d6SSatish Balay   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
20557b952d6SSatish Balay   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
20657b952d6SSatish Balay   owner = (int *) PetscMalloc( (baij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
20757b952d6SSatish Balay   for ( i=0; i<baij->stash.n; i++ ) {
20857b952d6SSatish Balay     idx = baij->stash.idx[i];
20957b952d6SSatish Balay     for ( j=0; j<size; j++ ) {
21057b952d6SSatish Balay       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
21157b952d6SSatish Balay         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
21257b952d6SSatish Balay       }
21357b952d6SSatish Balay     }
21457b952d6SSatish Balay   }
21557b952d6SSatish Balay   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
21657b952d6SSatish Balay 
21757b952d6SSatish Balay   /* inform other processors of number of messages and max length*/
21857b952d6SSatish Balay   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
21957b952d6SSatish Balay   MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);
22057b952d6SSatish Balay   nreceives = work[rank];
22157b952d6SSatish Balay   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
22257b952d6SSatish Balay   nmax = work[rank];
22357b952d6SSatish Balay   PetscFree(work);
22457b952d6SSatish Balay 
22557b952d6SSatish Balay   /* post receives:
22657b952d6SSatish Balay        1) each message will consist of ordered pairs
22757b952d6SSatish Balay      (global index,value) we store the global index as a double
22857b952d6SSatish Balay      to simplify the message passing.
22957b952d6SSatish Balay        2) since we don't know how long each individual message is we
23057b952d6SSatish Balay      allocate the largest needed buffer for each receive. Potentially
23157b952d6SSatish Balay      this is a lot of wasted space.
23257b952d6SSatish Balay 
23357b952d6SSatish Balay 
23457b952d6SSatish Balay        This could be done better.
23557b952d6SSatish Balay   */
23657b952d6SSatish Balay   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
23757b952d6SSatish Balay   CHKPTRQ(rvalues);
23857b952d6SSatish Balay   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
23957b952d6SSatish Balay   CHKPTRQ(recv_waits);
24057b952d6SSatish Balay   for ( i=0; i<nreceives; i++ ) {
24157b952d6SSatish Balay     MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
24257b952d6SSatish Balay               comm,recv_waits+i);
24357b952d6SSatish Balay   }
24457b952d6SSatish Balay 
24557b952d6SSatish Balay   /* do sends:
24657b952d6SSatish Balay       1) starts[i] gives the starting index in svalues for stuff going to
24757b952d6SSatish Balay          the ith processor
24857b952d6SSatish Balay   */
24957b952d6SSatish Balay   svalues = (Scalar *) PetscMalloc(3*(baij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
25057b952d6SSatish Balay   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
25157b952d6SSatish Balay   CHKPTRQ(send_waits);
25257b952d6SSatish Balay   starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
25357b952d6SSatish Balay   starts[0] = 0;
25457b952d6SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
25557b952d6SSatish Balay   for ( i=0; i<baij->stash.n; i++ ) {
25657b952d6SSatish Balay     svalues[3*starts[owner[i]]]       = (Scalar)  baij->stash.idx[i];
25757b952d6SSatish Balay     svalues[3*starts[owner[i]]+1]     = (Scalar)  baij->stash.idy[i];
25857b952d6SSatish Balay     svalues[3*(starts[owner[i]]++)+2] =  baij->stash.array[i];
25957b952d6SSatish Balay   }
26057b952d6SSatish Balay   PetscFree(owner);
26157b952d6SSatish Balay   starts[0] = 0;
26257b952d6SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
26357b952d6SSatish Balay   count = 0;
26457b952d6SSatish Balay   for ( i=0; i<size; i++ ) {
26557b952d6SSatish Balay     if (procs[i]) {
26657b952d6SSatish Balay       MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
26757b952d6SSatish Balay                 comm,send_waits+count++);
26857b952d6SSatish Balay     }
26957b952d6SSatish Balay   }
27057b952d6SSatish Balay   PetscFree(starts); PetscFree(nprocs);
27157b952d6SSatish Balay 
27257b952d6SSatish Balay   /* Free cache space */
27357b952d6SSatish Balay   PLogInfo(0,"[%d]MatAssemblyBegin_MPIBAIJ:Number of off processor values %d\n",rank,baij->stash.n);
27457b952d6SSatish Balay   ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr);
27557b952d6SSatish Balay 
27657b952d6SSatish Balay   baij->svalues    = svalues;    baij->rvalues    = rvalues;
27757b952d6SSatish Balay   baij->nsends     = nsends;     baij->nrecvs     = nreceives;
27857b952d6SSatish Balay   baij->send_waits = send_waits; baij->recv_waits = recv_waits;
27957b952d6SSatish Balay   baij->rmax       = nmax;
28057b952d6SSatish Balay 
28157b952d6SSatish Balay   return 0;
28257b952d6SSatish Balay }
28357b952d6SSatish Balay 
28457b952d6SSatish Balay 
28557b952d6SSatish Balay static int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
28657b952d6SSatish Balay {
28757b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
28857b952d6SSatish Balay   MPI_Status  *send_status,recv_status;
28957b952d6SSatish Balay   int         imdex,nrecvs = baij->nrecvs, count = nrecvs, i, n, ierr;
29057b952d6SSatish Balay   int         bs=baij->bs,row,col,other_disassembled;
29157b952d6SSatish Balay   Scalar      *values,val;
29257b952d6SSatish Balay   InsertMode  addv = baij->insertmode;
29357b952d6SSatish Balay 
29457b952d6SSatish Balay   /*  wait on receives */
29557b952d6SSatish Balay   while (count) {
29657b952d6SSatish Balay     MPI_Waitany(nrecvs,baij->recv_waits,&imdex,&recv_status);
29757b952d6SSatish Balay     /* unpack receives into our local space */
29857b952d6SSatish Balay     values = baij->rvalues + 3*imdex*baij->rmax;
29957b952d6SSatish Balay     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
30057b952d6SSatish Balay     n = n/3;
30157b952d6SSatish Balay     for ( i=0; i<n; i++ ) {
30257b952d6SSatish Balay       row = (int) PetscReal(values[3*i]) - baij->rstart*bs;
30357b952d6SSatish Balay       col = (int) PetscReal(values[3*i+1]);
30457b952d6SSatish Balay       val = values[3*i+2];
30557b952d6SSatish Balay       if (col >= baij->cstart*bs && col < baij->cend*bs) {
30657b952d6SSatish Balay         col -= baij->cstart*bs;
30757b952d6SSatish Balay         MatSetValues(baij->A,1,&row,1,&col,&val,addv);
30857b952d6SSatish Balay       }
30957b952d6SSatish Balay       else {
31057b952d6SSatish Balay         if (mat->was_assembled) {
31157b952d6SSatish Balay           if (!baij->colmap) {ierr = CreateColmap_MPIBAIJ_Private(mat); CHKERRQ(ierr);}
31257b952d6SSatish Balay           col = baij->colmap[col/bs]*bs + col%bs;
31357b952d6SSatish Balay           if (col < 0  && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
31457b952d6SSatish Balay             ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
31557b952d6SSatish Balay             col = (int) PetscReal(values[3*i+1]);
31657b952d6SSatish Balay           }
31757b952d6SSatish Balay         }
31857b952d6SSatish Balay         MatSetValues(baij->B,1,&row,1,&col,&val,addv);
31957b952d6SSatish Balay       }
32057b952d6SSatish Balay     }
32157b952d6SSatish Balay     count--;
32257b952d6SSatish Balay   }
32357b952d6SSatish Balay   PetscFree(baij->recv_waits); PetscFree(baij->rvalues);
32457b952d6SSatish Balay 
32557b952d6SSatish Balay   /* wait on sends */
32657b952d6SSatish Balay   if (baij->nsends) {
32757b952d6SSatish Balay     send_status = (MPI_Status *) PetscMalloc(baij->nsends*sizeof(MPI_Status));
32857b952d6SSatish Balay     CHKPTRQ(send_status);
32957b952d6SSatish Balay     MPI_Waitall(baij->nsends,baij->send_waits,send_status);
33057b952d6SSatish Balay     PetscFree(send_status);
33157b952d6SSatish Balay   }
33257b952d6SSatish Balay   PetscFree(baij->send_waits); PetscFree(baij->svalues);
33357b952d6SSatish Balay 
33457b952d6SSatish Balay   baij->insertmode = NOT_SET_VALUES;
33557b952d6SSatish Balay   ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr);
33657b952d6SSatish Balay   ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr);
33757b952d6SSatish Balay 
33857b952d6SSatish Balay   /* determine if any processor has disassembled, if so we must
33957b952d6SSatish Balay      also disassemble ourselfs, in order that we may reassemble. */
34057b952d6SSatish Balay   MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
34157b952d6SSatish Balay   if (mat->was_assembled && !other_disassembled) {
34257b952d6SSatish Balay     ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
34357b952d6SSatish Balay   }
34457b952d6SSatish Balay 
3456d4a8577SBarry Smith   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
34657b952d6SSatish Balay     ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr);
34757b952d6SSatish Balay   }
34857b952d6SSatish Balay   ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr);
34957b952d6SSatish Balay   ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr);
35057b952d6SSatish Balay 
35157b952d6SSatish Balay   if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;}
35257b952d6SSatish Balay   return 0;
35357b952d6SSatish Balay }
35457b952d6SSatish Balay 
35557b952d6SSatish Balay static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer)
35657b952d6SSatish Balay {
35757b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
35857b952d6SSatish Balay   int          ierr;
35957b952d6SSatish Balay 
36057b952d6SSatish Balay   if (baij->size == 1) {
36157b952d6SSatish Balay     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
36257b952d6SSatish Balay   }
36357b952d6SSatish Balay   else SETERRQ(1,"MatView_MPIBAIJ_Binary:Only uniprocessor output supported");
36457b952d6SSatish Balay   return 0;
36557b952d6SSatish Balay }
36657b952d6SSatish Balay 
36757b952d6SSatish Balay static int MatView_MPIBAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
36857b952d6SSatish Balay {
36957b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
370cee3aa6bSSatish Balay   int          ierr, format,rank,bs=baij->bs;
37157b952d6SSatish Balay   FILE         *fd;
37257b952d6SSatish Balay   ViewerType   vtype;
37357b952d6SSatish Balay 
37457b952d6SSatish Balay   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
37557b952d6SSatish Balay   if (vtype  == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
37657b952d6SSatish Balay     ierr = ViewerGetFormat(viewer,&format);
37757b952d6SSatish Balay     if (format == ASCII_FORMAT_INFO_DETAILED) {
37857b952d6SSatish Balay       int nz, nzalloc, mem;
37957b952d6SSatish Balay       MPI_Comm_rank(mat->comm,&rank);
38057b952d6SSatish Balay       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
38157b952d6SSatish Balay       ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem);
38257b952d6SSatish Balay       PetscSequentialPhaseBegin(mat->comm,1);
38357b952d6SSatish Balay       fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
38457b952d6SSatish Balay               rank,baij->m,nz*bs,nzalloc*bs,baij->bs,mem);
38557b952d6SSatish Balay       ierr = MatGetInfo(baij->A,MAT_LOCAL,&nz,&nzalloc,&mem);
38657b952d6SSatish Balay       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,nz*bs);
38757b952d6SSatish Balay       ierr = MatGetInfo(baij->B,MAT_LOCAL,&nz,&nzalloc,&mem);
38857b952d6SSatish Balay       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,nz*bs);
38957b952d6SSatish Balay       fflush(fd);
39057b952d6SSatish Balay       PetscSequentialPhaseEnd(mat->comm,1);
39157b952d6SSatish Balay       ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr);
39257b952d6SSatish Balay       return 0;
39357b952d6SSatish Balay     }
39457b952d6SSatish Balay     else if (format == ASCII_FORMAT_INFO) {
39557b952d6SSatish Balay       return 0;
39657b952d6SSatish Balay     }
39757b952d6SSatish Balay   }
39857b952d6SSatish Balay 
39957b952d6SSatish Balay   if (vtype == DRAW_VIEWER) {
40057b952d6SSatish Balay     Draw       draw;
40157b952d6SSatish Balay     PetscTruth isnull;
40257b952d6SSatish Balay     ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
40357b952d6SSatish Balay     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
40457b952d6SSatish Balay   }
40557b952d6SSatish Balay 
40657b952d6SSatish Balay   if (vtype == ASCII_FILE_VIEWER) {
40757b952d6SSatish Balay     ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
40857b952d6SSatish Balay     PetscSequentialPhaseBegin(mat->comm,1);
40957b952d6SSatish Balay     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
41057b952d6SSatish Balay            baij->rank,baij->m,baij->rstart*bs,baij->rend*bs,baij->n,
41157b952d6SSatish Balay             baij->cstart*bs,baij->cend*bs);
41257b952d6SSatish Balay     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
41357b952d6SSatish Balay     ierr = MatView(baij->B,viewer); CHKERRQ(ierr);
41457b952d6SSatish Balay     fflush(fd);
41557b952d6SSatish Balay     PetscSequentialPhaseEnd(mat->comm,1);
41657b952d6SSatish Balay   }
41757b952d6SSatish Balay   else {
41857b952d6SSatish Balay     int size = baij->size;
41957b952d6SSatish Balay     rank = baij->rank;
42057b952d6SSatish Balay     if (size == 1) {
42157b952d6SSatish Balay       ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
42257b952d6SSatish Balay     }
42357b952d6SSatish Balay     else {
42457b952d6SSatish Balay       /* assemble the entire matrix onto first processor. */
42557b952d6SSatish Balay       Mat         A;
42657b952d6SSatish Balay       Mat_SeqBAIJ *Aloc;
42757b952d6SSatish Balay       int         M = baij->M, N = baij->N,*ai,*aj,row,col,i,j,k,*rvals;
42857b952d6SSatish Balay       int         mbs=baij->mbs;
42957b952d6SSatish Balay       Scalar      *a;
43057b952d6SSatish Balay 
43157b952d6SSatish Balay       if (!rank) {
432cee3aa6bSSatish Balay         ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
43357b952d6SSatish Balay         CHKERRQ(ierr);
43457b952d6SSatish Balay       }
43557b952d6SSatish Balay       else {
436cee3aa6bSSatish Balay         ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
43757b952d6SSatish Balay         CHKERRQ(ierr);
43857b952d6SSatish Balay       }
43957b952d6SSatish Balay       PLogObjectParent(mat,A);
44057b952d6SSatish Balay 
44157b952d6SSatish Balay       /* copy over the A part */
44257b952d6SSatish Balay       Aloc = (Mat_SeqBAIJ*) baij->A->data;
44357b952d6SSatish Balay       ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
44457b952d6SSatish Balay       row = baij->rstart;
44557b952d6SSatish Balay       rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
44657b952d6SSatish Balay 
44757b952d6SSatish Balay       for ( i=0; i<mbs; i++ ) {
44857b952d6SSatish Balay         rvals[0] = bs*(baij->rstart + i);
44957b952d6SSatish Balay         for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
45057b952d6SSatish Balay         for ( j=ai[i]; j<ai[i+1]; j++ ) {
45157b952d6SSatish Balay           col = (baij->cstart+aj[j])*bs;
45257b952d6SSatish Balay           for (k=0; k<bs; k++ ) {
453cee3aa6bSSatish Balay             ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
454cee3aa6bSSatish Balay             col++; a += bs;
45557b952d6SSatish Balay           }
45657b952d6SSatish Balay         }
45757b952d6SSatish Balay       }
45857b952d6SSatish Balay       /* copy over the B part */
45957b952d6SSatish Balay       Aloc = (Mat_SeqBAIJ*) baij->B->data;
46057b952d6SSatish Balay       ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
46157b952d6SSatish Balay       row = baij->rstart*bs;
46257b952d6SSatish Balay       for ( i=0; i<mbs; i++ ) {
46357b952d6SSatish Balay         rvals[0] = bs*(baij->rstart + i);
46457b952d6SSatish Balay         for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
46557b952d6SSatish Balay         for ( j=ai[i]; j<ai[i+1]; j++ ) {
46657b952d6SSatish Balay           col = baij->garray[aj[j]]*bs;
46757b952d6SSatish Balay           for (k=0; k<bs; k++ ) {
468cee3aa6bSSatish Balay             ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
469cee3aa6bSSatish Balay             col++; a += bs;
47057b952d6SSatish Balay           }
47157b952d6SSatish Balay         }
47257b952d6SSatish Balay       }
47357b952d6SSatish Balay       PetscFree(rvals);
4746d4a8577SBarry Smith       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
4756d4a8577SBarry Smith       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
47657b952d6SSatish Balay       if (!rank) {
47757b952d6SSatish Balay         ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
47857b952d6SSatish Balay       }
47957b952d6SSatish Balay       ierr = MatDestroy(A); CHKERRQ(ierr);
48057b952d6SSatish Balay     }
48157b952d6SSatish Balay   }
48257b952d6SSatish Balay   return 0;
48357b952d6SSatish Balay }
48457b952d6SSatish Balay 
48557b952d6SSatish Balay 
48657b952d6SSatish Balay 
48757b952d6SSatish Balay static int MatView_MPIBAIJ(PetscObject obj,Viewer viewer)
48857b952d6SSatish Balay {
48957b952d6SSatish Balay   Mat         mat = (Mat) obj;
49057b952d6SSatish Balay   int         ierr;
49157b952d6SSatish Balay   ViewerType  vtype;
49257b952d6SSatish Balay 
49357b952d6SSatish Balay   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
49457b952d6SSatish Balay   if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
49557b952d6SSatish Balay       vtype == DRAW_VIEWER       || vtype == MATLAB_VIEWER) {
49657b952d6SSatish Balay     ierr = MatView_MPIBAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
49757b952d6SSatish Balay   }
49857b952d6SSatish Balay   else if (vtype == BINARY_FILE_VIEWER) {
49957b952d6SSatish Balay     return MatView_MPIBAIJ_Binary(mat,viewer);
50057b952d6SSatish Balay   }
50157b952d6SSatish Balay   return 0;
50257b952d6SSatish Balay }
50357b952d6SSatish Balay 
50479bdfe76SSatish Balay static int MatDestroy_MPIBAIJ(PetscObject obj)
50579bdfe76SSatish Balay {
50679bdfe76SSatish Balay   Mat         mat = (Mat) obj;
50779bdfe76SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
50879bdfe76SSatish Balay   int         ierr;
50979bdfe76SSatish Balay 
51079bdfe76SSatish Balay #if defined(PETSC_LOG)
51179bdfe76SSatish Balay   PLogObjectState(obj,"Rows=%d, Cols=%d",baij->M,baij->N);
51279bdfe76SSatish Balay #endif
51379bdfe76SSatish Balay 
51479bdfe76SSatish Balay   PetscFree(baij->rowners);
51579bdfe76SSatish Balay   ierr = MatDestroy(baij->A); CHKERRQ(ierr);
51679bdfe76SSatish Balay   ierr = MatDestroy(baij->B); CHKERRQ(ierr);
51779bdfe76SSatish Balay   if (baij->colmap) PetscFree(baij->colmap);
51879bdfe76SSatish Balay   if (baij->garray) PetscFree(baij->garray);
51979bdfe76SSatish Balay   if (baij->lvec)   VecDestroy(baij->lvec);
52079bdfe76SSatish Balay   if (baij->Mvctx)  VecScatterDestroy(baij->Mvctx);
52179bdfe76SSatish Balay   if (baij->rowvalues) PetscFree(baij->rowvalues);
52279bdfe76SSatish Balay   PetscFree(baij);
52379bdfe76SSatish Balay   PLogObjectDestroy(mat);
52479bdfe76SSatish Balay   PetscHeaderDestroy(mat);
52579bdfe76SSatish Balay   return 0;
52679bdfe76SSatish Balay }
52779bdfe76SSatish Balay 
528cee3aa6bSSatish Balay static int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
529cee3aa6bSSatish Balay {
530cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
53147b4a8eaSLois Curfman McInnes   int         ierr, nt;
532cee3aa6bSSatish Balay 
53347b4a8eaSLois Curfman McInnes   ierr = VecGetLocalSize(xx,&nt);  CHKERRQ(ierr);
53447b4a8eaSLois Curfman McInnes   if (nt != a->n) {
535*0ac07820SSatish Balay     SETERRQ(1,"MatMult_MPIBAIJ:Incompatible parition of A and xx");
53647b4a8eaSLois Curfman McInnes   }
53747b4a8eaSLois Curfman McInnes   ierr = VecGetLocalSize(yy,&nt);  CHKERRQ(ierr);
53847b4a8eaSLois Curfman McInnes   if (nt != a->m) {
539*0ac07820SSatish Balay     SETERRQ(1,"MatMult_MPIBAIJ:Incompatible parition of A and yy");
54047b4a8eaSLois Curfman McInnes   }
541cee3aa6bSSatish Balay   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx); CHKERRQ(ierr);
542cee3aa6bSSatish Balay   ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr);
543cee3aa6bSSatish Balay   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx); CHKERRQ(ierr);
544cee3aa6bSSatish Balay   ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
545cee3aa6bSSatish Balay   return 0;
546cee3aa6bSSatish Balay }
547cee3aa6bSSatish Balay 
548cee3aa6bSSatish Balay static int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
549cee3aa6bSSatish Balay {
550cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
551cee3aa6bSSatish Balay   int        ierr;
552cee3aa6bSSatish Balay   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
553cee3aa6bSSatish Balay   ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
554cee3aa6bSSatish Balay   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
555cee3aa6bSSatish Balay   ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
556cee3aa6bSSatish Balay   return 0;
557cee3aa6bSSatish Balay }
558cee3aa6bSSatish Balay 
559cee3aa6bSSatish Balay static int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy)
560cee3aa6bSSatish Balay {
561cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
562cee3aa6bSSatish Balay   int        ierr;
563cee3aa6bSSatish Balay 
564cee3aa6bSSatish Balay   /* do nondiagonal part */
565cee3aa6bSSatish Balay   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
566cee3aa6bSSatish Balay   /* send it on its way */
567cee3aa6bSSatish Balay   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,
568cee3aa6bSSatish Balay                 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
569cee3aa6bSSatish Balay   /* do local part */
570cee3aa6bSSatish Balay   ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr);
571cee3aa6bSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
572cee3aa6bSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
573cee3aa6bSSatish Balay   /* but is not perhaps always true. */
574cee3aa6bSSatish Balay   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,
575cee3aa6bSSatish Balay                   (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
576cee3aa6bSSatish Balay   return 0;
577cee3aa6bSSatish Balay }
578cee3aa6bSSatish Balay 
579cee3aa6bSSatish Balay static int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
580cee3aa6bSSatish Balay {
581cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
582cee3aa6bSSatish Balay   int        ierr;
583cee3aa6bSSatish Balay 
584cee3aa6bSSatish Balay   /* do nondiagonal part */
585cee3aa6bSSatish Balay   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
586cee3aa6bSSatish Balay   /* send it on its way */
587cee3aa6bSSatish Balay   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,
588cee3aa6bSSatish Balay                  (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
589cee3aa6bSSatish Balay   /* do local part */
590cee3aa6bSSatish Balay   ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
591cee3aa6bSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
592cee3aa6bSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
593cee3aa6bSSatish Balay   /* but is not perhaps always true. */
594cee3aa6bSSatish Balay   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,
595cee3aa6bSSatish Balay                   (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
596cee3aa6bSSatish Balay   return 0;
597cee3aa6bSSatish Balay }
598cee3aa6bSSatish Balay 
599cee3aa6bSSatish Balay /*
600cee3aa6bSSatish Balay   This only works correctly for square matrices where the subblock A->A is the
601cee3aa6bSSatish Balay    diagonal block
602cee3aa6bSSatish Balay */
603cee3aa6bSSatish Balay static int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
604cee3aa6bSSatish Balay {
605cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
606cee3aa6bSSatish Balay   if (a->M != a->N)
607cee3aa6bSSatish Balay     SETERRQ(1,"MatGetDiagonal_MPIBAIJ:Supports only square matrix where A->A is diag block");
608cee3aa6bSSatish Balay   return MatGetDiagonal(a->A,v);
609cee3aa6bSSatish Balay }
610cee3aa6bSSatish Balay 
611cee3aa6bSSatish Balay static int MatScale_MPIBAIJ(Scalar *aa,Mat A)
612cee3aa6bSSatish Balay {
613cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
614cee3aa6bSSatish Balay   int        ierr;
615cee3aa6bSSatish Balay   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
616cee3aa6bSSatish Balay   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
617cee3aa6bSSatish Balay   return 0;
618cee3aa6bSSatish Balay }
61957b952d6SSatish Balay static int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n)
62057b952d6SSatish Balay {
62157b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
62257b952d6SSatish Balay   *m = mat->M; *n = mat->N;
62357b952d6SSatish Balay   return 0;
62457b952d6SSatish Balay }
62557b952d6SSatish Balay 
62657b952d6SSatish Balay static int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n)
62757b952d6SSatish Balay {
62857b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
62957b952d6SSatish Balay   *m = mat->m; *n = mat->N;
63057b952d6SSatish Balay   return 0;
63157b952d6SSatish Balay }
63257b952d6SSatish Balay 
63357b952d6SSatish Balay static int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n)
63457b952d6SSatish Balay {
63557b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
63657b952d6SSatish Balay   *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs;
63757b952d6SSatish Balay   return 0;
63857b952d6SSatish Balay }
63957b952d6SSatish Balay 
640acdf5bf4SSatish Balay extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
641acdf5bf4SSatish Balay extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
642acdf5bf4SSatish Balay 
643acdf5bf4SSatish Balay int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
644acdf5bf4SSatish Balay {
645acdf5bf4SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
646acdf5bf4SSatish Balay   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
647acdf5bf4SSatish Balay   int        bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB;
648d9d09a02SSatish Balay   int        nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs;
649d9d09a02SSatish Balay   int        *cmap, *idx_p,cstart = mat->cstart;
650acdf5bf4SSatish Balay 
651acdf5bf4SSatish Balay   if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,"MatGetRow_MPIBAIJ:Already active");
652acdf5bf4SSatish Balay   mat->getrowactive = PETSC_TRUE;
653acdf5bf4SSatish Balay 
654acdf5bf4SSatish Balay   if (!mat->rowvalues && (idx || v)) {
655acdf5bf4SSatish Balay     /*
656acdf5bf4SSatish Balay         allocate enough space to hold information from the longest row.
657acdf5bf4SSatish Balay     */
658acdf5bf4SSatish Balay     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data;
659bd16c2feSSatish Balay     int     max = 1,mbs = mat->mbs,tmp;
660bd16c2feSSatish Balay     for ( i=0; i<mbs; i++ ) {
661acdf5bf4SSatish Balay       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
662acdf5bf4SSatish Balay       if (max < tmp) { max = tmp; }
663acdf5bf4SSatish Balay     }
664acdf5bf4SSatish Balay     mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar)));
665acdf5bf4SSatish Balay     CHKPTRQ(mat->rowvalues);
666acdf5bf4SSatish Balay     mat->rowindices = (int *) (mat->rowvalues + max*bs2);
667acdf5bf4SSatish Balay   }
668acdf5bf4SSatish Balay 
669acdf5bf4SSatish Balay 
670d9d09a02SSatish Balay   if (row < brstart || row >= brend) SETERRQ(1,"MatGetRow_MPIBAIJ:Only local rows")
671d9d09a02SSatish Balay   lrow = row - brstart;
672acdf5bf4SSatish Balay 
673acdf5bf4SSatish Balay   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
674acdf5bf4SSatish Balay   if (!v)   {pvA = 0; pvB = 0;}
675acdf5bf4SSatish Balay   if (!idx) {pcA = 0; if (!v) pcB = 0;}
676acdf5bf4SSatish Balay   ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
677acdf5bf4SSatish Balay   ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
678acdf5bf4SSatish Balay   nztot = nzA + nzB;
679acdf5bf4SSatish Balay 
680acdf5bf4SSatish Balay   cmap  = mat->garray;
681acdf5bf4SSatish Balay   if (v  || idx) {
682acdf5bf4SSatish Balay     if (nztot) {
683acdf5bf4SSatish Balay       /* Sort by increasing column numbers, assuming A and B already sorted */
684acdf5bf4SSatish Balay       int imark = -1;
685acdf5bf4SSatish Balay       if (v) {
686acdf5bf4SSatish Balay         *v = v_p = mat->rowvalues;
687acdf5bf4SSatish Balay         for ( i=0; i<nzB; i++ ) {
688d9d09a02SSatish Balay           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
689acdf5bf4SSatish Balay           else break;
690acdf5bf4SSatish Balay         }
691acdf5bf4SSatish Balay         imark = i;
692acdf5bf4SSatish Balay         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
693acdf5bf4SSatish Balay         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
694acdf5bf4SSatish Balay       }
695acdf5bf4SSatish Balay       if (idx) {
696acdf5bf4SSatish Balay         *idx = idx_p = mat->rowindices;
697acdf5bf4SSatish Balay         if (imark > -1) {
698acdf5bf4SSatish Balay           for ( i=0; i<imark; i++ ) {
699bd16c2feSSatish Balay             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
700acdf5bf4SSatish Balay           }
701acdf5bf4SSatish Balay         } else {
702acdf5bf4SSatish Balay           for ( i=0; i<nzB; i++ ) {
703d9d09a02SSatish Balay             if (cmap[cworkB[i]/bs] < cstart)
704d9d09a02SSatish Balay               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
705acdf5bf4SSatish Balay             else break;
706acdf5bf4SSatish Balay           }
707acdf5bf4SSatish Balay           imark = i;
708acdf5bf4SSatish Balay         }
709d9d09a02SSatish Balay         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart*bs + cworkA[i];
710d9d09a02SSatish Balay         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
711acdf5bf4SSatish Balay       }
712acdf5bf4SSatish Balay     }
713d212a18eSSatish Balay     else {
714d212a18eSSatish Balay       if (idx) *idx = 0;
715d212a18eSSatish Balay       if (v)   *v   = 0;
716d212a18eSSatish Balay     }
717acdf5bf4SSatish Balay   }
718acdf5bf4SSatish Balay   *nz = nztot;
719acdf5bf4SSatish Balay   ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
720acdf5bf4SSatish Balay   ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
721acdf5bf4SSatish Balay   return 0;
722acdf5bf4SSatish Balay }
723acdf5bf4SSatish Balay 
724acdf5bf4SSatish Balay int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
725acdf5bf4SSatish Balay {
726acdf5bf4SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
727acdf5bf4SSatish Balay   if (baij->getrowactive == PETSC_FALSE) {
728acdf5bf4SSatish Balay     SETERRQ(1,"MatRestoreRow_MPIBAIJ:MatGetRow not called");
729acdf5bf4SSatish Balay   }
730acdf5bf4SSatish Balay   baij->getrowactive = PETSC_FALSE;
731acdf5bf4SSatish Balay   return 0;
732acdf5bf4SSatish Balay }
733acdf5bf4SSatish Balay 
7345a838052SSatish Balay static int MatGetBlockSize_MPIBAIJ(Mat mat, int *bs)
7355a838052SSatish Balay {
7365a838052SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
7375a838052SSatish Balay   *bs = baij->bs;
7385a838052SSatish Balay   return 0;
7395a838052SSatish Balay }
7405a838052SSatish Balay 
74158667388SSatish Balay static int MatZeroEntries_MPIBAIJ(Mat A)
74258667388SSatish Balay {
74358667388SSatish Balay   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data;
74458667388SSatish Balay   int         ierr;
74558667388SSatish Balay   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
74658667388SSatish Balay   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
74758667388SSatish Balay   return 0;
74858667388SSatish Balay }
749*0ac07820SSatish Balay 
750*0ac07820SSatish Balay static int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,int *nz,
751*0ac07820SSatish Balay                              int *nzalloc,int *mem)
752*0ac07820SSatish Balay {
753*0ac07820SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
754*0ac07820SSatish Balay   Mat         A = mat->A, B = mat->B;
755*0ac07820SSatish Balay   int         ierr, isend[3], irecv[3], nzA, nzallocA, memA;
756*0ac07820SSatish Balay 
757*0ac07820SSatish Balay   ierr = MatGetInfo(A,MAT_LOCAL,&nzA,&nzallocA,&memA); CHKERRQ(ierr);
758*0ac07820SSatish Balay   ierr = MatGetInfo(B,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr);
759*0ac07820SSatish Balay   isend[0] += nzA; isend[1] += nzallocA; isend[2] += memA;
760*0ac07820SSatish Balay   if (flag == MAT_LOCAL) {
761*0ac07820SSatish Balay     if (nz)       *nz      = isend[0];
762*0ac07820SSatish Balay     if (nzalloc)  *nzalloc = isend[1];
763*0ac07820SSatish Balay     if (mem)      *mem     = isend[2];
764*0ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_MAX) {
765*0ac07820SSatish Balay     MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_MAX,matin->comm);
766*0ac07820SSatish Balay     if (nz)      *nz      = irecv[0];
767*0ac07820SSatish Balay     if (nzalloc) *nzalloc = irecv[1];
768*0ac07820SSatish Balay     if (mem)     *mem     = irecv[2];
769*0ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_SUM) {
770*0ac07820SSatish Balay     MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_SUM,matin->comm);
771*0ac07820SSatish Balay     if (nz)      *nz      = irecv[0];
772*0ac07820SSatish Balay     if (nzalloc) *nzalloc = irecv[1];
773*0ac07820SSatish Balay     if (mem)     *mem     = irecv[2];
774*0ac07820SSatish Balay   }
775*0ac07820SSatish Balay   return 0;
776*0ac07820SSatish Balay }
777*0ac07820SSatish Balay 
77858667388SSatish Balay static int MatSetOption_MPIBAIJ(Mat A,MatOption op)
77958667388SSatish Balay {
78058667388SSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
78158667388SSatish Balay 
78258667388SSatish Balay   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
78358667388SSatish Balay       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
78458667388SSatish Balay       op == MAT_COLUMNS_SORTED ||
78558667388SSatish Balay       op == MAT_ROW_ORIENTED) {
78658667388SSatish Balay         MatSetOption(a->A,op);
78758667388SSatish Balay         MatSetOption(a->B,op);
78858667388SSatish Balay   }
78958667388SSatish Balay   else if (op == MAT_ROWS_SORTED ||
79058667388SSatish Balay            op == MAT_SYMMETRIC ||
79158667388SSatish Balay            op == MAT_STRUCTURALLY_SYMMETRIC ||
79258667388SSatish Balay            op == MAT_YES_NEW_DIAGONALS)
79358667388SSatish Balay     PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
79458667388SSatish Balay   else if (op == MAT_COLUMN_ORIENTED) {
79558667388SSatish Balay     a->roworiented = 0;
79658667388SSatish Balay     MatSetOption(a->A,op);
79758667388SSatish Balay     MatSetOption(a->B,op);
79858667388SSatish Balay   }
79958667388SSatish Balay   else if (op == MAT_NO_NEW_DIAGONALS)
800*0ac07820SSatish Balay     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIBAIJ:MAT_NO_NEW_DIAGONALS");}
80158667388SSatish Balay   else
802*0ac07820SSatish Balay     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIBAIJ:unknown option");}
80358667388SSatish Balay   return 0;
80458667388SSatish Balay }
80558667388SSatish Balay 
806*0ac07820SSatish Balay static int MatTranspose_MPIBAIJ(Mat A,Mat *matout)
807*0ac07820SSatish Balay {
808*0ac07820SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data;
809*0ac07820SSatish Balay   Mat_SeqBAIJ *Aloc;
810*0ac07820SSatish Balay   Mat        B;
811*0ac07820SSatish Balay   int        ierr,M=baij->M,N=baij->N,*ai,*aj,row,i,*rvals,j,k,col;
812*0ac07820SSatish Balay   int        bs=baij->bs,mbs=baij->mbs;
813*0ac07820SSatish Balay   Scalar     *a;
814*0ac07820SSatish Balay 
815*0ac07820SSatish Balay   if (matout == PETSC_NULL && M != N)
816*0ac07820SSatish Balay     SETERRQ(1,"MatTranspose_MPIBAIJ:Square matrix only for in-place");
817*0ac07820SSatish Balay   ierr = MatCreateMPIBAIJ(A->comm,baij->bs,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);
818*0ac07820SSatish Balay   CHKERRQ(ierr);
819*0ac07820SSatish Balay 
820*0ac07820SSatish Balay   /* copy over the A part */
821*0ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*) baij->A->data;
822*0ac07820SSatish Balay   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
823*0ac07820SSatish Balay   row = baij->rstart;
824*0ac07820SSatish Balay   rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
825*0ac07820SSatish Balay 
826*0ac07820SSatish Balay   for ( i=0; i<mbs; i++ ) {
827*0ac07820SSatish Balay     rvals[0] = bs*(baij->rstart + i);
828*0ac07820SSatish Balay     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
829*0ac07820SSatish Balay     for ( j=ai[i]; j<ai[i+1]; j++ ) {
830*0ac07820SSatish Balay       col = (baij->cstart+aj[j])*bs;
831*0ac07820SSatish Balay       for (k=0; k<bs; k++ ) {
832*0ac07820SSatish Balay         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
833*0ac07820SSatish Balay         col++; a += bs;
834*0ac07820SSatish Balay       }
835*0ac07820SSatish Balay     }
836*0ac07820SSatish Balay   }
837*0ac07820SSatish Balay   /* copy over the B part */
838*0ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*) baij->B->data;
839*0ac07820SSatish Balay   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
840*0ac07820SSatish Balay   row = baij->rstart*bs;
841*0ac07820SSatish Balay   for ( i=0; i<mbs; i++ ) {
842*0ac07820SSatish Balay     rvals[0] = bs*(baij->rstart + i);
843*0ac07820SSatish Balay     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
844*0ac07820SSatish Balay     for ( j=ai[i]; j<ai[i+1]; j++ ) {
845*0ac07820SSatish Balay       col = baij->garray[aj[j]]*bs;
846*0ac07820SSatish Balay       for (k=0; k<bs; k++ ) {
847*0ac07820SSatish Balay         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
848*0ac07820SSatish Balay         col++; a += bs;
849*0ac07820SSatish Balay       }
850*0ac07820SSatish Balay     }
851*0ac07820SSatish Balay   }
852*0ac07820SSatish Balay   PetscFree(rvals);
853*0ac07820SSatish Balay   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
854*0ac07820SSatish Balay   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
855*0ac07820SSatish Balay 
856*0ac07820SSatish Balay   if (matout != PETSC_NULL) {
857*0ac07820SSatish Balay     *matout = B;
858*0ac07820SSatish Balay   } else {
859*0ac07820SSatish Balay     /* This isn't really an in-place transpose .... but free data structures from baij */
860*0ac07820SSatish Balay     PetscFree(baij->rowners);
861*0ac07820SSatish Balay     ierr = MatDestroy(baij->A); CHKERRQ(ierr);
862*0ac07820SSatish Balay     ierr = MatDestroy(baij->B); CHKERRQ(ierr);
863*0ac07820SSatish Balay     if (baij->colmap) PetscFree(baij->colmap);
864*0ac07820SSatish Balay     if (baij->garray) PetscFree(baij->garray);
865*0ac07820SSatish Balay     if (baij->lvec) VecDestroy(baij->lvec);
866*0ac07820SSatish Balay     if (baij->Mvctx) VecScatterDestroy(baij->Mvctx);
867*0ac07820SSatish Balay     PetscFree(baij);
868*0ac07820SSatish Balay     PetscMemcpy(A,B,sizeof(struct _Mat));
869*0ac07820SSatish Balay     PetscHeaderDestroy(B);
870*0ac07820SSatish Balay   }
871*0ac07820SSatish Balay   return 0;
872*0ac07820SSatish Balay }
873*0ac07820SSatish Balay /* the code does not do the diagonal entries correctly unless the
874*0ac07820SSatish Balay    matrix is square and the column and row owerships are identical.
875*0ac07820SSatish Balay    This is a BUG. The only way to fix it seems to be to access
876*0ac07820SSatish Balay    baij->A and baij->B directly and not through the MatZeroRows()
877*0ac07820SSatish Balay    routine.
878*0ac07820SSatish Balay */
879*0ac07820SSatish Balay static int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag)
880*0ac07820SSatish Balay {
881*0ac07820SSatish Balay   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ *) A->data;
882*0ac07820SSatish Balay   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
883*0ac07820SSatish Balay   int            *procs,*nprocs,j,found,idx,nsends,*work;
884*0ac07820SSatish Balay   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
885*0ac07820SSatish Balay   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
886*0ac07820SSatish Balay   int            *lens,imdex,*lrows,*values,bs=l->bs;
887*0ac07820SSatish Balay   MPI_Comm       comm = A->comm;
888*0ac07820SSatish Balay   MPI_Request    *send_waits,*recv_waits;
889*0ac07820SSatish Balay   MPI_Status     recv_status,*send_status;
890*0ac07820SSatish Balay   IS             istmp;
891*0ac07820SSatish Balay 
892*0ac07820SSatish Balay   ierr = ISGetSize(is,&N); CHKERRQ(ierr);
893*0ac07820SSatish Balay   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
894*0ac07820SSatish Balay 
895*0ac07820SSatish Balay   /*  first count number of contributors to each processor */
896*0ac07820SSatish Balay   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
897*0ac07820SSatish Balay   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
898*0ac07820SSatish Balay   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
899*0ac07820SSatish Balay   for ( i=0; i<N; i++ ) {
900*0ac07820SSatish Balay     idx = rows[i];
901*0ac07820SSatish Balay     found = 0;
902*0ac07820SSatish Balay     for ( j=0; j<size; j++ ) {
903*0ac07820SSatish Balay       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
904*0ac07820SSatish Balay         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
905*0ac07820SSatish Balay       }
906*0ac07820SSatish Balay     }
907*0ac07820SSatish Balay     if (!found) SETERRQ(1,"MatZeroRows_MPIBAIJ:Index out of range");
908*0ac07820SSatish Balay   }
909*0ac07820SSatish Balay   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
910*0ac07820SSatish Balay 
911*0ac07820SSatish Balay   /* inform other processors of number of messages and max length*/
912*0ac07820SSatish Balay   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
913*0ac07820SSatish Balay   MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
914*0ac07820SSatish Balay   nrecvs = work[rank];
915*0ac07820SSatish Balay   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
916*0ac07820SSatish Balay   nmax = work[rank];
917*0ac07820SSatish Balay   PetscFree(work);
918*0ac07820SSatish Balay 
919*0ac07820SSatish Balay   /* post receives:   */
920*0ac07820SSatish Balay   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
921*0ac07820SSatish Balay   CHKPTRQ(rvalues);
922*0ac07820SSatish Balay   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
923*0ac07820SSatish Balay   CHKPTRQ(recv_waits);
924*0ac07820SSatish Balay   for ( i=0; i<nrecvs; i++ ) {
925*0ac07820SSatish Balay     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
926*0ac07820SSatish Balay   }
927*0ac07820SSatish Balay 
928*0ac07820SSatish Balay   /* do sends:
929*0ac07820SSatish Balay       1) starts[i] gives the starting index in svalues for stuff going to
930*0ac07820SSatish Balay          the ith processor
931*0ac07820SSatish Balay   */
932*0ac07820SSatish Balay   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
933*0ac07820SSatish Balay   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
934*0ac07820SSatish Balay   CHKPTRQ(send_waits);
935*0ac07820SSatish Balay   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
936*0ac07820SSatish Balay   starts[0] = 0;
937*0ac07820SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
938*0ac07820SSatish Balay   for ( i=0; i<N; i++ ) {
939*0ac07820SSatish Balay     svalues[starts[owner[i]]++] = rows[i];
940*0ac07820SSatish Balay   }
941*0ac07820SSatish Balay   ISRestoreIndices(is,&rows);
942*0ac07820SSatish Balay 
943*0ac07820SSatish Balay   starts[0] = 0;
944*0ac07820SSatish Balay   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
945*0ac07820SSatish Balay   count = 0;
946*0ac07820SSatish Balay   for ( i=0; i<size; i++ ) {
947*0ac07820SSatish Balay     if (procs[i]) {
948*0ac07820SSatish Balay       MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
949*0ac07820SSatish Balay     }
950*0ac07820SSatish Balay   }
951*0ac07820SSatish Balay   PetscFree(starts);
952*0ac07820SSatish Balay 
953*0ac07820SSatish Balay   base = owners[rank]*bs;
954*0ac07820SSatish Balay 
955*0ac07820SSatish Balay   /*  wait on receives */
956*0ac07820SSatish Balay   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
957*0ac07820SSatish Balay   source = lens + nrecvs;
958*0ac07820SSatish Balay   count  = nrecvs; slen = 0;
959*0ac07820SSatish Balay   while (count) {
960*0ac07820SSatish Balay     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
961*0ac07820SSatish Balay     /* unpack receives into our local space */
962*0ac07820SSatish Balay     MPI_Get_count(&recv_status,MPI_INT,&n);
963*0ac07820SSatish Balay     source[imdex]  = recv_status.MPI_SOURCE;
964*0ac07820SSatish Balay     lens[imdex]  = n;
965*0ac07820SSatish Balay     slen += n;
966*0ac07820SSatish Balay     count--;
967*0ac07820SSatish Balay   }
968*0ac07820SSatish Balay   PetscFree(recv_waits);
969*0ac07820SSatish Balay 
970*0ac07820SSatish Balay   /* move the data into the send scatter */
971*0ac07820SSatish Balay   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
972*0ac07820SSatish Balay   count = 0;
973*0ac07820SSatish Balay   for ( i=0; i<nrecvs; i++ ) {
974*0ac07820SSatish Balay     values = rvalues + i*nmax;
975*0ac07820SSatish Balay     for ( j=0; j<lens[i]; j++ ) {
976*0ac07820SSatish Balay       lrows[count++] = values[j] - base;
977*0ac07820SSatish Balay     }
978*0ac07820SSatish Balay   }
979*0ac07820SSatish Balay   PetscFree(rvalues); PetscFree(lens);
980*0ac07820SSatish Balay   PetscFree(owner); PetscFree(nprocs);
981*0ac07820SSatish Balay 
982*0ac07820SSatish Balay   /* actually zap the local rows */
983*0ac07820SSatish Balay   ierr = ISCreateSeq(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
984*0ac07820SSatish Balay   PLogObjectParent(A,istmp);
985*0ac07820SSatish Balay   PetscFree(lrows);
986*0ac07820SSatish Balay   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
987*0ac07820SSatish Balay   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
988*0ac07820SSatish Balay   ierr = ISDestroy(istmp); CHKERRQ(ierr);
989*0ac07820SSatish Balay 
990*0ac07820SSatish Balay   /* wait on sends */
991*0ac07820SSatish Balay   if (nsends) {
992*0ac07820SSatish Balay     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
993*0ac07820SSatish Balay     CHKPTRQ(send_status);
994*0ac07820SSatish Balay     MPI_Waitall(nsends,send_waits,send_status);
995*0ac07820SSatish Balay     PetscFree(send_status);
996*0ac07820SSatish Balay   }
997*0ac07820SSatish Balay   PetscFree(send_waits); PetscFree(svalues);
998*0ac07820SSatish Balay 
999*0ac07820SSatish Balay   return 0;
1000*0ac07820SSatish Balay }
1001*0ac07820SSatish Balay 
1002*0ac07820SSatish Balay static int MatConvertSameType_MPIBAIJ(Mat,Mat *,int);
1003*0ac07820SSatish Balay 
100479bdfe76SSatish Balay /* -------------------------------------------------------------------*/
100579bdfe76SSatish Balay static struct _MatOps MatOps = {
1006bd16c2feSSatish Balay   MatSetValues_MPIBAIJ,MatGetRow_MPIBAIJ,MatRestoreRow_MPIBAIJ,MatMult_MPIBAIJ,
100793bc47c4SSatish Balay   MatMultAdd_MPIBAIJ,MatMultTrans_MPIBAIJ,MatMultTransAdd_MPIBAIJ,MatSolve_MPIBAIJ,
100893bc47c4SSatish Balay   MatSolveAdd_MPIBAIJ,MatSolveTrans_MPIBAIJ,MatSolveTransAdd_MPIBAIJ,MatLUFactor_MPIBAIJ,
1009*0ac07820SSatish Balay   0,0,MatTranspose_MPIBAIJ,MatGetInfo_MPIBAIJ,
1010acdf5bf4SSatish Balay   0,MatGetDiagonal_MPIBAIJ,0,MatNorm_MPIBAIJ,
101158667388SSatish Balay   MatAssemblyBegin_MPIBAIJ,MatAssemblyEnd_MPIBAIJ,0,MatSetOption_MPIBAIJ,
1012*0ac07820SSatish Balay   MatZeroEntries_MPIBAIJ,MatZeroRows_MPIBAIJ,MatGetReordering_MPIBAIJ,MatLUFactorSymbolic_MPIBAIJ,
101393bc47c4SSatish Balay   MatLUFactorNumeric_MPIBAIJ,0,0,MatGetSize_MPIBAIJ,
101493bc47c4SSatish Balay   MatGetLocalSize_MPIBAIJ,MatGetOwnershipRange_MPIBAIJ,MatILUFactorSymbolic_MPIBAIJ,0,
101579bdfe76SSatish Balay   0,0,0,0,
1016*0ac07820SSatish Balay   0,MatConvertSameType_MPIBAIJ,0,0,
1017d212a18eSSatish Balay   0,0,0,MatGetSubMatrices_MPIBAIJ,
1018d212a18eSSatish Balay   MatIncreaseOverlap_MPIBAIJ,MatGetValues_MPIBAIJ,0,0,
10195a838052SSatish Balay   MatScale_MPIBAIJ,0,0,0,MatGetBlockSize_MPIBAIJ};
102079bdfe76SSatish Balay 
102179bdfe76SSatish Balay 
102279bdfe76SSatish Balay /*@C
102379bdfe76SSatish Balay    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
102479bdfe76SSatish Balay    (block compressed row).  For good matrix assembly performance
102579bdfe76SSatish Balay    the user should preallocate the matrix storage by setting the parameters
102679bdfe76SSatish Balay    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
102779bdfe76SSatish Balay    performance can be increased by more than a factor of 50.
102879bdfe76SSatish Balay 
102979bdfe76SSatish Balay    Input Parameters:
103079bdfe76SSatish Balay .  comm - MPI communicator
103179bdfe76SSatish Balay .  bs   - size of blockk
103279bdfe76SSatish Balay .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
103392e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
103492e8d321SLois Curfman McInnes            y vector for the matrix-vector product y = Ax.
103592e8d321SLois Curfman McInnes .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
103692e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
103792e8d321SLois Curfman McInnes            x vector for the matrix-vector product y = Ax.
103879bdfe76SSatish Balay .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
103992e8d321SLois Curfman McInnes .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
104079bdfe76SSatish Balay .  d_nz  - number of block nonzeros per block row in diagonal portion of local
104179bdfe76SSatish Balay            submatrix  (same for all local rows)
104292e8d321SLois Curfman McInnes .  d_nzz - array containing the number of block nonzeros in the various block rows
104392e8d321SLois Curfman McInnes            of the in diagonal portion of the local (possibly different for each block
104492e8d321SLois Curfman McInnes            row) or PETSC_NULL.  You must leave room for the diagonal entry even if
104592e8d321SLois Curfman McInnes            it is zero.
104692e8d321SLois Curfman McInnes .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
104779bdfe76SSatish Balay            submatrix (same for all local rows).
104892e8d321SLois Curfman McInnes .  o_nzz - array containing the number of nonzeros in the various block rows of the
104992e8d321SLois Curfman McInnes            off-diagonal portion of the local submatrix (possibly different for
105092e8d321SLois Curfman McInnes            each block row) or PETSC_NULL.
105179bdfe76SSatish Balay 
105279bdfe76SSatish Balay    Output Parameter:
105379bdfe76SSatish Balay .  A - the matrix
105479bdfe76SSatish Balay 
105579bdfe76SSatish Balay    Notes:
105679bdfe76SSatish Balay    The user MUST specify either the local or global matrix dimensions
105779bdfe76SSatish Balay    (possibly both).
105879bdfe76SSatish Balay 
105979bdfe76SSatish Balay    Storage Information:
106079bdfe76SSatish Balay    For a square global matrix we define each processor's diagonal portion
106179bdfe76SSatish Balay    to be its local rows and the corresponding columns (a square submatrix);
106279bdfe76SSatish Balay    each processor's off-diagonal portion encompasses the remainder of the
106379bdfe76SSatish Balay    local matrix (a rectangular submatrix).
106479bdfe76SSatish Balay 
106579bdfe76SSatish Balay    The user can specify preallocated storage for the diagonal part of
106679bdfe76SSatish Balay    the local submatrix with either d_nz or d_nnz (not both).  Set
106779bdfe76SSatish Balay    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
106879bdfe76SSatish Balay    memory allocation.  Likewise, specify preallocated storage for the
106979bdfe76SSatish Balay    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
107079bdfe76SSatish Balay 
107179bdfe76SSatish Balay    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
107279bdfe76SSatish Balay    the figure below we depict these three local rows and all columns (0-11).
107379bdfe76SSatish Balay 
107479bdfe76SSatish Balay $          0 1 2 3 4 5 6 7 8 9 10 11
107579bdfe76SSatish Balay $         -------------------
107679bdfe76SSatish Balay $  row 3  |  o o o d d d o o o o o o
107779bdfe76SSatish Balay $  row 4  |  o o o d d d o o o o o o
107879bdfe76SSatish Balay $  row 5  |  o o o d d d o o o o o o
107979bdfe76SSatish Balay $         -------------------
108079bdfe76SSatish Balay $
108179bdfe76SSatish Balay 
108279bdfe76SSatish Balay    Thus, any entries in the d locations are stored in the d (diagonal)
108379bdfe76SSatish Balay    submatrix, and any entries in the o locations are stored in the
108479bdfe76SSatish Balay    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
108557b952d6SSatish Balay    stored simply in the MATSEQBAIJ format for compressed row storage.
108679bdfe76SSatish Balay 
108779bdfe76SSatish Balay    Now d_nz should indicate the number of nonzeros per row in the d matrix,
108879bdfe76SSatish Balay    and o_nz should indicate the number of nonzeros per row in the o matrix.
108979bdfe76SSatish Balay    In general, for PDE problems in which most nonzeros are near the diagonal,
109092e8d321SLois Curfman McInnes    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
109192e8d321SLois Curfman McInnes    or you will get TERRIBLE performance; see the users' manual chapter on
109292e8d321SLois Curfman McInnes    matrices and the file $(PETSC_DIR)/Performance.
109379bdfe76SSatish Balay 
109492e8d321SLois Curfman McInnes .keywords: matrix, block, aij, compressed row, sparse, parallel
109579bdfe76SSatish Balay 
109679bdfe76SSatish Balay .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues()
109779bdfe76SSatish Balay @*/
109879bdfe76SSatish Balay int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,
109979bdfe76SSatish Balay                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
110079bdfe76SSatish Balay {
110179bdfe76SSatish Balay   Mat          B;
110279bdfe76SSatish Balay   Mat_MPIBAIJ  *b;
1103cee3aa6bSSatish Balay   int          ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE;
110479bdfe76SSatish Balay 
110579bdfe76SSatish Balay   if (bs < 1) SETERRQ(1,"MatCreateMPIBAIJ: invalid block size specified");
110679bdfe76SSatish Balay   *A = 0;
110779bdfe76SSatish Balay   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIBAIJ,comm);
110879bdfe76SSatish Balay   PLogObjectCreate(B);
110979bdfe76SSatish Balay   B->data       = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b);
111079bdfe76SSatish Balay   PetscMemzero(b,sizeof(Mat_MPIBAIJ));
111179bdfe76SSatish Balay   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
111279bdfe76SSatish Balay   B->destroy    = MatDestroy_MPIBAIJ;
111379bdfe76SSatish Balay   B->view       = MatView_MPIBAIJ;
111457b952d6SSatish Balay 
111579bdfe76SSatish Balay   B->factor     = 0;
111679bdfe76SSatish Balay   B->assembled  = PETSC_FALSE;
111779bdfe76SSatish Balay 
111879bdfe76SSatish Balay   b->insertmode = NOT_SET_VALUES;
111979bdfe76SSatish Balay   MPI_Comm_rank(comm,&b->rank);
112079bdfe76SSatish Balay   MPI_Comm_size(comm,&b->size);
112179bdfe76SSatish Balay 
112279bdfe76SSatish Balay   if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
112357b952d6SSatish Balay     SETERRQ(1,"MatCreateMPIBAIJ:Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
1124cee3aa6bSSatish Balay   if ( M == PETSC_DECIDE && m == PETSC_DECIDE) SETERRQ(1,"MatCreateMPIBAIJ: either M or m should be specified");
1125cee3aa6bSSatish Balay   if ( M == PETSC_DECIDE && n == PETSC_DECIDE)SETERRQ(1,"MatCreateMPIBAIJ: either N or n should be specified");
1126cee3aa6bSSatish Balay   if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE;
1127cee3aa6bSSatish Balay   if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE;
112879bdfe76SSatish Balay 
112979bdfe76SSatish Balay   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
113079bdfe76SSatish Balay     work[0] = m; work[1] = n;
113179bdfe76SSatish Balay     mbs = m/bs; nbs = n/bs;
113279bdfe76SSatish Balay     MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
113379bdfe76SSatish Balay     if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;}
113479bdfe76SSatish Balay     if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;}
113579bdfe76SSatish Balay   }
113679bdfe76SSatish Balay   if (m == PETSC_DECIDE) {
113779bdfe76SSatish Balay     Mbs = M/bs;
113879bdfe76SSatish Balay     if (Mbs*bs != M) SETERRQ(1,"MatCreateMPIBAIJ: No of global rows must be divisible by blocksize");
113979bdfe76SSatish Balay     mbs = Mbs/b->size + ((Mbs % b->size) > b->rank);
114079bdfe76SSatish Balay     m   = mbs*bs;
114179bdfe76SSatish Balay   }
114279bdfe76SSatish Balay   if (n == PETSC_DECIDE) {
114379bdfe76SSatish Balay     Nbs = N/bs;
114479bdfe76SSatish Balay     if (Nbs*bs != N) SETERRQ(1,"MatCreateMPIBAIJ: No of global cols must be divisible by blocksize");
114579bdfe76SSatish Balay     nbs = Nbs/b->size + ((Nbs % b->size) > b->rank);
114679bdfe76SSatish Balay     n   = nbs*bs;
114779bdfe76SSatish Balay   }
1148cee3aa6bSSatish Balay   if (mbs*bs != m || nbs*bs != n) SETERRQ(1,"MatCreateMPIBAIJ: No of local rows, cols must be divisible by blocksize");
114979bdfe76SSatish Balay 
115079bdfe76SSatish Balay   b->m = m; B->m = m;
115179bdfe76SSatish Balay   b->n = n; B->n = n;
115279bdfe76SSatish Balay   b->N = N; B->N = N;
115379bdfe76SSatish Balay   b->M = M; B->M = M;
115479bdfe76SSatish Balay   b->bs  = bs;
115579bdfe76SSatish Balay   b->bs2 = bs*bs;
115679bdfe76SSatish Balay   b->mbs = mbs;
115779bdfe76SSatish Balay   b->nbs = nbs;
115879bdfe76SSatish Balay   b->Mbs = Mbs;
115979bdfe76SSatish Balay   b->Nbs = Nbs;
116079bdfe76SSatish Balay 
116179bdfe76SSatish Balay   /* build local table of row and column ownerships */
116279bdfe76SSatish Balay   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
116379bdfe76SSatish Balay   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIBAIJ));
1164*0ac07820SSatish Balay   b->cowners = b->rowners + b->size + 2;
116579bdfe76SSatish Balay   MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);
116679bdfe76SSatish Balay   b->rowners[0] = 0;
116779bdfe76SSatish Balay   for ( i=2; i<=b->size; i++ ) {
116879bdfe76SSatish Balay     b->rowners[i] += b->rowners[i-1];
116979bdfe76SSatish Balay   }
117079bdfe76SSatish Balay   b->rstart = b->rowners[b->rank];
117179bdfe76SSatish Balay   b->rend   = b->rowners[b->rank+1];
117279bdfe76SSatish Balay   MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);
117379bdfe76SSatish Balay   b->cowners[0] = 0;
117479bdfe76SSatish Balay   for ( i=2; i<=b->size; i++ ) {
117579bdfe76SSatish Balay     b->cowners[i] += b->cowners[i-1];
117679bdfe76SSatish Balay   }
117779bdfe76SSatish Balay   b->cstart = b->cowners[b->rank];
117879bdfe76SSatish Balay   b->cend   = b->cowners[b->rank+1];
117979bdfe76SSatish Balay 
118079bdfe76SSatish Balay   if (d_nz == PETSC_DEFAULT) d_nz = 5;
118179bdfe76SSatish Balay   ierr = MatCreateSeqBAIJ(MPI_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
118279bdfe76SSatish Balay   PLogObjectParent(B,b->A);
118379bdfe76SSatish Balay   if (o_nz == PETSC_DEFAULT) o_nz = 0;
118479bdfe76SSatish Balay   ierr = MatCreateSeqBAIJ(MPI_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
118579bdfe76SSatish Balay   PLogObjectParent(B,b->B);
118679bdfe76SSatish Balay 
118779bdfe76SSatish Balay   /* build cache for off array entries formed */
118879bdfe76SSatish Balay   ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
118979bdfe76SSatish Balay   b->colmap      = 0;
119079bdfe76SSatish Balay   b->garray      = 0;
119179bdfe76SSatish Balay   b->roworiented = 1;
119279bdfe76SSatish Balay 
119379bdfe76SSatish Balay   /* stuff used for matrix vector multiply */
119479bdfe76SSatish Balay   b->lvec      = 0;
119579bdfe76SSatish Balay   b->Mvctx     = 0;
119679bdfe76SSatish Balay 
119779bdfe76SSatish Balay   /* stuff for MatGetRow() */
119879bdfe76SSatish Balay   b->rowindices   = 0;
119979bdfe76SSatish Balay   b->rowvalues    = 0;
120079bdfe76SSatish Balay   b->getrowactive = PETSC_FALSE;
120179bdfe76SSatish Balay 
120279bdfe76SSatish Balay   *A = B;
120379bdfe76SSatish Balay   return 0;
120479bdfe76SSatish Balay }
1205*0ac07820SSatish Balay static int MatConvertSameType_MPIBAIJ(Mat matin,Mat *newmat,int cpvalues)
1206*0ac07820SSatish Balay {
1207*0ac07820SSatish Balay   Mat        mat;
1208*0ac07820SSatish Balay   Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data;
1209*0ac07820SSatish Balay   int        ierr, len=0, flg;
1210*0ac07820SSatish Balay 
1211*0ac07820SSatish Balay   *newmat       = 0;
1212*0ac07820SSatish Balay   PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIBAIJ,matin->comm);
1213*0ac07820SSatish Balay   PLogObjectCreate(mat);
1214*0ac07820SSatish Balay   mat->data       = (void *) (a = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(a);
1215*0ac07820SSatish Balay   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
1216*0ac07820SSatish Balay   mat->destroy    = MatDestroy_MPIBAIJ;
1217*0ac07820SSatish Balay   mat->view       = MatView_MPIBAIJ;
1218*0ac07820SSatish Balay   mat->factor     = matin->factor;
1219*0ac07820SSatish Balay   mat->assembled  = PETSC_TRUE;
1220*0ac07820SSatish Balay 
1221*0ac07820SSatish Balay   a->m = mat->m   = oldmat->m;
1222*0ac07820SSatish Balay   a->n = mat->n   = oldmat->n;
1223*0ac07820SSatish Balay   a->M = mat->M   = oldmat->M;
1224*0ac07820SSatish Balay   a->N = mat->N   = oldmat->N;
1225*0ac07820SSatish Balay 
1226*0ac07820SSatish Balay   a->bs  = oldmat->bs;
1227*0ac07820SSatish Balay   a->bs2 = oldmat->bs2;
1228*0ac07820SSatish Balay   a->mbs = oldmat->mbs;
1229*0ac07820SSatish Balay   a->nbs = oldmat->nbs;
1230*0ac07820SSatish Balay   a->Mbs = oldmat->Mbs;
1231*0ac07820SSatish Balay   a->Nbs = oldmat->Nbs;
1232*0ac07820SSatish Balay 
1233*0ac07820SSatish Balay   a->rstart       = oldmat->rstart;
1234*0ac07820SSatish Balay   a->rend         = oldmat->rend;
1235*0ac07820SSatish Balay   a->cstart       = oldmat->cstart;
1236*0ac07820SSatish Balay   a->cend         = oldmat->cend;
1237*0ac07820SSatish Balay   a->size         = oldmat->size;
1238*0ac07820SSatish Balay   a->rank         = oldmat->rank;
1239*0ac07820SSatish Balay   a->insertmode   = NOT_SET_VALUES;
1240*0ac07820SSatish Balay   a->rowvalues    = 0;
1241*0ac07820SSatish Balay   a->getrowactive = PETSC_FALSE;
1242*0ac07820SSatish Balay 
1243*0ac07820SSatish Balay   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
1244*0ac07820SSatish Balay   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIBAIJ));
1245*0ac07820SSatish Balay   a->cowners = a->rowners + a->size + 2;
1246*0ac07820SSatish Balay   PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
1247*0ac07820SSatish Balay   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
1248*0ac07820SSatish Balay   if (oldmat->colmap) {
1249*0ac07820SSatish Balay     a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap);
1250*0ac07820SSatish Balay     PLogObjectMemory(mat,(a->Nbs)*sizeof(int));
1251*0ac07820SSatish Balay     PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));
1252*0ac07820SSatish Balay   } else a->colmap = 0;
1253*0ac07820SSatish Balay   if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) {
1254*0ac07820SSatish Balay     a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray);
1255*0ac07820SSatish Balay     PLogObjectMemory(mat,len*sizeof(int));
1256*0ac07820SSatish Balay     PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
1257*0ac07820SSatish Balay   } else a->garray = 0;
1258*0ac07820SSatish Balay 
1259*0ac07820SSatish Balay   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
1260*0ac07820SSatish Balay   PLogObjectParent(mat,a->lvec);
1261*0ac07820SSatish Balay   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
1262*0ac07820SSatish Balay   PLogObjectParent(mat,a->Mvctx);
1263*0ac07820SSatish Balay   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
1264*0ac07820SSatish Balay   PLogObjectParent(mat,a->A);
1265*0ac07820SSatish Balay   ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
1266*0ac07820SSatish Balay   PLogObjectParent(mat,a->B);
1267*0ac07820SSatish Balay   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
1268*0ac07820SSatish Balay   if (flg) {
1269*0ac07820SSatish Balay     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
1270*0ac07820SSatish Balay   }
1271*0ac07820SSatish Balay   *newmat = mat;
1272*0ac07820SSatish Balay   return 0;
1273*0ac07820SSatish Balay }
127457b952d6SSatish Balay 
127557b952d6SSatish Balay #include "sys.h"
127657b952d6SSatish Balay 
127757b952d6SSatish Balay int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat)
127857b952d6SSatish Balay {
127957b952d6SSatish Balay   Mat          A;
128057b952d6SSatish Balay   int          i, nz, ierr, j,rstart, rend, fd;
128157b952d6SSatish Balay   Scalar       *vals,*buf;
128257b952d6SSatish Balay   MPI_Comm     comm = ((PetscObject)viewer)->comm;
128357b952d6SSatish Balay   MPI_Status   status;
1284cee3aa6bSSatish Balay   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
128557b952d6SSatish Balay   int          *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf;
128657b952d6SSatish Balay   int          flg,tag = ((PetscObject)viewer)->tag,bs=1,bs2,Mbs,mbs,extra_rows;
128757b952d6SSatish Balay   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
128857b952d6SSatish Balay   int          dcount,kmax,k,nzcount,tmp;
128957b952d6SSatish Balay 
129057b952d6SSatish Balay 
129157b952d6SSatish Balay   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
129257b952d6SSatish Balay   bs2  = bs*bs;
129357b952d6SSatish Balay 
129457b952d6SSatish Balay   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
129557b952d6SSatish Balay   if (!rank) {
129657b952d6SSatish Balay     ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
129757b952d6SSatish Balay     ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr);
1298cee3aa6bSSatish Balay     if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIBAIJ:not matrix object");
129957b952d6SSatish Balay   }
130057b952d6SSatish Balay 
130157b952d6SSatish Balay   MPI_Bcast(header+1,3,MPI_INT,0,comm);
130257b952d6SSatish Balay   M = header[1]; N = header[2];
130357b952d6SSatish Balay 
130457b952d6SSatish Balay 
130557b952d6SSatish Balay   if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices");
130657b952d6SSatish Balay 
130757b952d6SSatish Balay   /*
130857b952d6SSatish Balay      This code adds extra rows to make sure the number of rows is
130957b952d6SSatish Balay      divisible by the blocksize
131057b952d6SSatish Balay   */
131157b952d6SSatish Balay   Mbs        = M/bs;
131257b952d6SSatish Balay   extra_rows = bs - M + bs*(Mbs);
131357b952d6SSatish Balay   if (extra_rows == bs) extra_rows = 0;
131457b952d6SSatish Balay   else                  Mbs++;
131557b952d6SSatish Balay   if (extra_rows &&!rank) {
131657b952d6SSatish Balay     PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize");
131757b952d6SSatish Balay   }
131857b952d6SSatish Balay   /* determine ownership of all rows */
131957b952d6SSatish Balay   mbs = Mbs/size + ((Mbs % size) > rank);
132057b952d6SSatish Balay   m   = mbs * bs;
1321cee3aa6bSSatish Balay   rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners);
1322cee3aa6bSSatish Balay   browners = rowners + size + 1;
132357b952d6SSatish Balay   MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);
132457b952d6SSatish Balay   rowners[0] = 0;
1325cee3aa6bSSatish Balay   for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1];
1326cee3aa6bSSatish Balay   for ( i=0; i<=size;  i++ ) browners[i] = rowners[i]*bs;
132757b952d6SSatish Balay   rstart = rowners[rank];
132857b952d6SSatish Balay   rend   = rowners[rank+1];
132957b952d6SSatish Balay 
133057b952d6SSatish Balay   /* distribute row lengths to all processors */
133157b952d6SSatish Balay   locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens);
133257b952d6SSatish Balay   if (!rank) {
133357b952d6SSatish Balay     rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths);
133457b952d6SSatish Balay     ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
133557b952d6SSatish Balay     for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
133657b952d6SSatish Balay     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
1337cee3aa6bSSatish Balay     for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i];
1338cee3aa6bSSatish Balay     MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);
133957b952d6SSatish Balay     PetscFree(sndcounts);
134057b952d6SSatish Balay   }
134157b952d6SSatish Balay   else {
134257b952d6SSatish Balay     MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);
134357b952d6SSatish Balay   }
134457b952d6SSatish Balay 
134557b952d6SSatish Balay   if (!rank) {
134657b952d6SSatish Balay     /* calculate the number of nonzeros on each processor */
134757b952d6SSatish Balay     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
134857b952d6SSatish Balay     PetscMemzero(procsnz,size*sizeof(int));
134957b952d6SSatish Balay     for ( i=0; i<size; i++ ) {
135057b952d6SSatish Balay       for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) {
135157b952d6SSatish Balay         procsnz[i] += rowlengths[j];
135257b952d6SSatish Balay       }
135357b952d6SSatish Balay     }
135457b952d6SSatish Balay     PetscFree(rowlengths);
135557b952d6SSatish Balay 
135657b952d6SSatish Balay     /* determine max buffer needed and allocate it */
135757b952d6SSatish Balay     maxnz = 0;
135857b952d6SSatish Balay     for ( i=0; i<size; i++ ) {
135957b952d6SSatish Balay       maxnz = PetscMax(maxnz,procsnz[i]);
136057b952d6SSatish Balay     }
136157b952d6SSatish Balay     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
136257b952d6SSatish Balay 
136357b952d6SSatish Balay     /* read in my part of the matrix column indices  */
136457b952d6SSatish Balay     nz = procsnz[0];
136557b952d6SSatish Balay     ibuf = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
136657b952d6SSatish Balay     mycols = ibuf;
1367cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
136857b952d6SSatish Balay     ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr);
1369cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
1370cee3aa6bSSatish Balay 
137157b952d6SSatish Balay     /* read in every ones (except the last) and ship off */
137257b952d6SSatish Balay     for ( i=1; i<size-1; i++ ) {
137357b952d6SSatish Balay       nz = procsnz[i];
137457b952d6SSatish Balay       ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
137557b952d6SSatish Balay       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
137657b952d6SSatish Balay     }
137757b952d6SSatish Balay     /* read in the stuff for the last proc */
137857b952d6SSatish Balay     if ( size != 1 ) {
137957b952d6SSatish Balay       nz = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
138057b952d6SSatish Balay       ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
138157b952d6SSatish Balay       for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i;
1382cee3aa6bSSatish Balay       MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);
138357b952d6SSatish Balay     }
138457b952d6SSatish Balay     PetscFree(cols);
138557b952d6SSatish Balay   }
138657b952d6SSatish Balay   else {
138757b952d6SSatish Balay     /* determine buffer space needed for message */
138857b952d6SSatish Balay     nz = 0;
138957b952d6SSatish Balay     for ( i=0; i<m; i++ ) {
139057b952d6SSatish Balay       nz += locrowlens[i];
139157b952d6SSatish Balay     }
139257b952d6SSatish Balay     ibuf = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
139357b952d6SSatish Balay     mycols = ibuf;
139457b952d6SSatish Balay     /* receive message of column indices*/
139557b952d6SSatish Balay     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
139657b952d6SSatish Balay     MPI_Get_count(&status,MPI_INT,&maxnz);
1397cee3aa6bSSatish Balay     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIBAIJ:something is wrong with file");
139857b952d6SSatish Balay   }
139957b952d6SSatish Balay 
140057b952d6SSatish Balay   /* loop over local rows, determining number of off diagonal entries */
1401cee3aa6bSSatish Balay   dlens  = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens);
1402cee3aa6bSSatish Balay   odlens = dlens + (rend-rstart);
140357b952d6SSatish Balay   mask   = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask);
1404cee3aa6bSSatish Balay   PetscMemzero(mask,3*Mbs*sizeof(int));
140557b952d6SSatish Balay   masked1 = mask    + Mbs;
140657b952d6SSatish Balay   masked2 = masked1 + Mbs;
140757b952d6SSatish Balay   rowcount = 0; nzcount = 0;
140857b952d6SSatish Balay   for ( i=0; i<mbs; i++ ) {
140957b952d6SSatish Balay     dcount  = 0;
141057b952d6SSatish Balay     odcount = 0;
141157b952d6SSatish Balay     for ( j=0; j<bs; j++ ) {
141257b952d6SSatish Balay       kmax = locrowlens[rowcount];
141357b952d6SSatish Balay       for ( k=0; k<kmax; k++ ) {
141457b952d6SSatish Balay         tmp = mycols[nzcount++]/bs;
141557b952d6SSatish Balay         if (!mask[tmp]) {
141657b952d6SSatish Balay           mask[tmp] = 1;
141757b952d6SSatish Balay           if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp;
141857b952d6SSatish Balay           else masked1[dcount++] = tmp;
141957b952d6SSatish Balay         }
142057b952d6SSatish Balay       }
142157b952d6SSatish Balay       rowcount++;
142257b952d6SSatish Balay     }
1423cee3aa6bSSatish Balay 
142457b952d6SSatish Balay     dlens[i]  = dcount;
142557b952d6SSatish Balay     odlens[i] = odcount;
1426cee3aa6bSSatish Balay 
142757b952d6SSatish Balay     /* zero out the mask elements we set */
142857b952d6SSatish Balay     for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0;
142957b952d6SSatish Balay     for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0;
143057b952d6SSatish Balay   }
1431cee3aa6bSSatish Balay 
143257b952d6SSatish Balay   /* create our matrix */
143357b952d6SSatish Balay   ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);CHKERRQ(ierr);
143457b952d6SSatish Balay   A = *newmat;
14356d4a8577SBarry Smith   MatSetOption(A,MAT_COLUMNS_SORTED);
143657b952d6SSatish Balay 
143757b952d6SSatish Balay   if (!rank) {
143857b952d6SSatish Balay     buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf);
143957b952d6SSatish Balay     /* read in my part of the matrix numerical values  */
144057b952d6SSatish Balay     nz = procsnz[0];
144157b952d6SSatish Balay     vals = buf;
1442cee3aa6bSSatish Balay     mycols = ibuf;
1443cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
144457b952d6SSatish Balay     ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
1445cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
144657b952d6SSatish Balay     /* insert into matrix */
144757b952d6SSatish Balay     jj      = rstart*bs;
144857b952d6SSatish Balay     for ( i=0; i<m; i++ ) {
144957b952d6SSatish Balay       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
145057b952d6SSatish Balay       mycols += locrowlens[i];
145157b952d6SSatish Balay       vals   += locrowlens[i];
145257b952d6SSatish Balay       jj++;
145357b952d6SSatish Balay     }
145457b952d6SSatish Balay     /* read in other processors( except the last one) and ship out */
145557b952d6SSatish Balay     for ( i=1; i<size-1; i++ ) {
145657b952d6SSatish Balay       nz = procsnz[i];
145757b952d6SSatish Balay       vals = buf;
145857b952d6SSatish Balay       ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
145957b952d6SSatish Balay       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
146057b952d6SSatish Balay     }
146157b952d6SSatish Balay     /* the last proc */
146257b952d6SSatish Balay     if ( size != 1 ){
146357b952d6SSatish Balay       nz = procsnz[i] - extra_rows;
1464cee3aa6bSSatish Balay       vals = buf;
146557b952d6SSatish Balay       ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
146657b952d6SSatish Balay       for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0;
1467cee3aa6bSSatish Balay       MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);
146857b952d6SSatish Balay     }
146957b952d6SSatish Balay     PetscFree(procsnz);
147057b952d6SSatish Balay   }
147157b952d6SSatish Balay   else {
147257b952d6SSatish Balay     /* receive numeric values */
147357b952d6SSatish Balay     buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf);
147457b952d6SSatish Balay 
147557b952d6SSatish Balay     /* receive message of values*/
147657b952d6SSatish Balay     vals = buf;
1477cee3aa6bSSatish Balay     mycols = ibuf;
147857b952d6SSatish Balay     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
147957b952d6SSatish Balay     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1480cee3aa6bSSatish Balay     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIBAIJ:something is wrong with file");
148157b952d6SSatish Balay 
148257b952d6SSatish Balay     /* insert into matrix */
148357b952d6SSatish Balay     jj      = rstart*bs;
1484cee3aa6bSSatish Balay     for ( i=0; i<m; i++ ) {
148557b952d6SSatish Balay       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
148657b952d6SSatish Balay       mycols += locrowlens[i];
148757b952d6SSatish Balay       vals   += locrowlens[i];
148857b952d6SSatish Balay       jj++;
148957b952d6SSatish Balay     }
149057b952d6SSatish Balay   }
149157b952d6SSatish Balay   PetscFree(locrowlens);
149257b952d6SSatish Balay   PetscFree(buf);
149357b952d6SSatish Balay   PetscFree(ibuf);
149457b952d6SSatish Balay   PetscFree(rowners);
149557b952d6SSatish Balay   PetscFree(dlens);
1496cee3aa6bSSatish Balay   PetscFree(mask);
14976d4a8577SBarry Smith   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
14986d4a8577SBarry Smith   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
149957b952d6SSatish Balay   return 0;
150057b952d6SSatish Balay }
150157b952d6SSatish Balay 
150257b952d6SSatish Balay 
1503