xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 4e220ebcc634eb0c8ccd78bed049803c9194c4d4)
179bdfe76SSatish Balay #ifndef lint
2*4e220ebcSLois Curfman McInnes static char vcid[] = "$Id: mpibaij.c,v 1.22 1996/08/15 12:48:12 bsmith Exp curfman $";
379bdfe76SSatish Balay #endif
479bdfe76SSatish Balay 
570f55243SBarry Smith #include "src/mat/impls/baij/mpi/mpibaij.h"
6c16cb8f2SBarry Smith #include "src/vec/vecimpl.h"
779bdfe76SSatish Balay 
857b952d6SSatish Balay #include "draw.h"
957b952d6SSatish Balay #include "pinclude/pviewer.h"
1057b952d6SSatish Balay 
1157b952d6SSatish Balay extern int MatSetUpMultiply_MPIBAIJ(Mat);
1257b952d6SSatish Balay extern int DisAssemble_MPIBAIJ(Mat);
13d212a18eSSatish Balay extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int);
14d212a18eSSatish Balay extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatGetSubMatrixCall,Mat **);
1593bc47c4SSatish Balay extern int MatLUFactorSymbolic_MPIBAIJ(Mat,IS,IS,double,Mat *);
1693bc47c4SSatish Balay extern int MatLUFactorNumeric_MPIBAIJ(Mat,Mat *);
1793bc47c4SSatish Balay extern int MatLUFactor_MPIBAIJ(Mat,IS,IS,double);
1893bc47c4SSatish Balay extern int MatSolve_MPIBAIJ(Mat,Vec,Vec);
1993bc47c4SSatish Balay extern int MatSolveAdd_MPIBAIJ(Mat,Vec,Vec,Vec);
2093bc47c4SSatish Balay extern int MatSolveTrans_MPIBAIJ(Mat,Vec,Vec);
2193bc47c4SSatish Balay extern int MatSolveTransAdd_MPIBAIJ(Mat,Vec,Vec,Vec);
2293bc47c4SSatish Balay extern int MatILUFactorSymbolic_MPIBAIJ(Mat,IS,IS,double,int,Mat *);
2357b952d6SSatish Balay 
24537820f0SBarry Smith /*
25537820f0SBarry Smith      Local utility routine that creates a mapping from the global column
2657b952d6SSatish Balay    number to the local number in the off-diagonal part of the local
2757b952d6SSatish Balay    storage of the matrix.  This is done in a non scable way since the
2857b952d6SSatish Balay    length of colmap equals the global matrix length.
2957b952d6SSatish Balay */
3057b952d6SSatish Balay static int CreateColmap_MPIBAIJ_Private(Mat mat)
3157b952d6SSatish Balay {
3257b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
3357b952d6SSatish Balay   Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data;
3457b952d6SSatish Balay   int         nbs = B->nbs,i;
3557b952d6SSatish Balay 
3657b952d6SSatish Balay   baij->colmap = (int *) PetscMalloc(baij->Nbs*sizeof(int));CHKPTRQ(baij->colmap);
3757b952d6SSatish Balay   PLogObjectMemory(mat,baij->Nbs*sizeof(int));
3857b952d6SSatish Balay   PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));
39905e6a2fSBarry Smith   for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i + 1;
4057b952d6SSatish Balay   return 0;
4157b952d6SSatish Balay }
4257b952d6SSatish Balay 
4357b952d6SSatish Balay 
44acdf5bf4SSatish Balay static int MatGetReordering_MPIBAIJ(Mat mat,MatReordering type,IS *rperm,IS *cperm)
4557b952d6SSatish Balay {
4657b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
4757b952d6SSatish Balay   int         ierr;
4857b952d6SSatish Balay   if (baij->size == 1) {
4957b952d6SSatish Balay     ierr = MatGetReordering(baij->A,type,rperm,cperm); CHKERRQ(ierr);
5057b952d6SSatish Balay   } else SETERRQ(1,"MatGetReordering_MPIBAIJ:not supported in parallel");
5157b952d6SSatish Balay   return 0;
5257b952d6SSatish Balay }
5357b952d6SSatish Balay 
5457b952d6SSatish Balay static int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
5557b952d6SSatish Balay {
5657b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
5757b952d6SSatish Balay   Scalar      value;
5857b952d6SSatish Balay   int         ierr,i,j, rstart = baij->rstart, rend = baij->rend;
5957b952d6SSatish Balay   int         cstart = baij->cstart, cend = baij->cend,row,col;
6057b952d6SSatish Balay   int         roworiented = baij->roworiented,rstart_orig,rend_orig;
6157b952d6SSatish Balay   int         cstart_orig,cend_orig,bs=baij->bs;
6257b952d6SSatish Balay 
6357b952d6SSatish Balay   if (baij->insertmode != NOT_SET_VALUES && baij->insertmode != addv) {
6457b952d6SSatish Balay     SETERRQ(1,"MatSetValues_MPIBAIJ:Cannot mix inserts and adds");
6557b952d6SSatish Balay   }
6657b952d6SSatish Balay   baij->insertmode = addv;
6757b952d6SSatish Balay   rstart_orig = rstart*bs;
6857b952d6SSatish Balay   rend_orig   = rend*bs;
6957b952d6SSatish Balay   cstart_orig = cstart*bs;
7057b952d6SSatish Balay   cend_orig   = cend*bs;
7157b952d6SSatish Balay   for ( i=0; i<m; i++ ) {
7257b952d6SSatish Balay     if (im[i] < 0) SETERRQ(1,"MatSetValues_MPIBAIJ:Negative row");
7357b952d6SSatish Balay     if (im[i] >= baij->M) SETERRQ(1,"MatSetValues_MPIBAIJ:Row too large");
7457b952d6SSatish Balay     if (im[i] >= rstart_orig && im[i] < rend_orig) {
7557b952d6SSatish Balay       row = im[i] - rstart_orig;
7657b952d6SSatish Balay       for ( j=0; j<n; j++ ) {
7757b952d6SSatish Balay         if (in[j] < 0) SETERRQ(1,"MatSetValues_MPIBAIJ:Negative column");
7857b952d6SSatish Balay         if (in[j] >= baij->N) SETERRQ(1,"MatSetValues_MPIBAIJ:Col too large");
7957b952d6SSatish Balay         if (in[j] >= cstart_orig && in[j] < cend_orig){
8057b952d6SSatish Balay           col = in[j] - cstart_orig;
8157b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
8257b952d6SSatish Balay           ierr = MatSetValues(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
8357b952d6SSatish Balay         }
8457b952d6SSatish Balay         else {
8557b952d6SSatish Balay           if (mat->was_assembled) {
86905e6a2fSBarry Smith             if (!baij->colmap) {
87905e6a2fSBarry Smith               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
88905e6a2fSBarry Smith             }
89905e6a2fSBarry Smith             col = baij->colmap[in[j]/bs] - 1 + in[j]%bs;
9057b952d6SSatish Balay             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
9157b952d6SSatish Balay               ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
9257b952d6SSatish Balay               col =  in[j];
9357b952d6SSatish Balay             }
9457b952d6SSatish Balay           }
9557b952d6SSatish Balay           else col = in[j];
9657b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
9757b952d6SSatish Balay           ierr = MatSetValues(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
9857b952d6SSatish Balay         }
9957b952d6SSatish Balay       }
10057b952d6SSatish Balay     }
10157b952d6SSatish Balay     else {
10257b952d6SSatish Balay       if (roworiented) {
10357b952d6SSatish Balay         ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
10457b952d6SSatish Balay       }
10557b952d6SSatish Balay       else {
10657b952d6SSatish Balay         row = im[i];
10757b952d6SSatish Balay         for ( j=0; j<n; j++ ) {
10857b952d6SSatish Balay           ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
10957b952d6SSatish Balay         }
11057b952d6SSatish Balay       }
11157b952d6SSatish Balay     }
11257b952d6SSatish Balay   }
11357b952d6SSatish Balay   return 0;
11457b952d6SSatish Balay }
11557b952d6SSatish Balay 
116d6de1c52SSatish Balay static int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
117d6de1c52SSatish Balay {
118d6de1c52SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
119d6de1c52SSatish Balay   int        bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs;
120d6de1c52SSatish Balay   int        bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col;
121d6de1c52SSatish Balay 
122d6de1c52SSatish Balay   for ( i=0; i<m; i++ ) {
123d6de1c52SSatish Balay     if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIBAIJ:Negative row");
124d6de1c52SSatish Balay     if (idxm[i] >= baij->M) SETERRQ(1,"MatGetValues_MPIBAIJ:Row too large");
125d6de1c52SSatish Balay     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
126d6de1c52SSatish Balay       row = idxm[i] - bsrstart;
127d6de1c52SSatish Balay       for ( j=0; j<n; j++ ) {
128d6de1c52SSatish Balay         if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIBAIJ:Negative column");
129d6de1c52SSatish Balay         if (idxn[j] >= baij->N) SETERRQ(1,"MatGetValues_MPIBAIJ:Col too large");
130d6de1c52SSatish Balay         if (idxn[j] >= bscstart && idxn[j] < bscend){
131d6de1c52SSatish Balay           col = idxn[j] - bscstart;
132d6de1c52SSatish Balay           ierr = MatGetValues(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
133d6de1c52SSatish Balay         }
134d6de1c52SSatish Balay         else {
135905e6a2fSBarry Smith           if (!baij->colmap) {
136905e6a2fSBarry Smith             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
137905e6a2fSBarry Smith           }
138e60e1c95SSatish Balay           if((baij->colmap[idxn[j]/bs]-1 < 0) ||
139e60e1c95SSatish Balay              (baij->garray[baij->colmap[idxn[j]/bs]-1] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
140d9d09a02SSatish Balay           else {
141905e6a2fSBarry Smith             col  = (baij->colmap[idxn[j]/bs]-1)*bs + idxn[j]%bs;
142d6de1c52SSatish Balay             ierr = MatGetValues(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
143d6de1c52SSatish Balay           }
144d6de1c52SSatish Balay         }
145d6de1c52SSatish Balay       }
146d9d09a02SSatish Balay     }
147d6de1c52SSatish Balay     else {
148d6de1c52SSatish Balay       SETERRQ(1,"MatGetValues_MPIBAIJ:Only local values currently supported");
149d6de1c52SSatish Balay     }
150d6de1c52SSatish Balay   }
151d6de1c52SSatish Balay   return 0;
152d6de1c52SSatish Balay }
153d6de1c52SSatish Balay 
154d6de1c52SSatish Balay static int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm)
155d6de1c52SSatish Balay {
156d6de1c52SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
157d6de1c52SSatish Balay   Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data;
158acdf5bf4SSatish Balay   int        ierr, i,bs2=baij->bs2;
159d6de1c52SSatish Balay   double     sum = 0.0;
160d6de1c52SSatish Balay   Scalar     *v;
161d6de1c52SSatish Balay 
162d6de1c52SSatish Balay   if (baij->size == 1) {
163d6de1c52SSatish Balay     ierr =  MatNorm(baij->A,type,norm); CHKERRQ(ierr);
164d6de1c52SSatish Balay   } else {
165d6de1c52SSatish Balay     if (type == NORM_FROBENIUS) {
166d6de1c52SSatish Balay       v = amat->a;
167d6de1c52SSatish Balay       for (i=0; i<amat->nz*bs2; i++ ) {
168d6de1c52SSatish Balay #if defined(PETSC_COMPLEX)
169d6de1c52SSatish Balay         sum += real(conj(*v)*(*v)); v++;
170d6de1c52SSatish Balay #else
171d6de1c52SSatish Balay         sum += (*v)*(*v); v++;
172d6de1c52SSatish Balay #endif
173d6de1c52SSatish Balay       }
174d6de1c52SSatish Balay       v = bmat->a;
175d6de1c52SSatish Balay       for (i=0; i<bmat->nz*bs2; i++ ) {
176d6de1c52SSatish Balay #if defined(PETSC_COMPLEX)
177d6de1c52SSatish Balay         sum += real(conj(*v)*(*v)); v++;
178d6de1c52SSatish Balay #else
179d6de1c52SSatish Balay         sum += (*v)*(*v); v++;
180d6de1c52SSatish Balay #endif
181d6de1c52SSatish Balay       }
182d6de1c52SSatish Balay       MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
183d6de1c52SSatish Balay       *norm = sqrt(*norm);
184d6de1c52SSatish Balay     }
185acdf5bf4SSatish Balay     else
186acdf5bf4SSatish Balay       SETERRQ(1,"MatNorm_SeqBAIJ:No support for this norm yet");
187d6de1c52SSatish Balay   }
188d6de1c52SSatish Balay   return 0;
189d6de1c52SSatish Balay }
19057b952d6SSatish Balay 
19157b952d6SSatish Balay static int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
19257b952d6SSatish Balay {
19357b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
19457b952d6SSatish Balay   MPI_Comm    comm = mat->comm;
19557b952d6SSatish Balay   int         size = baij->size, *owners = baij->rowners,bs=baij->bs;
19657b952d6SSatish Balay   int         rank = baij->rank,tag = mat->tag, *owner,*starts,count,ierr;
19757b952d6SSatish Balay   MPI_Request *send_waits,*recv_waits;
19857b952d6SSatish Balay   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
19957b952d6SSatish Balay   InsertMode  addv;
20057b952d6SSatish Balay   Scalar      *rvalues,*svalues;
20157b952d6SSatish Balay 
20257b952d6SSatish Balay   /* make sure all processors are either in INSERTMODE or ADDMODE */
20357b952d6SSatish Balay   MPI_Allreduce(&baij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
20457b952d6SSatish Balay   if (addv == (ADD_VALUES|INSERT_VALUES)) {
20557b952d6SSatish Balay     SETERRQ(1,"MatAssemblyBegin_MPIBAIJ:Some processors inserted others added");
20657b952d6SSatish Balay   }
20757b952d6SSatish Balay   baij->insertmode = addv; /* in case this processor had no cache */
20857b952d6SSatish Balay 
20957b952d6SSatish Balay   /*  first count number of contributors to each processor */
21057b952d6SSatish Balay   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
21157b952d6SSatish Balay   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
21257b952d6SSatish Balay   owner = (int *) PetscMalloc( (baij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
21357b952d6SSatish Balay   for ( i=0; i<baij->stash.n; i++ ) {
21457b952d6SSatish Balay     idx = baij->stash.idx[i];
21557b952d6SSatish Balay     for ( j=0; j<size; j++ ) {
21657b952d6SSatish Balay       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
21757b952d6SSatish Balay         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
21857b952d6SSatish Balay       }
21957b952d6SSatish Balay     }
22057b952d6SSatish Balay   }
22157b952d6SSatish Balay   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
22257b952d6SSatish Balay 
22357b952d6SSatish Balay   /* inform other processors of number of messages and max length*/
22457b952d6SSatish Balay   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
22557b952d6SSatish Balay   MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);
22657b952d6SSatish Balay   nreceives = work[rank];
22757b952d6SSatish Balay   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
22857b952d6SSatish Balay   nmax = work[rank];
22957b952d6SSatish Balay   PetscFree(work);
23057b952d6SSatish Balay 
23157b952d6SSatish Balay   /* post receives:
23257b952d6SSatish Balay        1) each message will consist of ordered pairs
23357b952d6SSatish Balay      (global index,value) we store the global index as a double
23457b952d6SSatish Balay      to simplify the message passing.
23557b952d6SSatish Balay        2) since we don't know how long each individual message is we
23657b952d6SSatish Balay      allocate the largest needed buffer for each receive. Potentially
23757b952d6SSatish Balay      this is a lot of wasted space.
23857b952d6SSatish Balay 
23957b952d6SSatish Balay 
24057b952d6SSatish Balay        This could be done better.
24157b952d6SSatish Balay   */
24257b952d6SSatish Balay   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
24357b952d6SSatish Balay   CHKPTRQ(rvalues);
24457b952d6SSatish Balay   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
24557b952d6SSatish Balay   CHKPTRQ(recv_waits);
24657b952d6SSatish Balay   for ( i=0; i<nreceives; i++ ) {
24757b952d6SSatish Balay     MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
24857b952d6SSatish Balay               comm,recv_waits+i);
24957b952d6SSatish Balay   }
25057b952d6SSatish Balay 
25157b952d6SSatish Balay   /* do sends:
25257b952d6SSatish Balay       1) starts[i] gives the starting index in svalues for stuff going to
25357b952d6SSatish Balay          the ith processor
25457b952d6SSatish Balay   */
25557b952d6SSatish Balay   svalues = (Scalar *) PetscMalloc(3*(baij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
25657b952d6SSatish Balay   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
25757b952d6SSatish Balay   CHKPTRQ(send_waits);
25857b952d6SSatish Balay   starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
25957b952d6SSatish Balay   starts[0] = 0;
26057b952d6SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
26157b952d6SSatish Balay   for ( i=0; i<baij->stash.n; i++ ) {
26257b952d6SSatish Balay     svalues[3*starts[owner[i]]]       = (Scalar)  baij->stash.idx[i];
26357b952d6SSatish Balay     svalues[3*starts[owner[i]]+1]     = (Scalar)  baij->stash.idy[i];
26457b952d6SSatish Balay     svalues[3*(starts[owner[i]]++)+2] =  baij->stash.array[i];
26557b952d6SSatish Balay   }
26657b952d6SSatish Balay   PetscFree(owner);
26757b952d6SSatish Balay   starts[0] = 0;
26857b952d6SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
26957b952d6SSatish Balay   count = 0;
27057b952d6SSatish Balay   for ( i=0; i<size; i++ ) {
27157b952d6SSatish Balay     if (procs[i]) {
27257b952d6SSatish Balay       MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
27357b952d6SSatish Balay                 comm,send_waits+count++);
27457b952d6SSatish Balay     }
27557b952d6SSatish Balay   }
27657b952d6SSatish Balay   PetscFree(starts); PetscFree(nprocs);
27757b952d6SSatish Balay 
27857b952d6SSatish Balay   /* Free cache space */
27957b952d6SSatish Balay   PLogInfo(0,"[%d]MatAssemblyBegin_MPIBAIJ:Number of off processor values %d\n",rank,baij->stash.n);
28057b952d6SSatish Balay   ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr);
28157b952d6SSatish Balay 
28257b952d6SSatish Balay   baij->svalues    = svalues;    baij->rvalues    = rvalues;
28357b952d6SSatish Balay   baij->nsends     = nsends;     baij->nrecvs     = nreceives;
28457b952d6SSatish Balay   baij->send_waits = send_waits; baij->recv_waits = recv_waits;
28557b952d6SSatish Balay   baij->rmax       = nmax;
28657b952d6SSatish Balay 
28757b952d6SSatish Balay   return 0;
28857b952d6SSatish Balay }
28957b952d6SSatish Balay 
29057b952d6SSatish Balay 
29157b952d6SSatish Balay static int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
29257b952d6SSatish Balay {
29357b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
29457b952d6SSatish Balay   MPI_Status  *send_status,recv_status;
29557b952d6SSatish Balay   int         imdex,nrecvs = baij->nrecvs, count = nrecvs, i, n, ierr;
29657b952d6SSatish Balay   int         bs=baij->bs,row,col,other_disassembled;
29757b952d6SSatish Balay   Scalar      *values,val;
29857b952d6SSatish Balay   InsertMode  addv = baij->insertmode;
29957b952d6SSatish Balay 
30057b952d6SSatish Balay   /*  wait on receives */
30157b952d6SSatish Balay   while (count) {
30257b952d6SSatish Balay     MPI_Waitany(nrecvs,baij->recv_waits,&imdex,&recv_status);
30357b952d6SSatish Balay     /* unpack receives into our local space */
30457b952d6SSatish Balay     values = baij->rvalues + 3*imdex*baij->rmax;
30557b952d6SSatish Balay     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
30657b952d6SSatish Balay     n = n/3;
30757b952d6SSatish Balay     for ( i=0; i<n; i++ ) {
30857b952d6SSatish Balay       row = (int) PetscReal(values[3*i]) - baij->rstart*bs;
30957b952d6SSatish Balay       col = (int) PetscReal(values[3*i+1]);
31057b952d6SSatish Balay       val = values[3*i+2];
31157b952d6SSatish Balay       if (col >= baij->cstart*bs && col < baij->cend*bs) {
31257b952d6SSatish Balay         col -= baij->cstart*bs;
31357b952d6SSatish Balay         MatSetValues(baij->A,1,&row,1,&col,&val,addv);
31457b952d6SSatish Balay       }
31557b952d6SSatish Balay       else {
31657b952d6SSatish Balay         if (mat->was_assembled) {
317905e6a2fSBarry Smith           if (!baij->colmap) {
318905e6a2fSBarry Smith             ierr = CreateColmap_MPIBAIJ_Private(mat); CHKERRQ(ierr);
319905e6a2fSBarry Smith           }
320905e6a2fSBarry Smith           col = (baij->colmap[col/bs]-1)*bs + col%bs;
32157b952d6SSatish Balay           if (col < 0  && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
32257b952d6SSatish Balay             ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
32357b952d6SSatish Balay             col = (int) PetscReal(values[3*i+1]);
32457b952d6SSatish Balay           }
32557b952d6SSatish Balay         }
32657b952d6SSatish Balay         MatSetValues(baij->B,1,&row,1,&col,&val,addv);
32757b952d6SSatish Balay       }
32857b952d6SSatish Balay     }
32957b952d6SSatish Balay     count--;
33057b952d6SSatish Balay   }
33157b952d6SSatish Balay   PetscFree(baij->recv_waits); PetscFree(baij->rvalues);
33257b952d6SSatish Balay 
33357b952d6SSatish Balay   /* wait on sends */
33457b952d6SSatish Balay   if (baij->nsends) {
33557b952d6SSatish Balay     send_status = (MPI_Status *) PetscMalloc(baij->nsends*sizeof(MPI_Status));
33657b952d6SSatish Balay     CHKPTRQ(send_status);
33757b952d6SSatish Balay     MPI_Waitall(baij->nsends,baij->send_waits,send_status);
33857b952d6SSatish Balay     PetscFree(send_status);
33957b952d6SSatish Balay   }
34057b952d6SSatish Balay   PetscFree(baij->send_waits); PetscFree(baij->svalues);
34157b952d6SSatish Balay 
34257b952d6SSatish Balay   baij->insertmode = NOT_SET_VALUES;
34357b952d6SSatish Balay   ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr);
34457b952d6SSatish Balay   ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr);
34557b952d6SSatish Balay 
34657b952d6SSatish Balay   /* determine if any processor has disassembled, if so we must
34757b952d6SSatish Balay      also disassemble ourselfs, in order that we may reassemble. */
34857b952d6SSatish Balay   MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
34957b952d6SSatish Balay   if (mat->was_assembled && !other_disassembled) {
35057b952d6SSatish Balay     ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
35157b952d6SSatish Balay   }
35257b952d6SSatish Balay 
3536d4a8577SBarry Smith   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
35457b952d6SSatish Balay     ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr);
35557b952d6SSatish Balay   }
35657b952d6SSatish Balay   ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr);
35757b952d6SSatish Balay   ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr);
35857b952d6SSatish Balay 
35957b952d6SSatish Balay   if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;}
36057b952d6SSatish Balay   return 0;
36157b952d6SSatish Balay }
36257b952d6SSatish Balay 
36357b952d6SSatish Balay static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer)
36457b952d6SSatish Balay {
36557b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
36657b952d6SSatish Balay   int          ierr;
36757b952d6SSatish Balay 
36857b952d6SSatish Balay   if (baij->size == 1) {
36957b952d6SSatish Balay     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
37057b952d6SSatish Balay   }
37157b952d6SSatish Balay   else SETERRQ(1,"MatView_MPIBAIJ_Binary:Only uniprocessor output supported");
37257b952d6SSatish Balay   return 0;
37357b952d6SSatish Balay }
37457b952d6SSatish Balay 
37557b952d6SSatish Balay static int MatView_MPIBAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
37657b952d6SSatish Balay {
37757b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
378cee3aa6bSSatish Balay   int          ierr, format,rank,bs=baij->bs;
37957b952d6SSatish Balay   FILE         *fd;
38057b952d6SSatish Balay   ViewerType   vtype;
38157b952d6SSatish Balay 
38257b952d6SSatish Balay   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
38357b952d6SSatish Balay   if (vtype  == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
38457b952d6SSatish Balay     ierr = ViewerGetFormat(viewer,&format);
38557b952d6SSatish Balay     if (format == ASCII_FORMAT_INFO_DETAILED) {
386*4e220ebcSLois Curfman McInnes       MatInfo info;
38757b952d6SSatish Balay       MPI_Comm_rank(mat->comm,&rank);
38857b952d6SSatish Balay       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
389*4e220ebcSLois Curfman McInnes       ierr = MatGetInfo(mat,MAT_LOCAL,&info);
39057b952d6SSatish Balay       PetscSequentialPhaseBegin(mat->comm,1);
39157b952d6SSatish Balay       fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
392*4e220ebcSLois Curfman McInnes               rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
393*4e220ebcSLois Curfman McInnes               baij->bs,(int)info.memory);
394*4e220ebcSLois Curfman McInnes       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);
395*4e220ebcSLois Curfman McInnes       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
396*4e220ebcSLois Curfman McInnes       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);
397*4e220ebcSLois Curfman McInnes       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
39857b952d6SSatish Balay       fflush(fd);
39957b952d6SSatish Balay       PetscSequentialPhaseEnd(mat->comm,1);
40057b952d6SSatish Balay       ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr);
40157b952d6SSatish Balay       return 0;
40257b952d6SSatish Balay     }
40357b952d6SSatish Balay     else if (format == ASCII_FORMAT_INFO) {
40457b952d6SSatish Balay       return 0;
40557b952d6SSatish Balay     }
40657b952d6SSatish Balay   }
40757b952d6SSatish Balay 
40857b952d6SSatish Balay   if (vtype == DRAW_VIEWER) {
40957b952d6SSatish Balay     Draw       draw;
41057b952d6SSatish Balay     PetscTruth isnull;
41157b952d6SSatish Balay     ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
41257b952d6SSatish Balay     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
41357b952d6SSatish Balay   }
41457b952d6SSatish Balay 
41557b952d6SSatish Balay   if (vtype == ASCII_FILE_VIEWER) {
41657b952d6SSatish Balay     ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
41757b952d6SSatish Balay     PetscSequentialPhaseBegin(mat->comm,1);
41857b952d6SSatish Balay     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
41957b952d6SSatish Balay            baij->rank,baij->m,baij->rstart*bs,baij->rend*bs,baij->n,
42057b952d6SSatish Balay             baij->cstart*bs,baij->cend*bs);
42157b952d6SSatish Balay     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
42257b952d6SSatish Balay     ierr = MatView(baij->B,viewer); CHKERRQ(ierr);
42357b952d6SSatish Balay     fflush(fd);
42457b952d6SSatish Balay     PetscSequentialPhaseEnd(mat->comm,1);
42557b952d6SSatish Balay   }
42657b952d6SSatish Balay   else {
42757b952d6SSatish Balay     int size = baij->size;
42857b952d6SSatish Balay     rank = baij->rank;
42957b952d6SSatish Balay     if (size == 1) {
43057b952d6SSatish Balay       ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
43157b952d6SSatish Balay     }
43257b952d6SSatish Balay     else {
43357b952d6SSatish Balay       /* assemble the entire matrix onto first processor. */
43457b952d6SSatish Balay       Mat         A;
43557b952d6SSatish Balay       Mat_SeqBAIJ *Aloc;
43657b952d6SSatish Balay       int         M = baij->M, N = baij->N,*ai,*aj,row,col,i,j,k,*rvals;
43757b952d6SSatish Balay       int         mbs=baij->mbs;
43857b952d6SSatish Balay       Scalar      *a;
43957b952d6SSatish Balay 
44057b952d6SSatish Balay       if (!rank) {
441cee3aa6bSSatish Balay         ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
44257b952d6SSatish Balay         CHKERRQ(ierr);
44357b952d6SSatish Balay       }
44457b952d6SSatish Balay       else {
445cee3aa6bSSatish Balay         ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
44657b952d6SSatish Balay         CHKERRQ(ierr);
44757b952d6SSatish Balay       }
44857b952d6SSatish Balay       PLogObjectParent(mat,A);
44957b952d6SSatish Balay 
45057b952d6SSatish Balay       /* copy over the A part */
45157b952d6SSatish Balay       Aloc = (Mat_SeqBAIJ*) baij->A->data;
45257b952d6SSatish Balay       ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
45357b952d6SSatish Balay       row = baij->rstart;
45457b952d6SSatish Balay       rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
45557b952d6SSatish Balay 
45657b952d6SSatish Balay       for ( i=0; i<mbs; i++ ) {
45757b952d6SSatish Balay         rvals[0] = bs*(baij->rstart + i);
45857b952d6SSatish Balay         for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
45957b952d6SSatish Balay         for ( j=ai[i]; j<ai[i+1]; j++ ) {
46057b952d6SSatish Balay           col = (baij->cstart+aj[j])*bs;
46157b952d6SSatish Balay           for (k=0; k<bs; k++ ) {
462cee3aa6bSSatish Balay             ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
463cee3aa6bSSatish Balay             col++; a += bs;
46457b952d6SSatish Balay           }
46557b952d6SSatish Balay         }
46657b952d6SSatish Balay       }
46757b952d6SSatish Balay       /* copy over the B part */
46857b952d6SSatish Balay       Aloc = (Mat_SeqBAIJ*) baij->B->data;
46957b952d6SSatish Balay       ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
47057b952d6SSatish Balay       row = baij->rstart*bs;
47157b952d6SSatish Balay       for ( i=0; i<mbs; i++ ) {
47257b952d6SSatish Balay         rvals[0] = bs*(baij->rstart + i);
47357b952d6SSatish Balay         for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
47457b952d6SSatish Balay         for ( j=ai[i]; j<ai[i+1]; j++ ) {
47557b952d6SSatish Balay           col = baij->garray[aj[j]]*bs;
47657b952d6SSatish Balay           for (k=0; k<bs; k++ ) {
477cee3aa6bSSatish Balay             ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
478cee3aa6bSSatish Balay             col++; a += bs;
47957b952d6SSatish Balay           }
48057b952d6SSatish Balay         }
48157b952d6SSatish Balay       }
48257b952d6SSatish Balay       PetscFree(rvals);
4836d4a8577SBarry Smith       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
4846d4a8577SBarry Smith       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
48557b952d6SSatish Balay       if (!rank) {
48657b952d6SSatish Balay         ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
48757b952d6SSatish Balay       }
48857b952d6SSatish Balay       ierr = MatDestroy(A); CHKERRQ(ierr);
48957b952d6SSatish Balay     }
49057b952d6SSatish Balay   }
49157b952d6SSatish Balay   return 0;
49257b952d6SSatish Balay }
49357b952d6SSatish Balay 
49457b952d6SSatish Balay 
49557b952d6SSatish Balay 
49657b952d6SSatish Balay static int MatView_MPIBAIJ(PetscObject obj,Viewer viewer)
49757b952d6SSatish Balay {
49857b952d6SSatish Balay   Mat         mat = (Mat) obj;
49957b952d6SSatish Balay   int         ierr;
50057b952d6SSatish Balay   ViewerType  vtype;
50157b952d6SSatish Balay 
50257b952d6SSatish Balay   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
50357b952d6SSatish Balay   if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
50457b952d6SSatish Balay       vtype == DRAW_VIEWER       || vtype == MATLAB_VIEWER) {
50557b952d6SSatish Balay     ierr = MatView_MPIBAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
50657b952d6SSatish Balay   }
50757b952d6SSatish Balay   else if (vtype == BINARY_FILE_VIEWER) {
50857b952d6SSatish Balay     return MatView_MPIBAIJ_Binary(mat,viewer);
50957b952d6SSatish Balay   }
51057b952d6SSatish Balay   return 0;
51157b952d6SSatish Balay }
51257b952d6SSatish Balay 
51379bdfe76SSatish Balay static int MatDestroy_MPIBAIJ(PetscObject obj)
51479bdfe76SSatish Balay {
51579bdfe76SSatish Balay   Mat         mat = (Mat) obj;
51679bdfe76SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
51779bdfe76SSatish Balay   int         ierr;
51879bdfe76SSatish Balay 
51979bdfe76SSatish Balay #if defined(PETSC_LOG)
52079bdfe76SSatish Balay   PLogObjectState(obj,"Rows=%d, Cols=%d",baij->M,baij->N);
52179bdfe76SSatish Balay #endif
52279bdfe76SSatish Balay 
52379bdfe76SSatish Balay   PetscFree(baij->rowners);
52479bdfe76SSatish Balay   ierr = MatDestroy(baij->A); CHKERRQ(ierr);
52579bdfe76SSatish Balay   ierr = MatDestroy(baij->B); CHKERRQ(ierr);
52679bdfe76SSatish Balay   if (baij->colmap) PetscFree(baij->colmap);
52779bdfe76SSatish Balay   if (baij->garray) PetscFree(baij->garray);
52879bdfe76SSatish Balay   if (baij->lvec)   VecDestroy(baij->lvec);
52979bdfe76SSatish Balay   if (baij->Mvctx)  VecScatterDestroy(baij->Mvctx);
53079bdfe76SSatish Balay   if (baij->rowvalues) PetscFree(baij->rowvalues);
53179bdfe76SSatish Balay   PetscFree(baij);
53279bdfe76SSatish Balay   PLogObjectDestroy(mat);
53379bdfe76SSatish Balay   PetscHeaderDestroy(mat);
53479bdfe76SSatish Balay   return 0;
53579bdfe76SSatish Balay }
53679bdfe76SSatish Balay 
537cee3aa6bSSatish Balay static int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
538cee3aa6bSSatish Balay {
539cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
54047b4a8eaSLois Curfman McInnes   int         ierr, nt;
541cee3aa6bSSatish Balay 
542c16cb8f2SBarry Smith   VecGetLocalSize_Fast(xx,nt);
54347b4a8eaSLois Curfman McInnes   if (nt != a->n) {
5440ac07820SSatish Balay     SETERRQ(1,"MatMult_MPIBAIJ:Incompatible parition of A and xx");
54547b4a8eaSLois Curfman McInnes   }
546c16cb8f2SBarry Smith   VecGetLocalSize_Fast(yy,nt);
54747b4a8eaSLois Curfman McInnes   if (nt != a->m) {
5480ac07820SSatish Balay     SETERRQ(1,"MatMult_MPIBAIJ:Incompatible parition of A and yy");
54947b4a8eaSLois Curfman McInnes   }
550cee3aa6bSSatish Balay   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
551cee3aa6bSSatish Balay   ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr);
552cee3aa6bSSatish Balay   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
553cee3aa6bSSatish Balay   ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
554cee3aa6bSSatish Balay   return 0;
555cee3aa6bSSatish Balay }
556cee3aa6bSSatish Balay 
557cee3aa6bSSatish Balay static int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
558cee3aa6bSSatish Balay {
559cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
560cee3aa6bSSatish Balay   int        ierr;
561cee3aa6bSSatish Balay   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
562cee3aa6bSSatish Balay   ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
563cee3aa6bSSatish Balay   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
564cee3aa6bSSatish Balay   ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
565cee3aa6bSSatish Balay   return 0;
566cee3aa6bSSatish Balay }
567cee3aa6bSSatish Balay 
568cee3aa6bSSatish Balay static int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy)
569cee3aa6bSSatish Balay {
570cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
571cee3aa6bSSatish Balay   int        ierr;
572cee3aa6bSSatish Balay 
573cee3aa6bSSatish Balay   /* do nondiagonal part */
574cee3aa6bSSatish Balay   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
575cee3aa6bSSatish Balay   /* send it on its way */
576537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
577cee3aa6bSSatish Balay   /* do local part */
578cee3aa6bSSatish Balay   ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr);
579cee3aa6bSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
580cee3aa6bSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
581cee3aa6bSSatish Balay   /* but is not perhaps always true. */
582cee3aa6bSSatish Balay   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,
583cee3aa6bSSatish Balay                   (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx);CHKERRQ(ierr);
584cee3aa6bSSatish Balay   return 0;
585cee3aa6bSSatish Balay }
586cee3aa6bSSatish Balay 
587cee3aa6bSSatish Balay static int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
588cee3aa6bSSatish Balay {
589cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
590cee3aa6bSSatish Balay   int        ierr;
591cee3aa6bSSatish Balay 
592cee3aa6bSSatish Balay   /* do nondiagonal part */
593cee3aa6bSSatish Balay   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
594cee3aa6bSSatish Balay   /* send it on its way */
595537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
596cee3aa6bSSatish Balay   /* do local part */
597cee3aa6bSSatish Balay   ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
598cee3aa6bSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
599cee3aa6bSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
600cee3aa6bSSatish Balay   /* but is not perhaps always true. */
601537820f0SBarry Smith   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
602cee3aa6bSSatish Balay   return 0;
603cee3aa6bSSatish Balay }
604cee3aa6bSSatish Balay 
605cee3aa6bSSatish Balay /*
606cee3aa6bSSatish Balay   This only works correctly for square matrices where the subblock A->A is the
607cee3aa6bSSatish Balay    diagonal block
608cee3aa6bSSatish Balay */
609cee3aa6bSSatish Balay static int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
610cee3aa6bSSatish Balay {
611cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
612cee3aa6bSSatish Balay   if (a->M != a->N)
613cee3aa6bSSatish Balay     SETERRQ(1,"MatGetDiagonal_MPIBAIJ:Supports only square matrix where A->A is diag block");
614cee3aa6bSSatish Balay   return MatGetDiagonal(a->A,v);
615cee3aa6bSSatish Balay }
616cee3aa6bSSatish Balay 
617cee3aa6bSSatish Balay static int MatScale_MPIBAIJ(Scalar *aa,Mat A)
618cee3aa6bSSatish Balay {
619cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
620cee3aa6bSSatish Balay   int        ierr;
621cee3aa6bSSatish Balay   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
622cee3aa6bSSatish Balay   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
623cee3aa6bSSatish Balay   return 0;
624cee3aa6bSSatish Balay }
62557b952d6SSatish Balay static int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n)
62657b952d6SSatish Balay {
62757b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
62857b952d6SSatish Balay   *m = mat->M; *n = mat->N;
62957b952d6SSatish Balay   return 0;
63057b952d6SSatish Balay }
63157b952d6SSatish Balay 
63257b952d6SSatish Balay static int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n)
63357b952d6SSatish Balay {
63457b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
63557b952d6SSatish Balay   *m = mat->m; *n = mat->N;
63657b952d6SSatish Balay   return 0;
63757b952d6SSatish Balay }
63857b952d6SSatish Balay 
63957b952d6SSatish Balay static int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n)
64057b952d6SSatish Balay {
64157b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
64257b952d6SSatish Balay   *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs;
64357b952d6SSatish Balay   return 0;
64457b952d6SSatish Balay }
64557b952d6SSatish Balay 
646acdf5bf4SSatish Balay extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
647acdf5bf4SSatish Balay extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
648acdf5bf4SSatish Balay 
649acdf5bf4SSatish Balay int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
650acdf5bf4SSatish Balay {
651acdf5bf4SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
652acdf5bf4SSatish Balay   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
653acdf5bf4SSatish Balay   int        bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB;
654d9d09a02SSatish Balay   int        nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs;
655d9d09a02SSatish Balay   int        *cmap, *idx_p,cstart = mat->cstart;
656acdf5bf4SSatish Balay 
657acdf5bf4SSatish Balay   if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,"MatGetRow_MPIBAIJ:Already active");
658acdf5bf4SSatish Balay   mat->getrowactive = PETSC_TRUE;
659acdf5bf4SSatish Balay 
660acdf5bf4SSatish Balay   if (!mat->rowvalues && (idx || v)) {
661acdf5bf4SSatish Balay     /*
662acdf5bf4SSatish Balay         allocate enough space to hold information from the longest row.
663acdf5bf4SSatish Balay     */
664acdf5bf4SSatish Balay     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data;
665bd16c2feSSatish Balay     int     max = 1,mbs = mat->mbs,tmp;
666bd16c2feSSatish Balay     for ( i=0; i<mbs; i++ ) {
667acdf5bf4SSatish Balay       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
668acdf5bf4SSatish Balay       if (max < tmp) { max = tmp; }
669acdf5bf4SSatish Balay     }
670acdf5bf4SSatish Balay     mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar)));
671acdf5bf4SSatish Balay     CHKPTRQ(mat->rowvalues);
672acdf5bf4SSatish Balay     mat->rowindices = (int *) (mat->rowvalues + max*bs2);
673acdf5bf4SSatish Balay   }
674acdf5bf4SSatish Balay 
675acdf5bf4SSatish Balay 
676d9d09a02SSatish Balay   if (row < brstart || row >= brend) SETERRQ(1,"MatGetRow_MPIBAIJ:Only local rows")
677d9d09a02SSatish Balay   lrow = row - brstart;
678acdf5bf4SSatish Balay 
679acdf5bf4SSatish Balay   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
680acdf5bf4SSatish Balay   if (!v)   {pvA = 0; pvB = 0;}
681acdf5bf4SSatish Balay   if (!idx) {pcA = 0; if (!v) pcB = 0;}
682acdf5bf4SSatish Balay   ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
683acdf5bf4SSatish Balay   ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
684acdf5bf4SSatish Balay   nztot = nzA + nzB;
685acdf5bf4SSatish Balay 
686acdf5bf4SSatish Balay   cmap  = mat->garray;
687acdf5bf4SSatish Balay   if (v  || idx) {
688acdf5bf4SSatish Balay     if (nztot) {
689acdf5bf4SSatish Balay       /* Sort by increasing column numbers, assuming A and B already sorted */
690acdf5bf4SSatish Balay       int imark = -1;
691acdf5bf4SSatish Balay       if (v) {
692acdf5bf4SSatish Balay         *v = v_p = mat->rowvalues;
693acdf5bf4SSatish Balay         for ( i=0; i<nzB; i++ ) {
694d9d09a02SSatish Balay           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
695acdf5bf4SSatish Balay           else break;
696acdf5bf4SSatish Balay         }
697acdf5bf4SSatish Balay         imark = i;
698acdf5bf4SSatish Balay         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
699acdf5bf4SSatish Balay         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
700acdf5bf4SSatish Balay       }
701acdf5bf4SSatish Balay       if (idx) {
702acdf5bf4SSatish Balay         *idx = idx_p = mat->rowindices;
703acdf5bf4SSatish Balay         if (imark > -1) {
704acdf5bf4SSatish Balay           for ( i=0; i<imark; i++ ) {
705bd16c2feSSatish Balay             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
706acdf5bf4SSatish Balay           }
707acdf5bf4SSatish Balay         } else {
708acdf5bf4SSatish Balay           for ( i=0; i<nzB; i++ ) {
709d9d09a02SSatish Balay             if (cmap[cworkB[i]/bs] < cstart)
710d9d09a02SSatish Balay               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
711acdf5bf4SSatish Balay             else break;
712acdf5bf4SSatish Balay           }
713acdf5bf4SSatish Balay           imark = i;
714acdf5bf4SSatish Balay         }
715d9d09a02SSatish Balay         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart*bs + cworkA[i];
716d9d09a02SSatish Balay         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
717acdf5bf4SSatish Balay       }
718acdf5bf4SSatish Balay     }
719d212a18eSSatish Balay     else {
720d212a18eSSatish Balay       if (idx) *idx = 0;
721d212a18eSSatish Balay       if (v)   *v   = 0;
722d212a18eSSatish Balay     }
723acdf5bf4SSatish Balay   }
724acdf5bf4SSatish Balay   *nz = nztot;
725acdf5bf4SSatish Balay   ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
726acdf5bf4SSatish Balay   ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
727acdf5bf4SSatish Balay   return 0;
728acdf5bf4SSatish Balay }
729acdf5bf4SSatish Balay 
730acdf5bf4SSatish Balay int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
731acdf5bf4SSatish Balay {
732acdf5bf4SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
733acdf5bf4SSatish Balay   if (baij->getrowactive == PETSC_FALSE) {
734acdf5bf4SSatish Balay     SETERRQ(1,"MatRestoreRow_MPIBAIJ:MatGetRow not called");
735acdf5bf4SSatish Balay   }
736acdf5bf4SSatish Balay   baij->getrowactive = PETSC_FALSE;
737acdf5bf4SSatish Balay   return 0;
738acdf5bf4SSatish Balay }
739acdf5bf4SSatish Balay 
7405a838052SSatish Balay static int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
7415a838052SSatish Balay {
7425a838052SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
7435a838052SSatish Balay   *bs = baij->bs;
7445a838052SSatish Balay   return 0;
7455a838052SSatish Balay }
7465a838052SSatish Balay 
74758667388SSatish Balay static int MatZeroEntries_MPIBAIJ(Mat A)
74858667388SSatish Balay {
74958667388SSatish Balay   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data;
75058667388SSatish Balay   int         ierr;
75158667388SSatish Balay   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
75258667388SSatish Balay   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
75358667388SSatish Balay   return 0;
75458667388SSatish Balay }
7550ac07820SSatish Balay 
756*4e220ebcSLois Curfman McInnes static int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
7570ac07820SSatish Balay {
758*4e220ebcSLois Curfman McInnes   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data;
759*4e220ebcSLois Curfman McInnes   Mat         A = a->A, B = a->B;
760*4e220ebcSLois Curfman McInnes   int         ierr, isend[3], irecv[3];
7610ac07820SSatish Balay 
762*4e220ebcSLois Curfman McInnes   info->rows_global    = (double)a->M;
763*4e220ebcSLois Curfman McInnes   info->columns_global = (double)a->N;
764*4e220ebcSLois Curfman McInnes   info->rows_local     = (double)a->m;
765*4e220ebcSLois Curfman McInnes   info->columns_local  = (double)a->N;
766*4e220ebcSLois Curfman McInnes   info->block_size     = (double)a->bs;
767*4e220ebcSLois Curfman McInnes   ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
768*4e220ebcSLois Curfman McInnes   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->memory;
769*4e220ebcSLois Curfman McInnes   ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
770*4e220ebcSLois Curfman McInnes   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->memory;
7710ac07820SSatish Balay   if (flag == MAT_LOCAL) {
772*4e220ebcSLois Curfman McInnes     info->nz_used      = isend[0];
773*4e220ebcSLois Curfman McInnes     info->nz_allocated = isend[1];
774*4e220ebcSLois Curfman McInnes     info->nz_unneeded  = isend[2];
775*4e220ebcSLois Curfman McInnes     info->memory       = isend[3];
776*4e220ebcSLois Curfman McInnes     info->mallocs      = isend[4];
7770ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_MAX) {
7780ac07820SSatish Balay     MPI_Allreduce(isend,irecv,3,MPI_INT,MPI_MAX,matin->comm);
779*4e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
780*4e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
781*4e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
782*4e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
783*4e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
7840ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_SUM) {
7850ac07820SSatish Balay     MPI_Allreduce(isend,irecv,3,MPI_INT,MPI_SUM,matin->comm);
786*4e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
787*4e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
788*4e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
789*4e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
790*4e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
7910ac07820SSatish Balay   }
792*4e220ebcSLois Curfman McInnes   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
793*4e220ebcSLois Curfman McInnes   info->fill_ratio_needed = 0;
794*4e220ebcSLois Curfman McInnes   info->factor_mallocs    = 0;
7950ac07820SSatish Balay   return 0;
7960ac07820SSatish Balay }
7970ac07820SSatish Balay 
79858667388SSatish Balay static int MatSetOption_MPIBAIJ(Mat A,MatOption op)
79958667388SSatish Balay {
80058667388SSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
80158667388SSatish Balay 
80258667388SSatish Balay   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
80358667388SSatish Balay       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
80458667388SSatish Balay       op == MAT_COLUMNS_SORTED ||
80558667388SSatish Balay       op == MAT_ROW_ORIENTED) {
80658667388SSatish Balay         MatSetOption(a->A,op);
80758667388SSatish Balay         MatSetOption(a->B,op);
80858667388SSatish Balay   }
80958667388SSatish Balay   else if (op == MAT_ROWS_SORTED ||
81058667388SSatish Balay            op == MAT_SYMMETRIC ||
81158667388SSatish Balay            op == MAT_STRUCTURALLY_SYMMETRIC ||
81258667388SSatish Balay            op == MAT_YES_NEW_DIAGONALS)
81358667388SSatish Balay     PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
81458667388SSatish Balay   else if (op == MAT_COLUMN_ORIENTED) {
81558667388SSatish Balay     a->roworiented = 0;
81658667388SSatish Balay     MatSetOption(a->A,op);
81758667388SSatish Balay     MatSetOption(a->B,op);
81858667388SSatish Balay   }
81958667388SSatish Balay   else if (op == MAT_NO_NEW_DIAGONALS)
8200ac07820SSatish Balay     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIBAIJ:MAT_NO_NEW_DIAGONALS");}
82158667388SSatish Balay   else
8220ac07820SSatish Balay     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIBAIJ:unknown option");}
82358667388SSatish Balay   return 0;
82458667388SSatish Balay }
82558667388SSatish Balay 
8260ac07820SSatish Balay static int MatTranspose_MPIBAIJ(Mat A,Mat *matout)
8270ac07820SSatish Balay {
8280ac07820SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data;
8290ac07820SSatish Balay   Mat_SeqBAIJ *Aloc;
8300ac07820SSatish Balay   Mat        B;
8310ac07820SSatish Balay   int        ierr,M=baij->M,N=baij->N,*ai,*aj,row,i,*rvals,j,k,col;
8320ac07820SSatish Balay   int        bs=baij->bs,mbs=baij->mbs;
8330ac07820SSatish Balay   Scalar     *a;
8340ac07820SSatish Balay 
8350ac07820SSatish Balay   if (matout == PETSC_NULL && M != N)
8360ac07820SSatish Balay     SETERRQ(1,"MatTranspose_MPIBAIJ:Square matrix only for in-place");
8370ac07820SSatish Balay   ierr = MatCreateMPIBAIJ(A->comm,baij->bs,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);
8380ac07820SSatish Balay   CHKERRQ(ierr);
8390ac07820SSatish Balay 
8400ac07820SSatish Balay   /* copy over the A part */
8410ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*) baij->A->data;
8420ac07820SSatish Balay   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
8430ac07820SSatish Balay   row = baij->rstart;
8440ac07820SSatish Balay   rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
8450ac07820SSatish Balay 
8460ac07820SSatish Balay   for ( i=0; i<mbs; i++ ) {
8470ac07820SSatish Balay     rvals[0] = bs*(baij->rstart + i);
8480ac07820SSatish Balay     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
8490ac07820SSatish Balay     for ( j=ai[i]; j<ai[i+1]; j++ ) {
8500ac07820SSatish Balay       col = (baij->cstart+aj[j])*bs;
8510ac07820SSatish Balay       for (k=0; k<bs; k++ ) {
8520ac07820SSatish Balay         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
8530ac07820SSatish Balay         col++; a += bs;
8540ac07820SSatish Balay       }
8550ac07820SSatish Balay     }
8560ac07820SSatish Balay   }
8570ac07820SSatish Balay   /* copy over the B part */
8580ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*) baij->B->data;
8590ac07820SSatish Balay   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
8600ac07820SSatish Balay   row = baij->rstart*bs;
8610ac07820SSatish Balay   for ( i=0; i<mbs; i++ ) {
8620ac07820SSatish Balay     rvals[0] = bs*(baij->rstart + i);
8630ac07820SSatish Balay     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
8640ac07820SSatish Balay     for ( j=ai[i]; j<ai[i+1]; j++ ) {
8650ac07820SSatish Balay       col = baij->garray[aj[j]]*bs;
8660ac07820SSatish Balay       for (k=0; k<bs; k++ ) {
8670ac07820SSatish Balay         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
8680ac07820SSatish Balay         col++; a += bs;
8690ac07820SSatish Balay       }
8700ac07820SSatish Balay     }
8710ac07820SSatish Balay   }
8720ac07820SSatish Balay   PetscFree(rvals);
8730ac07820SSatish Balay   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
8740ac07820SSatish Balay   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
8750ac07820SSatish Balay 
8760ac07820SSatish Balay   if (matout != PETSC_NULL) {
8770ac07820SSatish Balay     *matout = B;
8780ac07820SSatish Balay   } else {
8790ac07820SSatish Balay     /* This isn't really an in-place transpose .... but free data structures from baij */
8800ac07820SSatish Balay     PetscFree(baij->rowners);
8810ac07820SSatish Balay     ierr = MatDestroy(baij->A); CHKERRQ(ierr);
8820ac07820SSatish Balay     ierr = MatDestroy(baij->B); CHKERRQ(ierr);
8830ac07820SSatish Balay     if (baij->colmap) PetscFree(baij->colmap);
8840ac07820SSatish Balay     if (baij->garray) PetscFree(baij->garray);
8850ac07820SSatish Balay     if (baij->lvec) VecDestroy(baij->lvec);
8860ac07820SSatish Balay     if (baij->Mvctx) VecScatterDestroy(baij->Mvctx);
8870ac07820SSatish Balay     PetscFree(baij);
8880ac07820SSatish Balay     PetscMemcpy(A,B,sizeof(struct _Mat));
8890ac07820SSatish Balay     PetscHeaderDestroy(B);
8900ac07820SSatish Balay   }
8910ac07820SSatish Balay   return 0;
8920ac07820SSatish Balay }
8930ac07820SSatish Balay /* the code does not do the diagonal entries correctly unless the
8940ac07820SSatish Balay    matrix is square and the column and row owerships are identical.
8950ac07820SSatish Balay    This is a BUG. The only way to fix it seems to be to access
8960ac07820SSatish Balay    baij->A and baij->B directly and not through the MatZeroRows()
8970ac07820SSatish Balay    routine.
8980ac07820SSatish Balay */
8990ac07820SSatish Balay static int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag)
9000ac07820SSatish Balay {
9010ac07820SSatish Balay   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ *) A->data;
9020ac07820SSatish Balay   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
9030ac07820SSatish Balay   int            *procs,*nprocs,j,found,idx,nsends,*work;
9040ac07820SSatish Balay   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
9050ac07820SSatish Balay   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
9060ac07820SSatish Balay   int            *lens,imdex,*lrows,*values,bs=l->bs;
9070ac07820SSatish Balay   MPI_Comm       comm = A->comm;
9080ac07820SSatish Balay   MPI_Request    *send_waits,*recv_waits;
9090ac07820SSatish Balay   MPI_Status     recv_status,*send_status;
9100ac07820SSatish Balay   IS             istmp;
9110ac07820SSatish Balay 
9120ac07820SSatish Balay   ierr = ISGetSize(is,&N); CHKERRQ(ierr);
9130ac07820SSatish Balay   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
9140ac07820SSatish Balay 
9150ac07820SSatish Balay   /*  first count number of contributors to each processor */
9160ac07820SSatish Balay   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
9170ac07820SSatish Balay   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
9180ac07820SSatish Balay   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
9190ac07820SSatish Balay   for ( i=0; i<N; i++ ) {
9200ac07820SSatish Balay     idx = rows[i];
9210ac07820SSatish Balay     found = 0;
9220ac07820SSatish Balay     for ( j=0; j<size; j++ ) {
9230ac07820SSatish Balay       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
9240ac07820SSatish Balay         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
9250ac07820SSatish Balay       }
9260ac07820SSatish Balay     }
9270ac07820SSatish Balay     if (!found) SETERRQ(1,"MatZeroRows_MPIBAIJ:Index out of range");
9280ac07820SSatish Balay   }
9290ac07820SSatish Balay   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
9300ac07820SSatish Balay 
9310ac07820SSatish Balay   /* inform other processors of number of messages and max length*/
9320ac07820SSatish Balay   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
9330ac07820SSatish Balay   MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
9340ac07820SSatish Balay   nrecvs = work[rank];
9350ac07820SSatish Balay   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
9360ac07820SSatish Balay   nmax = work[rank];
9370ac07820SSatish Balay   PetscFree(work);
9380ac07820SSatish Balay 
9390ac07820SSatish Balay   /* post receives:   */
9400ac07820SSatish Balay   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
9410ac07820SSatish Balay   CHKPTRQ(rvalues);
9420ac07820SSatish Balay   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
9430ac07820SSatish Balay   CHKPTRQ(recv_waits);
9440ac07820SSatish Balay   for ( i=0; i<nrecvs; i++ ) {
9450ac07820SSatish Balay     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
9460ac07820SSatish Balay   }
9470ac07820SSatish Balay 
9480ac07820SSatish Balay   /* do sends:
9490ac07820SSatish Balay       1) starts[i] gives the starting index in svalues for stuff going to
9500ac07820SSatish Balay          the ith processor
9510ac07820SSatish Balay   */
9520ac07820SSatish Balay   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
9530ac07820SSatish Balay   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
9540ac07820SSatish Balay   CHKPTRQ(send_waits);
9550ac07820SSatish Balay   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
9560ac07820SSatish Balay   starts[0] = 0;
9570ac07820SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
9580ac07820SSatish Balay   for ( i=0; i<N; i++ ) {
9590ac07820SSatish Balay     svalues[starts[owner[i]]++] = rows[i];
9600ac07820SSatish Balay   }
9610ac07820SSatish Balay   ISRestoreIndices(is,&rows);
9620ac07820SSatish Balay 
9630ac07820SSatish Balay   starts[0] = 0;
9640ac07820SSatish Balay   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
9650ac07820SSatish Balay   count = 0;
9660ac07820SSatish Balay   for ( i=0; i<size; i++ ) {
9670ac07820SSatish Balay     if (procs[i]) {
9680ac07820SSatish Balay       MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
9690ac07820SSatish Balay     }
9700ac07820SSatish Balay   }
9710ac07820SSatish Balay   PetscFree(starts);
9720ac07820SSatish Balay 
9730ac07820SSatish Balay   base = owners[rank]*bs;
9740ac07820SSatish Balay 
9750ac07820SSatish Balay   /*  wait on receives */
9760ac07820SSatish Balay   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
9770ac07820SSatish Balay   source = lens + nrecvs;
9780ac07820SSatish Balay   count  = nrecvs; slen = 0;
9790ac07820SSatish Balay   while (count) {
9800ac07820SSatish Balay     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
9810ac07820SSatish Balay     /* unpack receives into our local space */
9820ac07820SSatish Balay     MPI_Get_count(&recv_status,MPI_INT,&n);
9830ac07820SSatish Balay     source[imdex]  = recv_status.MPI_SOURCE;
9840ac07820SSatish Balay     lens[imdex]  = n;
9850ac07820SSatish Balay     slen += n;
9860ac07820SSatish Balay     count--;
9870ac07820SSatish Balay   }
9880ac07820SSatish Balay   PetscFree(recv_waits);
9890ac07820SSatish Balay 
9900ac07820SSatish Balay   /* move the data into the send scatter */
9910ac07820SSatish Balay   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
9920ac07820SSatish Balay   count = 0;
9930ac07820SSatish Balay   for ( i=0; i<nrecvs; i++ ) {
9940ac07820SSatish Balay     values = rvalues + i*nmax;
9950ac07820SSatish Balay     for ( j=0; j<lens[i]; j++ ) {
9960ac07820SSatish Balay       lrows[count++] = values[j] - base;
9970ac07820SSatish Balay     }
9980ac07820SSatish Balay   }
9990ac07820SSatish Balay   PetscFree(rvalues); PetscFree(lens);
10000ac07820SSatish Balay   PetscFree(owner); PetscFree(nprocs);
10010ac07820SSatish Balay 
10020ac07820SSatish Balay   /* actually zap the local rows */
1003537820f0SBarry Smith   ierr = ISCreateGeneral(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
10040ac07820SSatish Balay   PLogObjectParent(A,istmp);
10050ac07820SSatish Balay   PetscFree(lrows);
10060ac07820SSatish Balay   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
10070ac07820SSatish Balay   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
10080ac07820SSatish Balay   ierr = ISDestroy(istmp); CHKERRQ(ierr);
10090ac07820SSatish Balay 
10100ac07820SSatish Balay   /* wait on sends */
10110ac07820SSatish Balay   if (nsends) {
10120ac07820SSatish Balay     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
10130ac07820SSatish Balay     CHKPTRQ(send_status);
10140ac07820SSatish Balay     MPI_Waitall(nsends,send_waits,send_status);
10150ac07820SSatish Balay     PetscFree(send_status);
10160ac07820SSatish Balay   }
10170ac07820SSatish Balay   PetscFree(send_waits); PetscFree(svalues);
10180ac07820SSatish Balay 
10190ac07820SSatish Balay   return 0;
10200ac07820SSatish Balay }
1021ba4ca20aSSatish Balay extern int MatPrintHelp_SeqBAIJ(Mat);
1022ba4ca20aSSatish Balay static int MatPrintHelp_MPIBAIJ(Mat A)
1023ba4ca20aSSatish Balay {
1024ba4ca20aSSatish Balay   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
1025ba4ca20aSSatish Balay 
1026ba4ca20aSSatish Balay   if (!a->rank) return MatPrintHelp_SeqBAIJ(a->A);
1027ba4ca20aSSatish Balay   else return 0;
1028ba4ca20aSSatish Balay }
10290ac07820SSatish Balay 
10300ac07820SSatish Balay static int MatConvertSameType_MPIBAIJ(Mat,Mat *,int);
10310ac07820SSatish Balay 
103279bdfe76SSatish Balay /* -------------------------------------------------------------------*/
103379bdfe76SSatish Balay static struct _MatOps MatOps = {
1034bd16c2feSSatish Balay   MatSetValues_MPIBAIJ,MatGetRow_MPIBAIJ,MatRestoreRow_MPIBAIJ,MatMult_MPIBAIJ,
103593bc47c4SSatish Balay   MatMultAdd_MPIBAIJ,MatMultTrans_MPIBAIJ,MatMultTransAdd_MPIBAIJ,MatSolve_MPIBAIJ,
103693bc47c4SSatish Balay   MatSolveAdd_MPIBAIJ,MatSolveTrans_MPIBAIJ,MatSolveTransAdd_MPIBAIJ,MatLUFactor_MPIBAIJ,
10370ac07820SSatish Balay   0,0,MatTranspose_MPIBAIJ,MatGetInfo_MPIBAIJ,
1038acdf5bf4SSatish Balay   0,MatGetDiagonal_MPIBAIJ,0,MatNorm_MPIBAIJ,
103958667388SSatish Balay   MatAssemblyBegin_MPIBAIJ,MatAssemblyEnd_MPIBAIJ,0,MatSetOption_MPIBAIJ,
10400ac07820SSatish Balay   MatZeroEntries_MPIBAIJ,MatZeroRows_MPIBAIJ,MatGetReordering_MPIBAIJ,MatLUFactorSymbolic_MPIBAIJ,
104193bc47c4SSatish Balay   MatLUFactorNumeric_MPIBAIJ,0,0,MatGetSize_MPIBAIJ,
104293bc47c4SSatish Balay   MatGetLocalSize_MPIBAIJ,MatGetOwnershipRange_MPIBAIJ,MatILUFactorSymbolic_MPIBAIJ,0,
1043905e6a2fSBarry Smith   0,0,0,MatConvertSameType_MPIBAIJ,0,0,
1044d212a18eSSatish Balay   0,0,0,MatGetSubMatrices_MPIBAIJ,
1045ba4ca20aSSatish Balay   MatIncreaseOverlap_MPIBAIJ,MatGetValues_MPIBAIJ,0,MatPrintHelp_MPIBAIJ,
10465a838052SSatish Balay   MatScale_MPIBAIJ,0,0,0,MatGetBlockSize_MPIBAIJ};
104779bdfe76SSatish Balay 
104879bdfe76SSatish Balay 
104979bdfe76SSatish Balay /*@C
105079bdfe76SSatish Balay    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
105179bdfe76SSatish Balay    (block compressed row).  For good matrix assembly performance
105279bdfe76SSatish Balay    the user should preallocate the matrix storage by setting the parameters
105379bdfe76SSatish Balay    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
105479bdfe76SSatish Balay    performance can be increased by more than a factor of 50.
105579bdfe76SSatish Balay 
105679bdfe76SSatish Balay    Input Parameters:
105779bdfe76SSatish Balay .  comm - MPI communicator
105879bdfe76SSatish Balay .  bs   - size of blockk
105979bdfe76SSatish Balay .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
106092e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
106192e8d321SLois Curfman McInnes            y vector for the matrix-vector product y = Ax.
106292e8d321SLois Curfman McInnes .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
106392e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
106492e8d321SLois Curfman McInnes            x vector for the matrix-vector product y = Ax.
106579bdfe76SSatish Balay .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
106692e8d321SLois Curfman McInnes .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
106779bdfe76SSatish Balay .  d_nz  - number of block nonzeros per block row in diagonal portion of local
106879bdfe76SSatish Balay            submatrix  (same for all local rows)
106992e8d321SLois Curfman McInnes .  d_nzz - array containing the number of block nonzeros in the various block rows
107092e8d321SLois Curfman McInnes            of the in diagonal portion of the local (possibly different for each block
107192e8d321SLois Curfman McInnes            row) or PETSC_NULL.  You must leave room for the diagonal entry even if
107292e8d321SLois Curfman McInnes            it is zero.
107392e8d321SLois Curfman McInnes .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
107479bdfe76SSatish Balay            submatrix (same for all local rows).
107592e8d321SLois Curfman McInnes .  o_nzz - array containing the number of nonzeros in the various block rows of the
107692e8d321SLois Curfman McInnes            off-diagonal portion of the local submatrix (possibly different for
107792e8d321SLois Curfman McInnes            each block row) or PETSC_NULL.
107879bdfe76SSatish Balay 
107979bdfe76SSatish Balay    Output Parameter:
108079bdfe76SSatish Balay .  A - the matrix
108179bdfe76SSatish Balay 
108279bdfe76SSatish Balay    Notes:
108379bdfe76SSatish Balay    The user MUST specify either the local or global matrix dimensions
108479bdfe76SSatish Balay    (possibly both).
108579bdfe76SSatish Balay 
108679bdfe76SSatish Balay    Storage Information:
108779bdfe76SSatish Balay    For a square global matrix we define each processor's diagonal portion
108879bdfe76SSatish Balay    to be its local rows and the corresponding columns (a square submatrix);
108979bdfe76SSatish Balay    each processor's off-diagonal portion encompasses the remainder of the
109079bdfe76SSatish Balay    local matrix (a rectangular submatrix).
109179bdfe76SSatish Balay 
109279bdfe76SSatish Balay    The user can specify preallocated storage for the diagonal part of
109379bdfe76SSatish Balay    the local submatrix with either d_nz or d_nnz (not both).  Set
109479bdfe76SSatish Balay    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
109579bdfe76SSatish Balay    memory allocation.  Likewise, specify preallocated storage for the
109679bdfe76SSatish Balay    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
109779bdfe76SSatish Balay 
109879bdfe76SSatish Balay    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
109979bdfe76SSatish Balay    the figure below we depict these three local rows and all columns (0-11).
110079bdfe76SSatish Balay 
110179bdfe76SSatish Balay $          0 1 2 3 4 5 6 7 8 9 10 11
110279bdfe76SSatish Balay $         -------------------
110379bdfe76SSatish Balay $  row 3  |  o o o d d d o o o o o o
110479bdfe76SSatish Balay $  row 4  |  o o o d d d o o o o o o
110579bdfe76SSatish Balay $  row 5  |  o o o d d d o o o o o o
110679bdfe76SSatish Balay $         -------------------
110779bdfe76SSatish Balay $
110879bdfe76SSatish Balay 
110979bdfe76SSatish Balay    Thus, any entries in the d locations are stored in the d (diagonal)
111079bdfe76SSatish Balay    submatrix, and any entries in the o locations are stored in the
111179bdfe76SSatish Balay    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
111257b952d6SSatish Balay    stored simply in the MATSEQBAIJ format for compressed row storage.
111379bdfe76SSatish Balay 
111479bdfe76SSatish Balay    Now d_nz should indicate the number of nonzeros per row in the d matrix,
111579bdfe76SSatish Balay    and o_nz should indicate the number of nonzeros per row in the o matrix.
111679bdfe76SSatish Balay    In general, for PDE problems in which most nonzeros are near the diagonal,
111792e8d321SLois Curfman McInnes    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
111892e8d321SLois Curfman McInnes    or you will get TERRIBLE performance; see the users' manual chapter on
111992e8d321SLois Curfman McInnes    matrices and the file $(PETSC_DIR)/Performance.
112079bdfe76SSatish Balay 
112192e8d321SLois Curfman McInnes .keywords: matrix, block, aij, compressed row, sparse, parallel
112279bdfe76SSatish Balay 
112379bdfe76SSatish Balay .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues()
112479bdfe76SSatish Balay @*/
112579bdfe76SSatish Balay int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,
112679bdfe76SSatish Balay                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
112779bdfe76SSatish Balay {
112879bdfe76SSatish Balay   Mat          B;
112979bdfe76SSatish Balay   Mat_MPIBAIJ  *b;
1130cee3aa6bSSatish Balay   int          ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE;
113179bdfe76SSatish Balay 
113279bdfe76SSatish Balay   if (bs < 1) SETERRQ(1,"MatCreateMPIBAIJ: invalid block size specified");
113379bdfe76SSatish Balay   *A = 0;
113479bdfe76SSatish Balay   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIBAIJ,comm);
113579bdfe76SSatish Balay   PLogObjectCreate(B);
113679bdfe76SSatish Balay   B->data       = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b);
113779bdfe76SSatish Balay   PetscMemzero(b,sizeof(Mat_MPIBAIJ));
113879bdfe76SSatish Balay   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
113979bdfe76SSatish Balay   B->destroy    = MatDestroy_MPIBAIJ;
114079bdfe76SSatish Balay   B->view       = MatView_MPIBAIJ;
114157b952d6SSatish Balay 
114279bdfe76SSatish Balay   B->factor     = 0;
114379bdfe76SSatish Balay   B->assembled  = PETSC_FALSE;
114479bdfe76SSatish Balay 
114579bdfe76SSatish Balay   b->insertmode = NOT_SET_VALUES;
114679bdfe76SSatish Balay   MPI_Comm_rank(comm,&b->rank);
114779bdfe76SSatish Balay   MPI_Comm_size(comm,&b->size);
114879bdfe76SSatish Balay 
114979bdfe76SSatish Balay   if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
115057b952d6SSatish Balay     SETERRQ(1,"MatCreateMPIBAIJ:Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
1151cee3aa6bSSatish Balay   if ( M == PETSC_DECIDE && m == PETSC_DECIDE) SETERRQ(1,"MatCreateMPIBAIJ: either M or m should be specified");
1152cee3aa6bSSatish Balay   if ( M == PETSC_DECIDE && n == PETSC_DECIDE)SETERRQ(1,"MatCreateMPIBAIJ: either N or n should be specified");
1153cee3aa6bSSatish Balay   if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE;
1154cee3aa6bSSatish Balay   if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE;
115579bdfe76SSatish Balay 
115679bdfe76SSatish Balay   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
115779bdfe76SSatish Balay     work[0] = m; work[1] = n;
115879bdfe76SSatish Balay     mbs = m/bs; nbs = n/bs;
115979bdfe76SSatish Balay     MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
116079bdfe76SSatish Balay     if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;}
116179bdfe76SSatish Balay     if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;}
116279bdfe76SSatish Balay   }
116379bdfe76SSatish Balay   if (m == PETSC_DECIDE) {
116479bdfe76SSatish Balay     Mbs = M/bs;
116579bdfe76SSatish Balay     if (Mbs*bs != M) SETERRQ(1,"MatCreateMPIBAIJ: No of global rows must be divisible by blocksize");
116679bdfe76SSatish Balay     mbs = Mbs/b->size + ((Mbs % b->size) > b->rank);
116779bdfe76SSatish Balay     m   = mbs*bs;
116879bdfe76SSatish Balay   }
116979bdfe76SSatish Balay   if (n == PETSC_DECIDE) {
117079bdfe76SSatish Balay     Nbs = N/bs;
117179bdfe76SSatish Balay     if (Nbs*bs != N) SETERRQ(1,"MatCreateMPIBAIJ: No of global cols must be divisible by blocksize");
117279bdfe76SSatish Balay     nbs = Nbs/b->size + ((Nbs % b->size) > b->rank);
117379bdfe76SSatish Balay     n   = nbs*bs;
117479bdfe76SSatish Balay   }
1175cee3aa6bSSatish Balay   if (mbs*bs != m || nbs*bs != n) SETERRQ(1,"MatCreateMPIBAIJ: No of local rows, cols must be divisible by blocksize");
117679bdfe76SSatish Balay 
117779bdfe76SSatish Balay   b->m = m; B->m = m;
117879bdfe76SSatish Balay   b->n = n; B->n = n;
117979bdfe76SSatish Balay   b->N = N; B->N = N;
118079bdfe76SSatish Balay   b->M = M; B->M = M;
118179bdfe76SSatish Balay   b->bs  = bs;
118279bdfe76SSatish Balay   b->bs2 = bs*bs;
118379bdfe76SSatish Balay   b->mbs = mbs;
118479bdfe76SSatish Balay   b->nbs = nbs;
118579bdfe76SSatish Balay   b->Mbs = Mbs;
118679bdfe76SSatish Balay   b->Nbs = Nbs;
118779bdfe76SSatish Balay 
118879bdfe76SSatish Balay   /* build local table of row and column ownerships */
118979bdfe76SSatish Balay   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
119079bdfe76SSatish Balay   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIBAIJ));
11910ac07820SSatish Balay   b->cowners = b->rowners + b->size + 2;
119279bdfe76SSatish Balay   MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);
119379bdfe76SSatish Balay   b->rowners[0] = 0;
119479bdfe76SSatish Balay   for ( i=2; i<=b->size; i++ ) {
119579bdfe76SSatish Balay     b->rowners[i] += b->rowners[i-1];
119679bdfe76SSatish Balay   }
119779bdfe76SSatish Balay   b->rstart = b->rowners[b->rank];
119879bdfe76SSatish Balay   b->rend   = b->rowners[b->rank+1];
119979bdfe76SSatish Balay   MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);
120079bdfe76SSatish Balay   b->cowners[0] = 0;
120179bdfe76SSatish Balay   for ( i=2; i<=b->size; i++ ) {
120279bdfe76SSatish Balay     b->cowners[i] += b->cowners[i-1];
120379bdfe76SSatish Balay   }
120479bdfe76SSatish Balay   b->cstart = b->cowners[b->rank];
120579bdfe76SSatish Balay   b->cend   = b->cowners[b->rank+1];
120679bdfe76SSatish Balay 
120779bdfe76SSatish Balay   if (d_nz == PETSC_DEFAULT) d_nz = 5;
120879bdfe76SSatish Balay   ierr = MatCreateSeqBAIJ(MPI_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
120979bdfe76SSatish Balay   PLogObjectParent(B,b->A);
121079bdfe76SSatish Balay   if (o_nz == PETSC_DEFAULT) o_nz = 0;
121179bdfe76SSatish Balay   ierr = MatCreateSeqBAIJ(MPI_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
121279bdfe76SSatish Balay   PLogObjectParent(B,b->B);
121379bdfe76SSatish Balay 
121479bdfe76SSatish Balay   /* build cache for off array entries formed */
121579bdfe76SSatish Balay   ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
121679bdfe76SSatish Balay   b->colmap      = 0;
121779bdfe76SSatish Balay   b->garray      = 0;
121879bdfe76SSatish Balay   b->roworiented = 1;
121979bdfe76SSatish Balay 
122079bdfe76SSatish Balay   /* stuff used for matrix vector multiply */
122179bdfe76SSatish Balay   b->lvec      = 0;
122279bdfe76SSatish Balay   b->Mvctx     = 0;
122379bdfe76SSatish Balay 
122479bdfe76SSatish Balay   /* stuff for MatGetRow() */
122579bdfe76SSatish Balay   b->rowindices   = 0;
122679bdfe76SSatish Balay   b->rowvalues    = 0;
122779bdfe76SSatish Balay   b->getrowactive = PETSC_FALSE;
122879bdfe76SSatish Balay 
122979bdfe76SSatish Balay   *A = B;
123079bdfe76SSatish Balay   return 0;
123179bdfe76SSatish Balay }
12320ac07820SSatish Balay static int MatConvertSameType_MPIBAIJ(Mat matin,Mat *newmat,int cpvalues)
12330ac07820SSatish Balay {
12340ac07820SSatish Balay   Mat        mat;
12350ac07820SSatish Balay   Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data;
12360ac07820SSatish Balay   int        ierr, len=0, flg;
12370ac07820SSatish Balay 
12380ac07820SSatish Balay   *newmat       = 0;
12390ac07820SSatish Balay   PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIBAIJ,matin->comm);
12400ac07820SSatish Balay   PLogObjectCreate(mat);
12410ac07820SSatish Balay   mat->data       = (void *) (a = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(a);
12420ac07820SSatish Balay   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
12430ac07820SSatish Balay   mat->destroy    = MatDestroy_MPIBAIJ;
12440ac07820SSatish Balay   mat->view       = MatView_MPIBAIJ;
12450ac07820SSatish Balay   mat->factor     = matin->factor;
12460ac07820SSatish Balay   mat->assembled  = PETSC_TRUE;
12470ac07820SSatish Balay 
12480ac07820SSatish Balay   a->m = mat->m   = oldmat->m;
12490ac07820SSatish Balay   a->n = mat->n   = oldmat->n;
12500ac07820SSatish Balay   a->M = mat->M   = oldmat->M;
12510ac07820SSatish Balay   a->N = mat->N   = oldmat->N;
12520ac07820SSatish Balay 
12530ac07820SSatish Balay   a->bs  = oldmat->bs;
12540ac07820SSatish Balay   a->bs2 = oldmat->bs2;
12550ac07820SSatish Balay   a->mbs = oldmat->mbs;
12560ac07820SSatish Balay   a->nbs = oldmat->nbs;
12570ac07820SSatish Balay   a->Mbs = oldmat->Mbs;
12580ac07820SSatish Balay   a->Nbs = oldmat->Nbs;
12590ac07820SSatish Balay 
12600ac07820SSatish Balay   a->rstart       = oldmat->rstart;
12610ac07820SSatish Balay   a->rend         = oldmat->rend;
12620ac07820SSatish Balay   a->cstart       = oldmat->cstart;
12630ac07820SSatish Balay   a->cend         = oldmat->cend;
12640ac07820SSatish Balay   a->size         = oldmat->size;
12650ac07820SSatish Balay   a->rank         = oldmat->rank;
12660ac07820SSatish Balay   a->insertmode   = NOT_SET_VALUES;
12670ac07820SSatish Balay   a->rowvalues    = 0;
12680ac07820SSatish Balay   a->getrowactive = PETSC_FALSE;
12690ac07820SSatish Balay 
12700ac07820SSatish Balay   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
12710ac07820SSatish Balay   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIBAIJ));
12720ac07820SSatish Balay   a->cowners = a->rowners + a->size + 2;
12730ac07820SSatish Balay   PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
12740ac07820SSatish Balay   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
12750ac07820SSatish Balay   if (oldmat->colmap) {
12760ac07820SSatish Balay     a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap);
12770ac07820SSatish Balay     PLogObjectMemory(mat,(a->Nbs)*sizeof(int));
12780ac07820SSatish Balay     PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));
12790ac07820SSatish Balay   } else a->colmap = 0;
12800ac07820SSatish Balay   if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) {
12810ac07820SSatish Balay     a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray);
12820ac07820SSatish Balay     PLogObjectMemory(mat,len*sizeof(int));
12830ac07820SSatish Balay     PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
12840ac07820SSatish Balay   } else a->garray = 0;
12850ac07820SSatish Balay 
12860ac07820SSatish Balay   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
12870ac07820SSatish Balay   PLogObjectParent(mat,a->lvec);
12880ac07820SSatish Balay   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
12890ac07820SSatish Balay   PLogObjectParent(mat,a->Mvctx);
12900ac07820SSatish Balay   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
12910ac07820SSatish Balay   PLogObjectParent(mat,a->A);
12920ac07820SSatish Balay   ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
12930ac07820SSatish Balay   PLogObjectParent(mat,a->B);
12940ac07820SSatish Balay   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
12950ac07820SSatish Balay   if (flg) {
12960ac07820SSatish Balay     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
12970ac07820SSatish Balay   }
12980ac07820SSatish Balay   *newmat = mat;
12990ac07820SSatish Balay   return 0;
13000ac07820SSatish Balay }
130157b952d6SSatish Balay 
130257b952d6SSatish Balay #include "sys.h"
130357b952d6SSatish Balay 
130457b952d6SSatish Balay int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat)
130557b952d6SSatish Balay {
130657b952d6SSatish Balay   Mat          A;
130757b952d6SSatish Balay   int          i, nz, ierr, j,rstart, rend, fd;
130857b952d6SSatish Balay   Scalar       *vals,*buf;
130957b952d6SSatish Balay   MPI_Comm     comm = ((PetscObject)viewer)->comm;
131057b952d6SSatish Balay   MPI_Status   status;
1311cee3aa6bSSatish Balay   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
131257b952d6SSatish Balay   int          *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf;
131357b952d6SSatish Balay   int          flg,tag = ((PetscObject)viewer)->tag,bs=1,bs2,Mbs,mbs,extra_rows;
131457b952d6SSatish Balay   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
131557b952d6SSatish Balay   int          dcount,kmax,k,nzcount,tmp;
131657b952d6SSatish Balay 
131757b952d6SSatish Balay 
131857b952d6SSatish Balay   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
131957b952d6SSatish Balay   bs2  = bs*bs;
132057b952d6SSatish Balay 
132157b952d6SSatish Balay   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
132257b952d6SSatish Balay   if (!rank) {
132357b952d6SSatish Balay     ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
132457b952d6SSatish Balay     ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr);
1325cee3aa6bSSatish Balay     if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIBAIJ:not matrix object");
132657b952d6SSatish Balay   }
132757b952d6SSatish Balay 
132857b952d6SSatish Balay   MPI_Bcast(header+1,3,MPI_INT,0,comm);
132957b952d6SSatish Balay   M = header[1]; N = header[2];
133057b952d6SSatish Balay 
133157b952d6SSatish Balay   if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices");
133257b952d6SSatish Balay 
133357b952d6SSatish Balay   /*
133457b952d6SSatish Balay      This code adds extra rows to make sure the number of rows is
133557b952d6SSatish Balay      divisible by the blocksize
133657b952d6SSatish Balay   */
133757b952d6SSatish Balay   Mbs        = M/bs;
133857b952d6SSatish Balay   extra_rows = bs - M + bs*(Mbs);
133957b952d6SSatish Balay   if (extra_rows == bs) extra_rows = 0;
134057b952d6SSatish Balay   else                  Mbs++;
134157b952d6SSatish Balay   if (extra_rows &&!rank) {
1342537820f0SBarry Smith     PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n");
134357b952d6SSatish Balay   }
1344537820f0SBarry Smith 
134557b952d6SSatish Balay   /* determine ownership of all rows */
134657b952d6SSatish Balay   mbs = Mbs/size + ((Mbs % size) > rank);
134757b952d6SSatish Balay   m   = mbs * bs;
1348cee3aa6bSSatish Balay   rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners);
1349cee3aa6bSSatish Balay   browners = rowners + size + 1;
135057b952d6SSatish Balay   MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);
135157b952d6SSatish Balay   rowners[0] = 0;
1352cee3aa6bSSatish Balay   for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1];
1353cee3aa6bSSatish Balay   for ( i=0; i<=size;  i++ ) browners[i] = rowners[i]*bs;
135457b952d6SSatish Balay   rstart = rowners[rank];
135557b952d6SSatish Balay   rend   = rowners[rank+1];
135657b952d6SSatish Balay 
135757b952d6SSatish Balay   /* distribute row lengths to all processors */
135857b952d6SSatish Balay   locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens);
135957b952d6SSatish Balay   if (!rank) {
136057b952d6SSatish Balay     rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths);
136157b952d6SSatish Balay     ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
136257b952d6SSatish Balay     for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
136357b952d6SSatish Balay     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
1364cee3aa6bSSatish Balay     for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i];
1365cee3aa6bSSatish Balay     MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);
136657b952d6SSatish Balay     PetscFree(sndcounts);
136757b952d6SSatish Balay   }
136857b952d6SSatish Balay   else {
136957b952d6SSatish Balay     MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);
137057b952d6SSatish Balay   }
137157b952d6SSatish Balay 
137257b952d6SSatish Balay   if (!rank) {
137357b952d6SSatish Balay     /* calculate the number of nonzeros on each processor */
137457b952d6SSatish Balay     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
137557b952d6SSatish Balay     PetscMemzero(procsnz,size*sizeof(int));
137657b952d6SSatish Balay     for ( i=0; i<size; i++ ) {
137757b952d6SSatish Balay       for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) {
137857b952d6SSatish Balay         procsnz[i] += rowlengths[j];
137957b952d6SSatish Balay       }
138057b952d6SSatish Balay     }
138157b952d6SSatish Balay     PetscFree(rowlengths);
138257b952d6SSatish Balay 
138357b952d6SSatish Balay     /* determine max buffer needed and allocate it */
138457b952d6SSatish Balay     maxnz = 0;
138557b952d6SSatish Balay     for ( i=0; i<size; i++ ) {
138657b952d6SSatish Balay       maxnz = PetscMax(maxnz,procsnz[i]);
138757b952d6SSatish Balay     }
138857b952d6SSatish Balay     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
138957b952d6SSatish Balay 
139057b952d6SSatish Balay     /* read in my part of the matrix column indices  */
139157b952d6SSatish Balay     nz = procsnz[0];
139257b952d6SSatish Balay     ibuf = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
139357b952d6SSatish Balay     mycols = ibuf;
1394cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
139557b952d6SSatish Balay     ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr);
1396cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
1397cee3aa6bSSatish Balay 
139857b952d6SSatish Balay     /* read in every ones (except the last) and ship off */
139957b952d6SSatish Balay     for ( i=1; i<size-1; i++ ) {
140057b952d6SSatish Balay       nz = procsnz[i];
140157b952d6SSatish Balay       ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
140257b952d6SSatish Balay       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
140357b952d6SSatish Balay     }
140457b952d6SSatish Balay     /* read in the stuff for the last proc */
140557b952d6SSatish Balay     if ( size != 1 ) {
140657b952d6SSatish Balay       nz = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
140757b952d6SSatish Balay       ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
140857b952d6SSatish Balay       for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i;
1409cee3aa6bSSatish Balay       MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);
141057b952d6SSatish Balay     }
141157b952d6SSatish Balay     PetscFree(cols);
141257b952d6SSatish Balay   }
141357b952d6SSatish Balay   else {
141457b952d6SSatish Balay     /* determine buffer space needed for message */
141557b952d6SSatish Balay     nz = 0;
141657b952d6SSatish Balay     for ( i=0; i<m; i++ ) {
141757b952d6SSatish Balay       nz += locrowlens[i];
141857b952d6SSatish Balay     }
141957b952d6SSatish Balay     ibuf = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
142057b952d6SSatish Balay     mycols = ibuf;
142157b952d6SSatish Balay     /* receive message of column indices*/
142257b952d6SSatish Balay     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
142357b952d6SSatish Balay     MPI_Get_count(&status,MPI_INT,&maxnz);
1424cee3aa6bSSatish Balay     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIBAIJ:something is wrong with file");
142557b952d6SSatish Balay   }
142657b952d6SSatish Balay 
142757b952d6SSatish Balay   /* loop over local rows, determining number of off diagonal entries */
1428cee3aa6bSSatish Balay   dlens  = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens);
1429cee3aa6bSSatish Balay   odlens = dlens + (rend-rstart);
143057b952d6SSatish Balay   mask   = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask);
1431cee3aa6bSSatish Balay   PetscMemzero(mask,3*Mbs*sizeof(int));
143257b952d6SSatish Balay   masked1 = mask    + Mbs;
143357b952d6SSatish Balay   masked2 = masked1 + Mbs;
143457b952d6SSatish Balay   rowcount = 0; nzcount = 0;
143557b952d6SSatish Balay   for ( i=0; i<mbs; i++ ) {
143657b952d6SSatish Balay     dcount  = 0;
143757b952d6SSatish Balay     odcount = 0;
143857b952d6SSatish Balay     for ( j=0; j<bs; j++ ) {
143957b952d6SSatish Balay       kmax = locrowlens[rowcount];
144057b952d6SSatish Balay       for ( k=0; k<kmax; k++ ) {
144157b952d6SSatish Balay         tmp = mycols[nzcount++]/bs;
144257b952d6SSatish Balay         if (!mask[tmp]) {
144357b952d6SSatish Balay           mask[tmp] = 1;
144457b952d6SSatish Balay           if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp;
144557b952d6SSatish Balay           else masked1[dcount++] = tmp;
144657b952d6SSatish Balay         }
144757b952d6SSatish Balay       }
144857b952d6SSatish Balay       rowcount++;
144957b952d6SSatish Balay     }
1450cee3aa6bSSatish Balay 
145157b952d6SSatish Balay     dlens[i]  = dcount;
145257b952d6SSatish Balay     odlens[i] = odcount;
1453cee3aa6bSSatish Balay 
145457b952d6SSatish Balay     /* zero out the mask elements we set */
145557b952d6SSatish Balay     for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0;
145657b952d6SSatish Balay     for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0;
145757b952d6SSatish Balay   }
1458cee3aa6bSSatish Balay 
145957b952d6SSatish Balay   /* create our matrix */
1460537820f0SBarry Smith   ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);
1461537820f0SBarry Smith          CHKERRQ(ierr);
146257b952d6SSatish Balay   A = *newmat;
14636d4a8577SBarry Smith   MatSetOption(A,MAT_COLUMNS_SORTED);
146457b952d6SSatish Balay 
146557b952d6SSatish Balay   if (!rank) {
146657b952d6SSatish Balay     buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf);
146757b952d6SSatish Balay     /* read in my part of the matrix numerical values  */
146857b952d6SSatish Balay     nz = procsnz[0];
146957b952d6SSatish Balay     vals = buf;
1470cee3aa6bSSatish Balay     mycols = ibuf;
1471cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
147257b952d6SSatish Balay     ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
1473cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
1474537820f0SBarry Smith 
147557b952d6SSatish Balay     /* insert into matrix */
147657b952d6SSatish Balay     jj      = rstart*bs;
147757b952d6SSatish Balay     for ( i=0; i<m; i++ ) {
147857b952d6SSatish Balay       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
147957b952d6SSatish Balay       mycols += locrowlens[i];
148057b952d6SSatish Balay       vals   += locrowlens[i];
148157b952d6SSatish Balay       jj++;
148257b952d6SSatish Balay     }
148357b952d6SSatish Balay     /* read in other processors (except the last one) and ship out */
148457b952d6SSatish Balay     for ( i=1; i<size-1; i++ ) {
148557b952d6SSatish Balay       nz = procsnz[i];
148657b952d6SSatish Balay       vals = buf;
148757b952d6SSatish Balay       ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
148857b952d6SSatish Balay       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
148957b952d6SSatish Balay     }
149057b952d6SSatish Balay     /* the last proc */
149157b952d6SSatish Balay     if ( size != 1 ){
149257b952d6SSatish Balay       nz = procsnz[i] - extra_rows;
1493cee3aa6bSSatish Balay       vals = buf;
149457b952d6SSatish Balay       ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
149557b952d6SSatish Balay       for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0;
1496cee3aa6bSSatish Balay       MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);
149757b952d6SSatish Balay     }
149857b952d6SSatish Balay     PetscFree(procsnz);
149957b952d6SSatish Balay   }
150057b952d6SSatish Balay   else {
150157b952d6SSatish Balay     /* receive numeric values */
150257b952d6SSatish Balay     buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf);
150357b952d6SSatish Balay 
150457b952d6SSatish Balay     /* receive message of values*/
150557b952d6SSatish Balay     vals = buf;
1506cee3aa6bSSatish Balay     mycols = ibuf;
150757b952d6SSatish Balay     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
150857b952d6SSatish Balay     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1509cee3aa6bSSatish Balay     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIBAIJ:something is wrong with file");
151057b952d6SSatish Balay 
151157b952d6SSatish Balay     /* insert into matrix */
151257b952d6SSatish Balay     jj      = rstart*bs;
1513cee3aa6bSSatish Balay     for ( i=0; i<m; i++ ) {
151457b952d6SSatish Balay       ierr    = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
151557b952d6SSatish Balay       mycols += locrowlens[i];
151657b952d6SSatish Balay       vals   += locrowlens[i];
151757b952d6SSatish Balay       jj++;
151857b952d6SSatish Balay     }
151957b952d6SSatish Balay   }
152057b952d6SSatish Balay   PetscFree(locrowlens);
152157b952d6SSatish Balay   PetscFree(buf);
152257b952d6SSatish Balay   PetscFree(ibuf);
152357b952d6SSatish Balay   PetscFree(rowners);
152457b952d6SSatish Balay   PetscFree(dlens);
1525cee3aa6bSSatish Balay   PetscFree(mask);
15266d4a8577SBarry Smith   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
15276d4a8577SBarry Smith   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
152857b952d6SSatish Balay   return 0;
152957b952d6SSatish Balay }
153057b952d6SSatish Balay 
153157b952d6SSatish Balay 
1532