xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision eada665151041bf9d7614ddbfca52443d2b77989)
179bdfe76SSatish Balay #ifndef lint
2*eada6651SSatish Balay static char vcid[] = "$Id: mpibaij.c,v 1.47 1997/01/13 17:27:40 balay Exp balay $";
379bdfe76SSatish Balay #endif
479bdfe76SSatish Balay 
570f55243SBarry Smith #include "src/mat/impls/baij/mpi/mpibaij.h"
6c16cb8f2SBarry Smith #include "src/vec/vecimpl.h"
779bdfe76SSatish Balay 
857b952d6SSatish Balay #include "draw.h"
957b952d6SSatish Balay #include "pinclude/pviewer.h"
1057b952d6SSatish Balay 
1157b952d6SSatish Balay extern int MatSetUpMultiply_MPIBAIJ(Mat);
1257b952d6SSatish Balay extern int DisAssemble_MPIBAIJ(Mat);
13d212a18eSSatish Balay extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int);
14d212a18eSSatish Balay extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatGetSubMatrixCall,Mat **);
1593bc47c4SSatish Balay extern int MatLUFactorSymbolic_MPIBAIJ(Mat,IS,IS,double,Mat *);
1693bc47c4SSatish Balay extern int MatLUFactorNumeric_MPIBAIJ(Mat,Mat *);
1793bc47c4SSatish Balay extern int MatLUFactor_MPIBAIJ(Mat,IS,IS,double);
1893bc47c4SSatish Balay extern int MatSolve_MPIBAIJ(Mat,Vec,Vec);
1993bc47c4SSatish Balay extern int MatSolveAdd_MPIBAIJ(Mat,Vec,Vec,Vec);
2093bc47c4SSatish Balay extern int MatSolveTrans_MPIBAIJ(Mat,Vec,Vec);
2193bc47c4SSatish Balay extern int MatSolveTransAdd_MPIBAIJ(Mat,Vec,Vec,Vec);
2293bc47c4SSatish Balay extern int MatILUFactorSymbolic_MPIBAIJ(Mat,IS,IS,double,int,Mat *);
2357b952d6SSatish Balay 
243b2fbd54SBarry Smith 
25537820f0SBarry Smith /*
26537820f0SBarry Smith      Local utility routine that creates a mapping from the global column
2757b952d6SSatish Balay    number to the local number in the off-diagonal part of the local
2857b952d6SSatish Balay    storage of the matrix.  This is done in a non scable way since the
2957b952d6SSatish Balay    length of colmap equals the global matrix length.
3057b952d6SSatish Balay */
315615d1e5SSatish Balay #undef __FUNC__
325615d1e5SSatish Balay #define __FUNC__ "CreateColmap_MPIBAIJ_Private"
3357b952d6SSatish Balay static int CreateColmap_MPIBAIJ_Private(Mat mat)
3457b952d6SSatish Balay {
3557b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
3657b952d6SSatish Balay   Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data;
37928fc39bSSatish Balay   int         nbs = B->nbs,i,bs=B->bs;;
3857b952d6SSatish Balay 
3957b952d6SSatish Balay   baij->colmap = (int *) PetscMalloc(baij->Nbs*sizeof(int));CHKPTRQ(baij->colmap);
4057b952d6SSatish Balay   PLogObjectMemory(mat,baij->Nbs*sizeof(int));
4157b952d6SSatish Balay   PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));
42928fc39bSSatish Balay   for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1;
4357b952d6SSatish Balay   return 0;
4457b952d6SSatish Balay }
4557b952d6SSatish Balay 
465615d1e5SSatish Balay #undef __FUNC__
475615d1e5SSatish Balay #define __FUNC__ "MatGetRowIJ_MPIBAIJ("
483b2fbd54SBarry Smith static int MatGetRowIJ_MPIBAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
493b2fbd54SBarry Smith                             PetscTruth *done)
5057b952d6SSatish Balay {
513b2fbd54SBarry Smith   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *) mat->data;
5257b952d6SSatish Balay   int         ierr;
533b2fbd54SBarry Smith   if (aij->size == 1) {
543b2fbd54SBarry Smith     ierr = MatGetRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
55e3372554SBarry Smith   } else SETERRQ(1,0,"not supported in parallel");
563b2fbd54SBarry Smith   return 0;
573b2fbd54SBarry Smith }
583b2fbd54SBarry Smith 
595615d1e5SSatish Balay #undef __FUNC__
605615d1e5SSatish Balay #define __FUNC__ "MatRestoreRowIJ_MPIBAIJ"
613b2fbd54SBarry Smith static int MatRestoreRowIJ_MPIBAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
623b2fbd54SBarry Smith                                 PetscTruth *done)
633b2fbd54SBarry Smith {
643b2fbd54SBarry Smith   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *) mat->data;
653b2fbd54SBarry Smith   int        ierr;
663b2fbd54SBarry Smith   if (aij->size == 1) {
673b2fbd54SBarry Smith     ierr = MatRestoreRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
68e3372554SBarry Smith   } else SETERRQ(1,0,"not supported in parallel");
6957b952d6SSatish Balay   return 0;
7057b952d6SSatish Balay }
7180c1aa95SSatish Balay #define CHUNKSIZE  10
7280c1aa95SSatish Balay 
73*eada6651SSatish Balay #define  MatSetValues_SeqBAIJ_A_Private( row, col, value) \
7480c1aa95SSatish Balay { \
7580c1aa95SSatish Balay  \
7680c1aa95SSatish Balay     brow = row/bs;  \
7780c1aa95SSatish Balay     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
7880c1aa95SSatish Balay     rmax = imax[brow]; nrow = ailen[brow]; \
7980c1aa95SSatish Balay       bcol = col/bs; \
8080c1aa95SSatish Balay       ridx = row % bs; cidx = col % bs; \
8180c1aa95SSatish Balay       for ( _i=0; _i<nrow; _i++ ) { \
8280c1aa95SSatish Balay         if (rp[_i] > bcol) break; \
8380c1aa95SSatish Balay         if (rp[_i] == bcol) { \
8480c1aa95SSatish Balay           bap  = ap +  bs2*_i + bs*cidx + ridx; \
85*eada6651SSatish Balay           if (addv == ADD_VALUES) *bap += value;  \
86*eada6651SSatish Balay           else                    *bap  = value;  \
8780c1aa95SSatish Balay           goto _noinsert; \
8880c1aa95SSatish Balay         } \
8980c1aa95SSatish Balay       } \
9080c1aa95SSatish Balay       if (nrow >= rmax) { \
9180c1aa95SSatish Balay         /* there is no extra room in row, therefore enlarge */ \
9280c1aa95SSatish Balay         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
9380c1aa95SSatish Balay         Scalar *new_a; \
9480c1aa95SSatish Balay  \
9580c1aa95SSatish Balay         /* malloc new storage space */ \
9680c1aa95SSatish Balay         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \
9780c1aa95SSatish Balay         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
9880c1aa95SSatish Balay         new_j   = (int *) (new_a + bs2*new_nz); \
9980c1aa95SSatish Balay         new_i   = new_j + new_nz; \
10080c1aa95SSatish Balay  \
10180c1aa95SSatish Balay         /* copy over old data into new slots */ \
10280c1aa95SSatish Balay         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \
10380c1aa95SSatish Balay         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
10480c1aa95SSatish Balay         PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); \
10580c1aa95SSatish Balay         len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
10680c1aa95SSatish Balay         PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \
10780c1aa95SSatish Balay                                                            len*sizeof(int)); \
10880c1aa95SSatish Balay         PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); \
10980c1aa95SSatish Balay         PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \
11080c1aa95SSatish Balay         PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
11180c1aa95SSatish Balay                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));  \
11280c1aa95SSatish Balay         /* free up old matrix storage */ \
11380c1aa95SSatish Balay         PetscFree(a->a);  \
11480c1aa95SSatish Balay         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
11580c1aa95SSatish Balay         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
11680c1aa95SSatish Balay         a->singlemalloc = 1; \
11780c1aa95SSatish Balay  \
11880c1aa95SSatish Balay         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
11980c1aa95SSatish Balay         rmax = imax[brow] = imax[brow] + CHUNKSIZE; \
12080c1aa95SSatish Balay         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
12180c1aa95SSatish Balay         a->maxnz += bs2*CHUNKSIZE; \
12280c1aa95SSatish Balay         a->reallocs++; \
12380c1aa95SSatish Balay         a->nz++; \
12480c1aa95SSatish Balay       } \
12580c1aa95SSatish Balay       N = nrow++ - 1;  \
12680c1aa95SSatish Balay       /* shift up all the later entries in this row */ \
12780c1aa95SSatish Balay       for ( ii=N; ii>=_i; ii-- ) { \
12880c1aa95SSatish Balay         rp[ii+1] = rp[ii]; \
12980c1aa95SSatish Balay         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \
13080c1aa95SSatish Balay       } \
13180c1aa95SSatish Balay       if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));  \
13280c1aa95SSatish Balay       rp[_i]                      = bcol;  \
13380c1aa95SSatish Balay       ap[bs2*_i + bs*cidx + ridx] = value;  \
13480c1aa95SSatish Balay       _noinsert:; \
13580c1aa95SSatish Balay     ailen[brow] = nrow; \
13680c1aa95SSatish Balay }
13757b952d6SSatish Balay 
138639f9d9dSBarry Smith extern int MatSetValues_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
1395615d1e5SSatish Balay #undef __FUNC__
1405615d1e5SSatish Balay #define __FUNC__ "MatSetValues_MPIBAIJ"
14157b952d6SSatish Balay static int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
14257b952d6SSatish Balay {
14357b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
14457b952d6SSatish Balay   Scalar      value;
1454fa0d573SSatish Balay   int         ierr,i,j,row,col;
1464fa0d573SSatish Balay   int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
1474fa0d573SSatish Balay   int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
1484fa0d573SSatish Balay   int         cend_orig=baij->cend_bs,bs=baij->bs;
14957b952d6SSatish Balay 
150*eada6651SSatish Balay   /* Some Variables required in the macro */
15180c1aa95SSatish Balay   Mat A = baij->A;
15280c1aa95SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data;
15380c1aa95SSatish Balay   int         *rp,ii,nrow,_i,rmax,N;
15480c1aa95SSatish Balay   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen;
15580c1aa95SSatish Balay   int         *aj=a->j,brow,bcol;
15680c1aa95SSatish Balay   int          ridx,cidx,bs2=a->bs2;
15780c1aa95SSatish Balay   Scalar      *ap,*aa=a->a,*bap;
15880c1aa95SSatish Balay 
1592c3acbe9SBarry Smith #if defined(PETSC_BOPT_g)
16057b952d6SSatish Balay   if (baij->insertmode != NOT_SET_VALUES && baij->insertmode != addv) {
161e3372554SBarry Smith     SETERRQ(1,0,"Cannot mix inserts and adds");
16257b952d6SSatish Balay   }
1632c3acbe9SBarry Smith #endif
16457b952d6SSatish Balay   baij->insertmode = addv;
16557b952d6SSatish Balay   for ( i=0; i<m; i++ ) {
166639f9d9dSBarry Smith #if defined(PETSC_BOPT_g)
167e3372554SBarry Smith     if (im[i] < 0) SETERRQ(1,0,"Negative row");
168e3372554SBarry Smith     if (im[i] >= baij->M) SETERRQ(1,0,"Row too large");
169639f9d9dSBarry Smith #endif
17057b952d6SSatish Balay     if (im[i] >= rstart_orig && im[i] < rend_orig) {
17157b952d6SSatish Balay       row = im[i] - rstart_orig;
17257b952d6SSatish Balay       for ( j=0; j<n; j++ ) {
17357b952d6SSatish Balay         if (in[j] >= cstart_orig && in[j] < cend_orig){
17457b952d6SSatish Balay           col = in[j] - cstart_orig;
17557b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
176*eada6651SSatish Balay           MatSetValues_SeqBAIJ_A_Private(row,col,value);
17780c1aa95SSatish Balay           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
17857b952d6SSatish Balay         }
179639f9d9dSBarry Smith #if defined(PETSC_BOPT_g)
180e3372554SBarry Smith         else if (in[j] < 0) {SETERRQ(1,0,"Negative column");}
181e3372554SBarry Smith         else if (in[j] >= baij->N) {SETERRQ(1,0,"Col too large");}
182639f9d9dSBarry Smith #endif
18357b952d6SSatish Balay         else {
18457b952d6SSatish Balay           if (mat->was_assembled) {
185905e6a2fSBarry Smith             if (!baij->colmap) {
186905e6a2fSBarry Smith               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
187905e6a2fSBarry Smith             }
188905e6a2fSBarry Smith             col = baij->colmap[in[j]/bs] - 1 + in[j]%bs;
18957b952d6SSatish Balay             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
19057b952d6SSatish Balay               ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
19157b952d6SSatish Balay               col =  in[j];
19257b952d6SSatish Balay             }
19357b952d6SSatish Balay           }
19457b952d6SSatish Balay           else col = in[j];
19557b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
196639f9d9dSBarry Smith           ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
19757b952d6SSatish Balay         }
19857b952d6SSatish Balay       }
19957b952d6SSatish Balay     }
20057b952d6SSatish Balay     else {
20190f02eecSBarry Smith       if (roworiented && !baij->donotstash) {
20257b952d6SSatish Balay         ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
20357b952d6SSatish Balay       }
20457b952d6SSatish Balay       else {
20590f02eecSBarry Smith         if (!baij->donotstash) {
20657b952d6SSatish Balay           row = im[i];
20757b952d6SSatish Balay 	  for ( j=0; j<n; j++ ) {
20857b952d6SSatish Balay 	    ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
20957b952d6SSatish Balay           }
21057b952d6SSatish Balay         }
21157b952d6SSatish Balay       }
21257b952d6SSatish Balay     }
21390f02eecSBarry Smith   }
21457b952d6SSatish Balay   return 0;
21557b952d6SSatish Balay }
21657b952d6SSatish Balay 
2175615d1e5SSatish Balay #undef __FUNC__
2185615d1e5SSatish Balay #define __FUNC__ "MatGetValues_MPIBAIJ"
219d6de1c52SSatish Balay static int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
220d6de1c52SSatish Balay {
221d6de1c52SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
222d6de1c52SSatish Balay   int        bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs;
223d6de1c52SSatish Balay   int        bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col;
224d6de1c52SSatish Balay 
225d6de1c52SSatish Balay   for ( i=0; i<m; i++ ) {
226e3372554SBarry Smith     if (idxm[i] < 0) SETERRQ(1,0,"Negative row");
227e3372554SBarry Smith     if (idxm[i] >= baij->M) SETERRQ(1,0,"Row too large");
228d6de1c52SSatish Balay     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
229d6de1c52SSatish Balay       row = idxm[i] - bsrstart;
230d6de1c52SSatish Balay       for ( j=0; j<n; j++ ) {
231e3372554SBarry Smith         if (idxn[j] < 0) SETERRQ(1,0,"Negative column");
232e3372554SBarry Smith         if (idxn[j] >= baij->N) SETERRQ(1,0,"Col too large");
233d6de1c52SSatish Balay         if (idxn[j] >= bscstart && idxn[j] < bscend){
234d6de1c52SSatish Balay           col = idxn[j] - bscstart;
235d6de1c52SSatish Balay           ierr = MatGetValues(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
236d6de1c52SSatish Balay         }
237d6de1c52SSatish Balay         else {
238905e6a2fSBarry Smith           if (!baij->colmap) {
239905e6a2fSBarry Smith             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
240905e6a2fSBarry Smith           }
241e60e1c95SSatish Balay           if((baij->colmap[idxn[j]/bs]-1 < 0) ||
242dcb20de4SSatish Balay              (baij->garray[(baij->colmap[idxn[j]/bs]-1)/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
243d9d09a02SSatish Balay           else {
244dcb20de4SSatish Balay             col  = (baij->colmap[idxn[j]/bs]-1) + idxn[j]%bs;
245d6de1c52SSatish Balay             ierr = MatGetValues(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
246d6de1c52SSatish Balay           }
247d6de1c52SSatish Balay         }
248d6de1c52SSatish Balay       }
249d9d09a02SSatish Balay     }
250d6de1c52SSatish Balay     else {
251e3372554SBarry Smith       SETERRQ(1,0,"Only local values currently supported");
252d6de1c52SSatish Balay     }
253d6de1c52SSatish Balay   }
254d6de1c52SSatish Balay   return 0;
255d6de1c52SSatish Balay }
256d6de1c52SSatish Balay 
2575615d1e5SSatish Balay #undef __FUNC__
2585615d1e5SSatish Balay #define __FUNC__ "MatNorm_MPIBAIJ"
259d6de1c52SSatish Balay static int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm)
260d6de1c52SSatish Balay {
261d6de1c52SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
262d6de1c52SSatish Balay   Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data;
263acdf5bf4SSatish Balay   int        ierr, i,bs2=baij->bs2;
264d6de1c52SSatish Balay   double     sum = 0.0;
265d6de1c52SSatish Balay   Scalar     *v;
266d6de1c52SSatish Balay 
267d6de1c52SSatish Balay   if (baij->size == 1) {
268d6de1c52SSatish Balay     ierr =  MatNorm(baij->A,type,norm); CHKERRQ(ierr);
269d6de1c52SSatish Balay   } else {
270d6de1c52SSatish Balay     if (type == NORM_FROBENIUS) {
271d6de1c52SSatish Balay       v = amat->a;
272d6de1c52SSatish Balay       for (i=0; i<amat->nz*bs2; i++ ) {
273d6de1c52SSatish Balay #if defined(PETSC_COMPLEX)
274d6de1c52SSatish Balay         sum += real(conj(*v)*(*v)); v++;
275d6de1c52SSatish Balay #else
276d6de1c52SSatish Balay         sum += (*v)*(*v); v++;
277d6de1c52SSatish Balay #endif
278d6de1c52SSatish Balay       }
279d6de1c52SSatish Balay       v = bmat->a;
280d6de1c52SSatish Balay       for (i=0; i<bmat->nz*bs2; i++ ) {
281d6de1c52SSatish Balay #if defined(PETSC_COMPLEX)
282d6de1c52SSatish Balay         sum += real(conj(*v)*(*v)); v++;
283d6de1c52SSatish Balay #else
284d6de1c52SSatish Balay         sum += (*v)*(*v); v++;
285d6de1c52SSatish Balay #endif
286d6de1c52SSatish Balay       }
287d6de1c52SSatish Balay       MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
288d6de1c52SSatish Balay       *norm = sqrt(*norm);
289d6de1c52SSatish Balay     }
290acdf5bf4SSatish Balay     else
291e3372554SBarry Smith       SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
292d6de1c52SSatish Balay   }
293d6de1c52SSatish Balay   return 0;
294d6de1c52SSatish Balay }
29557b952d6SSatish Balay 
2965615d1e5SSatish Balay #undef __FUNC__
2975615d1e5SSatish Balay #define __FUNC__ "MatAssemblyBegin_MPIBAIJ"
29857b952d6SSatish Balay static int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
29957b952d6SSatish Balay {
30057b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
30157b952d6SSatish Balay   MPI_Comm    comm = mat->comm;
30257b952d6SSatish Balay   int         size = baij->size, *owners = baij->rowners,bs=baij->bs;
30357b952d6SSatish Balay   int         rank = baij->rank,tag = mat->tag, *owner,*starts,count,ierr;
30457b952d6SSatish Balay   MPI_Request *send_waits,*recv_waits;
30557b952d6SSatish Balay   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
30657b952d6SSatish Balay   InsertMode  addv;
30757b952d6SSatish Balay   Scalar      *rvalues,*svalues;
30857b952d6SSatish Balay 
30957b952d6SSatish Balay   /* make sure all processors are either in INSERTMODE or ADDMODE */
31057b952d6SSatish Balay   MPI_Allreduce(&baij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
31157b952d6SSatish Balay   if (addv == (ADD_VALUES|INSERT_VALUES)) {
312e3372554SBarry Smith     SETERRQ(1,0,"Some processors inserted others added");
31357b952d6SSatish Balay   }
31457b952d6SSatish Balay   baij->insertmode = addv; /* in case this processor had no cache */
31557b952d6SSatish Balay 
31657b952d6SSatish Balay   /*  first count number of contributors to each processor */
31757b952d6SSatish Balay   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
31857b952d6SSatish Balay   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
31957b952d6SSatish Balay   owner = (int *) PetscMalloc( (baij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
32057b952d6SSatish Balay   for ( i=0; i<baij->stash.n; i++ ) {
32157b952d6SSatish Balay     idx = baij->stash.idx[i];
32257b952d6SSatish Balay     for ( j=0; j<size; j++ ) {
32357b952d6SSatish Balay       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
32457b952d6SSatish Balay         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
32557b952d6SSatish Balay       }
32657b952d6SSatish Balay     }
32757b952d6SSatish Balay   }
32857b952d6SSatish Balay   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
32957b952d6SSatish Balay 
33057b952d6SSatish Balay   /* inform other processors of number of messages and max length*/
33157b952d6SSatish Balay   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
33257b952d6SSatish Balay   MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);
33357b952d6SSatish Balay   nreceives = work[rank];
33457b952d6SSatish Balay   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
33557b952d6SSatish Balay   nmax = work[rank];
33657b952d6SSatish Balay   PetscFree(work);
33757b952d6SSatish Balay 
33857b952d6SSatish Balay   /* post receives:
33957b952d6SSatish Balay        1) each message will consist of ordered pairs
34057b952d6SSatish Balay      (global index,value) we store the global index as a double
34157b952d6SSatish Balay      to simplify the message passing.
34257b952d6SSatish Balay        2) since we don't know how long each individual message is we
34357b952d6SSatish Balay      allocate the largest needed buffer for each receive. Potentially
34457b952d6SSatish Balay      this is a lot of wasted space.
34557b952d6SSatish Balay 
34657b952d6SSatish Balay 
34757b952d6SSatish Balay        This could be done better.
34857b952d6SSatish Balay   */
34957b952d6SSatish Balay   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
35057b952d6SSatish Balay   CHKPTRQ(rvalues);
35157b952d6SSatish Balay   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
35257b952d6SSatish Balay   CHKPTRQ(recv_waits);
35357b952d6SSatish Balay   for ( i=0; i<nreceives; i++ ) {
35457b952d6SSatish Balay     MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
35557b952d6SSatish Balay               comm,recv_waits+i);
35657b952d6SSatish Balay   }
35757b952d6SSatish Balay 
35857b952d6SSatish Balay   /* do sends:
35957b952d6SSatish Balay       1) starts[i] gives the starting index in svalues for stuff going to
36057b952d6SSatish Balay          the ith processor
36157b952d6SSatish Balay   */
36257b952d6SSatish Balay   svalues = (Scalar *) PetscMalloc(3*(baij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
36357b952d6SSatish Balay   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
36457b952d6SSatish Balay   CHKPTRQ(send_waits);
36557b952d6SSatish Balay   starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
36657b952d6SSatish Balay   starts[0] = 0;
36757b952d6SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
36857b952d6SSatish Balay   for ( i=0; i<baij->stash.n; i++ ) {
36957b952d6SSatish Balay     svalues[3*starts[owner[i]]]       = (Scalar)  baij->stash.idx[i];
37057b952d6SSatish Balay     svalues[3*starts[owner[i]]+1]     = (Scalar)  baij->stash.idy[i];
37157b952d6SSatish Balay     svalues[3*(starts[owner[i]]++)+2] =  baij->stash.array[i];
37257b952d6SSatish Balay   }
37357b952d6SSatish Balay   PetscFree(owner);
37457b952d6SSatish Balay   starts[0] = 0;
37557b952d6SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
37657b952d6SSatish Balay   count = 0;
37757b952d6SSatish Balay   for ( i=0; i<size; i++ ) {
37857b952d6SSatish Balay     if (procs[i]) {
37957b952d6SSatish Balay       MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
38057b952d6SSatish Balay                 comm,send_waits+count++);
38157b952d6SSatish Balay     }
38257b952d6SSatish Balay   }
38357b952d6SSatish Balay   PetscFree(starts); PetscFree(nprocs);
38457b952d6SSatish Balay 
38557b952d6SSatish Balay   /* Free cache space */
386d2dc9b81SLois Curfman McInnes   PLogInfo(mat,"MatAssemblyBegin_MPIBAIJ:Number of off-processor values %d\n",baij->stash.n);
38757b952d6SSatish Balay   ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr);
38857b952d6SSatish Balay 
38957b952d6SSatish Balay   baij->svalues    = svalues;    baij->rvalues    = rvalues;
39057b952d6SSatish Balay   baij->nsends     = nsends;     baij->nrecvs     = nreceives;
39157b952d6SSatish Balay   baij->send_waits = send_waits; baij->recv_waits = recv_waits;
39257b952d6SSatish Balay   baij->rmax       = nmax;
39357b952d6SSatish Balay 
39457b952d6SSatish Balay   return 0;
39557b952d6SSatish Balay }
39657b952d6SSatish Balay 
39757b952d6SSatish Balay 
3985615d1e5SSatish Balay #undef __FUNC__
3995615d1e5SSatish Balay #define __FUNC__ "MatAssemblyEnd_MPIBAIJ"
40057b952d6SSatish Balay static int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
40157b952d6SSatish Balay {
40257b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
40357b952d6SSatish Balay   MPI_Status  *send_status,recv_status;
40457b952d6SSatish Balay   int         imdex,nrecvs = baij->nrecvs, count = nrecvs, i, n, ierr;
40557b952d6SSatish Balay   int         bs=baij->bs,row,col,other_disassembled;
40657b952d6SSatish Balay   Scalar      *values,val;
40757b952d6SSatish Balay   InsertMode  addv = baij->insertmode;
40857b952d6SSatish Balay 
40957b952d6SSatish Balay   /*  wait on receives */
41057b952d6SSatish Balay   while (count) {
41157b952d6SSatish Balay     MPI_Waitany(nrecvs,baij->recv_waits,&imdex,&recv_status);
41257b952d6SSatish Balay     /* unpack receives into our local space */
41357b952d6SSatish Balay     values = baij->rvalues + 3*imdex*baij->rmax;
41457b952d6SSatish Balay     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
41557b952d6SSatish Balay     n = n/3;
41657b952d6SSatish Balay     for ( i=0; i<n; i++ ) {
41757b952d6SSatish Balay       row = (int) PetscReal(values[3*i]) - baij->rstart*bs;
41857b952d6SSatish Balay       col = (int) PetscReal(values[3*i+1]);
41957b952d6SSatish Balay       val = values[3*i+2];
42057b952d6SSatish Balay       if (col >= baij->cstart*bs && col < baij->cend*bs) {
42157b952d6SSatish Balay         col -= baij->cstart*bs;
42257b952d6SSatish Balay         MatSetValues(baij->A,1,&row,1,&col,&val,addv);
42357b952d6SSatish Balay       }
42457b952d6SSatish Balay       else {
42557b952d6SSatish Balay         if (mat->was_assembled) {
426905e6a2fSBarry Smith           if (!baij->colmap) {
427905e6a2fSBarry Smith             ierr = CreateColmap_MPIBAIJ_Private(mat); CHKERRQ(ierr);
428905e6a2fSBarry Smith           }
429905e6a2fSBarry Smith           col = (baij->colmap[col/bs]-1)*bs + col%bs;
43057b952d6SSatish Balay           if (col < 0  && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
43157b952d6SSatish Balay             ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
43257b952d6SSatish Balay             col = (int) PetscReal(values[3*i+1]);
43357b952d6SSatish Balay           }
43457b952d6SSatish Balay         }
43557b952d6SSatish Balay         MatSetValues(baij->B,1,&row,1,&col,&val,addv);
43657b952d6SSatish Balay       }
43757b952d6SSatish Balay     }
43857b952d6SSatish Balay     count--;
43957b952d6SSatish Balay   }
44057b952d6SSatish Balay   PetscFree(baij->recv_waits); PetscFree(baij->rvalues);
44157b952d6SSatish Balay 
44257b952d6SSatish Balay   /* wait on sends */
44357b952d6SSatish Balay   if (baij->nsends) {
44457b952d6SSatish Balay     send_status = (MPI_Status *) PetscMalloc(baij->nsends*sizeof(MPI_Status));
44557b952d6SSatish Balay     CHKPTRQ(send_status);
44657b952d6SSatish Balay     MPI_Waitall(baij->nsends,baij->send_waits,send_status);
44757b952d6SSatish Balay     PetscFree(send_status);
44857b952d6SSatish Balay   }
44957b952d6SSatish Balay   PetscFree(baij->send_waits); PetscFree(baij->svalues);
45057b952d6SSatish Balay 
45157b952d6SSatish Balay   baij->insertmode = NOT_SET_VALUES;
45257b952d6SSatish Balay   ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr);
45357b952d6SSatish Balay   ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr);
45457b952d6SSatish Balay 
45557b952d6SSatish Balay   /* determine if any processor has disassembled, if so we must
45657b952d6SSatish Balay      also disassemble ourselfs, in order that we may reassemble. */
45757b952d6SSatish Balay   MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
45857b952d6SSatish Balay   if (mat->was_assembled && !other_disassembled) {
45957b952d6SSatish Balay     ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
46057b952d6SSatish Balay   }
46157b952d6SSatish Balay 
4626d4a8577SBarry Smith   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
46357b952d6SSatish Balay     ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr);
46457b952d6SSatish Balay   }
46557b952d6SSatish Balay   ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr);
46657b952d6SSatish Balay   ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr);
46757b952d6SSatish Balay 
46857b952d6SSatish Balay   if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;}
46957b952d6SSatish Balay   return 0;
47057b952d6SSatish Balay }
47157b952d6SSatish Balay 
4725615d1e5SSatish Balay #undef __FUNC__
4735615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ_Binary"
47457b952d6SSatish Balay static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer)
47557b952d6SSatish Balay {
47657b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
47757b952d6SSatish Balay   int          ierr;
47857b952d6SSatish Balay 
47957b952d6SSatish Balay   if (baij->size == 1) {
48057b952d6SSatish Balay     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
48157b952d6SSatish Balay   }
482e3372554SBarry Smith   else SETERRQ(1,0,"Only uniprocessor output supported");
48357b952d6SSatish Balay   return 0;
48457b952d6SSatish Balay }
48557b952d6SSatish Balay 
4865615d1e5SSatish Balay #undef __FUNC__
4875615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworMatlab"
48857b952d6SSatish Balay static int MatView_MPIBAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
48957b952d6SSatish Balay {
49057b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
491cee3aa6bSSatish Balay   int          ierr, format,rank,bs = baij->bs;
49257b952d6SSatish Balay   FILE         *fd;
49357b952d6SSatish Balay   ViewerType   vtype;
49457b952d6SSatish Balay 
49557b952d6SSatish Balay   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
49657b952d6SSatish Balay   if (vtype  == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
49757b952d6SSatish Balay     ierr = ViewerGetFormat(viewer,&format);
498639f9d9dSBarry Smith     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
4994e220ebcSLois Curfman McInnes       MatInfo info;
50057b952d6SSatish Balay       MPI_Comm_rank(mat->comm,&rank);
50157b952d6SSatish Balay       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
5024e220ebcSLois Curfman McInnes       ierr = MatGetInfo(mat,MAT_LOCAL,&info);
50357b952d6SSatish Balay       PetscSequentialPhaseBegin(mat->comm,1);
50457b952d6SSatish Balay       fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
5054e220ebcSLois Curfman McInnes               rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
5064e220ebcSLois Curfman McInnes               baij->bs,(int)info.memory);
5074e220ebcSLois Curfman McInnes       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);
5084e220ebcSLois Curfman McInnes       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
5094e220ebcSLois Curfman McInnes       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);
5104e220ebcSLois Curfman McInnes       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
51157b952d6SSatish Balay       fflush(fd);
51257b952d6SSatish Balay       PetscSequentialPhaseEnd(mat->comm,1);
51357b952d6SSatish Balay       ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr);
51457b952d6SSatish Balay       return 0;
51557b952d6SSatish Balay     }
516639f9d9dSBarry Smith     else if (format == VIEWER_FORMAT_ASCII_INFO) {
517bcc3fcf6SBarry Smith       PetscPrintf(mat->comm,"  block size is %d\n",bs);
51857b952d6SSatish Balay       return 0;
51957b952d6SSatish Balay     }
52057b952d6SSatish Balay   }
52157b952d6SSatish Balay 
52257b952d6SSatish Balay   if (vtype == DRAW_VIEWER) {
52357b952d6SSatish Balay     Draw       draw;
52457b952d6SSatish Balay     PetscTruth isnull;
52557b952d6SSatish Balay     ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
52657b952d6SSatish Balay     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
52757b952d6SSatish Balay   }
52857b952d6SSatish Balay 
52957b952d6SSatish Balay   if (vtype == ASCII_FILE_VIEWER) {
53057b952d6SSatish Balay     ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
53157b952d6SSatish Balay     PetscSequentialPhaseBegin(mat->comm,1);
53257b952d6SSatish Balay     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
53357b952d6SSatish Balay            baij->rank,baij->m,baij->rstart*bs,baij->rend*bs,baij->n,
53457b952d6SSatish Balay             baij->cstart*bs,baij->cend*bs);
53557b952d6SSatish Balay     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
53657b952d6SSatish Balay     ierr = MatView(baij->B,viewer); CHKERRQ(ierr);
53757b952d6SSatish Balay     fflush(fd);
53857b952d6SSatish Balay     PetscSequentialPhaseEnd(mat->comm,1);
53957b952d6SSatish Balay   }
54057b952d6SSatish Balay   else {
54157b952d6SSatish Balay     int size = baij->size;
54257b952d6SSatish Balay     rank = baij->rank;
54357b952d6SSatish Balay     if (size == 1) {
54457b952d6SSatish Balay       ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
54557b952d6SSatish Balay     }
54657b952d6SSatish Balay     else {
54757b952d6SSatish Balay       /* assemble the entire matrix onto first processor. */
54857b952d6SSatish Balay       Mat         A;
54957b952d6SSatish Balay       Mat_SeqBAIJ *Aloc;
55057b952d6SSatish Balay       int         M = baij->M, N = baij->N,*ai,*aj,row,col,i,j,k,*rvals;
55157b952d6SSatish Balay       int         mbs=baij->mbs;
55257b952d6SSatish Balay       Scalar      *a;
55357b952d6SSatish Balay 
55457b952d6SSatish Balay       if (!rank) {
555cee3aa6bSSatish Balay         ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
55657b952d6SSatish Balay         CHKERRQ(ierr);
55757b952d6SSatish Balay       }
55857b952d6SSatish Balay       else {
559cee3aa6bSSatish Balay         ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
56057b952d6SSatish Balay         CHKERRQ(ierr);
56157b952d6SSatish Balay       }
56257b952d6SSatish Balay       PLogObjectParent(mat,A);
56357b952d6SSatish Balay 
56457b952d6SSatish Balay       /* copy over the A part */
56557b952d6SSatish Balay       Aloc = (Mat_SeqBAIJ*) baij->A->data;
56657b952d6SSatish Balay       ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
56757b952d6SSatish Balay       row = baij->rstart;
56857b952d6SSatish Balay       rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
56957b952d6SSatish Balay 
57057b952d6SSatish Balay       for ( i=0; i<mbs; i++ ) {
57157b952d6SSatish Balay         rvals[0] = bs*(baij->rstart + i);
57257b952d6SSatish Balay         for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
57357b952d6SSatish Balay         for ( j=ai[i]; j<ai[i+1]; j++ ) {
57457b952d6SSatish Balay           col = (baij->cstart+aj[j])*bs;
57557b952d6SSatish Balay           for (k=0; k<bs; k++ ) {
576cee3aa6bSSatish Balay             ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
577cee3aa6bSSatish Balay             col++; a += bs;
57857b952d6SSatish Balay           }
57957b952d6SSatish Balay         }
58057b952d6SSatish Balay       }
58157b952d6SSatish Balay       /* copy over the B part */
58257b952d6SSatish Balay       Aloc = (Mat_SeqBAIJ*) baij->B->data;
58357b952d6SSatish Balay       ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
58457b952d6SSatish Balay       row = baij->rstart*bs;
58557b952d6SSatish Balay       for ( i=0; i<mbs; i++ ) {
58657b952d6SSatish Balay         rvals[0] = bs*(baij->rstart + i);
58757b952d6SSatish Balay         for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
58857b952d6SSatish Balay         for ( j=ai[i]; j<ai[i+1]; j++ ) {
58957b952d6SSatish Balay           col = baij->garray[aj[j]]*bs;
59057b952d6SSatish Balay           for (k=0; k<bs; k++ ) {
591cee3aa6bSSatish Balay             ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
592cee3aa6bSSatish Balay             col++; a += bs;
59357b952d6SSatish Balay           }
59457b952d6SSatish Balay         }
59557b952d6SSatish Balay       }
59657b952d6SSatish Balay       PetscFree(rvals);
5976d4a8577SBarry Smith       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
5986d4a8577SBarry Smith       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
59957b952d6SSatish Balay       if (!rank) {
60057b952d6SSatish Balay         ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
60157b952d6SSatish Balay       }
60257b952d6SSatish Balay       ierr = MatDestroy(A); CHKERRQ(ierr);
60357b952d6SSatish Balay     }
60457b952d6SSatish Balay   }
60557b952d6SSatish Balay   return 0;
60657b952d6SSatish Balay }
60757b952d6SSatish Balay 
60857b952d6SSatish Balay 
60957b952d6SSatish Balay 
6105615d1e5SSatish Balay #undef __FUNC__
6115615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ"
61257b952d6SSatish Balay static int MatView_MPIBAIJ(PetscObject obj,Viewer viewer)
61357b952d6SSatish Balay {
61457b952d6SSatish Balay   Mat         mat = (Mat) obj;
61557b952d6SSatish Balay   int         ierr;
61657b952d6SSatish Balay   ViewerType  vtype;
61757b952d6SSatish Balay 
61857b952d6SSatish Balay   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
61957b952d6SSatish Balay   if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
62057b952d6SSatish Balay       vtype == DRAW_VIEWER       || vtype == MATLAB_VIEWER) {
62157b952d6SSatish Balay     ierr = MatView_MPIBAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
62257b952d6SSatish Balay   }
62357b952d6SSatish Balay   else if (vtype == BINARY_FILE_VIEWER) {
62457b952d6SSatish Balay     return MatView_MPIBAIJ_Binary(mat,viewer);
62557b952d6SSatish Balay   }
62657b952d6SSatish Balay   return 0;
62757b952d6SSatish Balay }
62857b952d6SSatish Balay 
6295615d1e5SSatish Balay #undef __FUNC__
6305615d1e5SSatish Balay #define __FUNC__ "MatDestroy_MPIBAIJ"
63179bdfe76SSatish Balay static int MatDestroy_MPIBAIJ(PetscObject obj)
63279bdfe76SSatish Balay {
63379bdfe76SSatish Balay   Mat         mat = (Mat) obj;
63479bdfe76SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
63579bdfe76SSatish Balay   int         ierr;
63679bdfe76SSatish Balay 
63779bdfe76SSatish Balay #if defined(PETSC_LOG)
63879bdfe76SSatish Balay   PLogObjectState(obj,"Rows=%d, Cols=%d",baij->M,baij->N);
63979bdfe76SSatish Balay #endif
64079bdfe76SSatish Balay 
64179bdfe76SSatish Balay   PetscFree(baij->rowners);
64279bdfe76SSatish Balay   ierr = MatDestroy(baij->A); CHKERRQ(ierr);
64379bdfe76SSatish Balay   ierr = MatDestroy(baij->B); CHKERRQ(ierr);
64479bdfe76SSatish Balay   if (baij->colmap) PetscFree(baij->colmap);
64579bdfe76SSatish Balay   if (baij->garray) PetscFree(baij->garray);
64679bdfe76SSatish Balay   if (baij->lvec)   VecDestroy(baij->lvec);
64779bdfe76SSatish Balay   if (baij->Mvctx)  VecScatterDestroy(baij->Mvctx);
64879bdfe76SSatish Balay   if (baij->rowvalues) PetscFree(baij->rowvalues);
64979bdfe76SSatish Balay   PetscFree(baij);
65090f02eecSBarry Smith   if (mat->mapping) {
65190f02eecSBarry Smith     ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr);
65290f02eecSBarry Smith   }
65379bdfe76SSatish Balay   PLogObjectDestroy(mat);
65479bdfe76SSatish Balay   PetscHeaderDestroy(mat);
65579bdfe76SSatish Balay   return 0;
65679bdfe76SSatish Balay }
65779bdfe76SSatish Balay 
6585615d1e5SSatish Balay #undef __FUNC__
6595615d1e5SSatish Balay #define __FUNC__ "MatMult_MPIBAIJ"
660cee3aa6bSSatish Balay static int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
661cee3aa6bSSatish Balay {
662cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
66347b4a8eaSLois Curfman McInnes   int         ierr, nt;
664cee3aa6bSSatish Balay 
665c16cb8f2SBarry Smith   VecGetLocalSize_Fast(xx,nt);
66647b4a8eaSLois Curfman McInnes   if (nt != a->n) {
667e3372554SBarry Smith     SETERRQ(1,0,"Incompatible parition of A and xx");
66847b4a8eaSLois Curfman McInnes   }
669c16cb8f2SBarry Smith   VecGetLocalSize_Fast(yy,nt);
67047b4a8eaSLois Curfman McInnes   if (nt != a->m) {
671e3372554SBarry Smith     SETERRQ(1,0,"Incompatible parition of A and yy");
67247b4a8eaSLois Curfman McInnes   }
67343a90d84SBarry Smith   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
674cee3aa6bSSatish Balay   ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr);
67543a90d84SBarry Smith   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
676cee3aa6bSSatish Balay   ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
67743a90d84SBarry Smith   ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
678cee3aa6bSSatish Balay   return 0;
679cee3aa6bSSatish Balay }
680cee3aa6bSSatish Balay 
6815615d1e5SSatish Balay #undef __FUNC__
6825615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_MPIBAIJ"
683cee3aa6bSSatish Balay static int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
684cee3aa6bSSatish Balay {
685cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
686cee3aa6bSSatish Balay   int        ierr;
68743a90d84SBarry Smith   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
688cee3aa6bSSatish Balay   ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
68943a90d84SBarry Smith   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
690cee3aa6bSSatish Balay   ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
691cee3aa6bSSatish Balay   return 0;
692cee3aa6bSSatish Balay }
693cee3aa6bSSatish Balay 
6945615d1e5SSatish Balay #undef __FUNC__
6955615d1e5SSatish Balay #define __FUNC__ "MatMultTrans_MPIBAIJ"
696cee3aa6bSSatish Balay static int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy)
697cee3aa6bSSatish Balay {
698cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
699cee3aa6bSSatish Balay   int        ierr;
700cee3aa6bSSatish Balay 
701cee3aa6bSSatish Balay   /* do nondiagonal part */
702cee3aa6bSSatish Balay   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
703cee3aa6bSSatish Balay   /* send it on its way */
704537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
705cee3aa6bSSatish Balay   /* do local part */
706cee3aa6bSSatish Balay   ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr);
707cee3aa6bSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
708cee3aa6bSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
709cee3aa6bSSatish Balay   /* but is not perhaps always true. */
710639f9d9dSBarry Smith   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
711cee3aa6bSSatish Balay   return 0;
712cee3aa6bSSatish Balay }
713cee3aa6bSSatish Balay 
7145615d1e5SSatish Balay #undef __FUNC__
7155615d1e5SSatish Balay #define __FUNC__ "MatMultTransAdd_MPIBAIJ"
716cee3aa6bSSatish Balay static int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
717cee3aa6bSSatish Balay {
718cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
719cee3aa6bSSatish Balay   int        ierr;
720cee3aa6bSSatish Balay 
721cee3aa6bSSatish Balay   /* do nondiagonal part */
722cee3aa6bSSatish Balay   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
723cee3aa6bSSatish Balay   /* send it on its way */
724537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
725cee3aa6bSSatish Balay   /* do local part */
726cee3aa6bSSatish Balay   ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
727cee3aa6bSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
728cee3aa6bSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
729cee3aa6bSSatish Balay   /* but is not perhaps always true. */
730537820f0SBarry Smith   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
731cee3aa6bSSatish Balay   return 0;
732cee3aa6bSSatish Balay }
733cee3aa6bSSatish Balay 
734cee3aa6bSSatish Balay /*
735cee3aa6bSSatish Balay   This only works correctly for square matrices where the subblock A->A is the
736cee3aa6bSSatish Balay    diagonal block
737cee3aa6bSSatish Balay */
7385615d1e5SSatish Balay #undef __FUNC__
7395615d1e5SSatish Balay #define __FUNC__ "MatGetDiagonal_MPIBAIJ"
740cee3aa6bSSatish Balay static int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
741cee3aa6bSSatish Balay {
742cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
743cee3aa6bSSatish Balay   if (a->M != a->N)
744e3372554SBarry Smith     SETERRQ(1,0,"Supports only square matrix where A->A is diag block");
745cee3aa6bSSatish Balay   return MatGetDiagonal(a->A,v);
746cee3aa6bSSatish Balay }
747cee3aa6bSSatish Balay 
7485615d1e5SSatish Balay #undef __FUNC__
7495615d1e5SSatish Balay #define __FUNC__ "MatScale_MPIBAIJ"
750cee3aa6bSSatish Balay static int MatScale_MPIBAIJ(Scalar *aa,Mat A)
751cee3aa6bSSatish Balay {
752cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
753cee3aa6bSSatish Balay   int        ierr;
754cee3aa6bSSatish Balay   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
755cee3aa6bSSatish Balay   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
756cee3aa6bSSatish Balay   return 0;
757cee3aa6bSSatish Balay }
758026e39d0SSatish Balay 
7595615d1e5SSatish Balay #undef __FUNC__
7605615d1e5SSatish Balay #define __FUNC__ "MatGetSize_MPIBAIJ"
76157b952d6SSatish Balay static int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n)
76257b952d6SSatish Balay {
76357b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
76457b952d6SSatish Balay   *m = mat->M; *n = mat->N;
76557b952d6SSatish Balay   return 0;
76657b952d6SSatish Balay }
76757b952d6SSatish Balay 
7685615d1e5SSatish Balay #undef __FUNC__
7695615d1e5SSatish Balay #define __FUNC__ "MatGetLocalSize_MPIBAIJ"
77057b952d6SSatish Balay static int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n)
77157b952d6SSatish Balay {
77257b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
77357b952d6SSatish Balay   *m = mat->m; *n = mat->N;
77457b952d6SSatish Balay   return 0;
77557b952d6SSatish Balay }
77657b952d6SSatish Balay 
7775615d1e5SSatish Balay #undef __FUNC__
7785615d1e5SSatish Balay #define __FUNC__ "MatGetOwnershipRange_MPIBAIJ"
77957b952d6SSatish Balay static int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n)
78057b952d6SSatish Balay {
78157b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
78257b952d6SSatish Balay   *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs;
78357b952d6SSatish Balay   return 0;
78457b952d6SSatish Balay }
78557b952d6SSatish Balay 
786acdf5bf4SSatish Balay extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
787acdf5bf4SSatish Balay extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
788acdf5bf4SSatish Balay 
7895615d1e5SSatish Balay #undef __FUNC__
7905615d1e5SSatish Balay #define __FUNC__ "MatGetRow_MPIBAIJ"
791acdf5bf4SSatish Balay int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
792acdf5bf4SSatish Balay {
793acdf5bf4SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
794acdf5bf4SSatish Balay   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
795acdf5bf4SSatish Balay   int        bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB;
796d9d09a02SSatish Balay   int        nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs;
797d9d09a02SSatish Balay   int        *cmap, *idx_p,cstart = mat->cstart;
798acdf5bf4SSatish Balay 
799e3372554SBarry Smith   if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,0,"Already active");
800acdf5bf4SSatish Balay   mat->getrowactive = PETSC_TRUE;
801acdf5bf4SSatish Balay 
802acdf5bf4SSatish Balay   if (!mat->rowvalues && (idx || v)) {
803acdf5bf4SSatish Balay     /*
804acdf5bf4SSatish Balay         allocate enough space to hold information from the longest row.
805acdf5bf4SSatish Balay     */
806acdf5bf4SSatish Balay     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data;
807bd16c2feSSatish Balay     int     max = 1,mbs = mat->mbs,tmp;
808bd16c2feSSatish Balay     for ( i=0; i<mbs; i++ ) {
809acdf5bf4SSatish Balay       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
810acdf5bf4SSatish Balay       if (max < tmp) { max = tmp; }
811acdf5bf4SSatish Balay     }
812acdf5bf4SSatish Balay     mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar)));
813acdf5bf4SSatish Balay     CHKPTRQ(mat->rowvalues);
814acdf5bf4SSatish Balay     mat->rowindices = (int *) (mat->rowvalues + max*bs2);
815acdf5bf4SSatish Balay   }
816acdf5bf4SSatish Balay 
817acdf5bf4SSatish Balay 
818e3372554SBarry Smith   if (row < brstart || row >= brend) SETERRQ(1,0,"Only local rows")
819d9d09a02SSatish Balay   lrow = row - brstart;
820acdf5bf4SSatish Balay 
821acdf5bf4SSatish Balay   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
822acdf5bf4SSatish Balay   if (!v)   {pvA = 0; pvB = 0;}
823acdf5bf4SSatish Balay   if (!idx) {pcA = 0; if (!v) pcB = 0;}
824acdf5bf4SSatish Balay   ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
825acdf5bf4SSatish Balay   ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
826acdf5bf4SSatish Balay   nztot = nzA + nzB;
827acdf5bf4SSatish Balay 
828acdf5bf4SSatish Balay   cmap  = mat->garray;
829acdf5bf4SSatish Balay   if (v  || idx) {
830acdf5bf4SSatish Balay     if (nztot) {
831acdf5bf4SSatish Balay       /* Sort by increasing column numbers, assuming A and B already sorted */
832acdf5bf4SSatish Balay       int imark = -1;
833acdf5bf4SSatish Balay       if (v) {
834acdf5bf4SSatish Balay         *v = v_p = mat->rowvalues;
835acdf5bf4SSatish Balay         for ( i=0; i<nzB; i++ ) {
836d9d09a02SSatish Balay           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
837acdf5bf4SSatish Balay           else break;
838acdf5bf4SSatish Balay         }
839acdf5bf4SSatish Balay         imark = i;
840acdf5bf4SSatish Balay         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
841acdf5bf4SSatish Balay         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
842acdf5bf4SSatish Balay       }
843acdf5bf4SSatish Balay       if (idx) {
844acdf5bf4SSatish Balay         *idx = idx_p = mat->rowindices;
845acdf5bf4SSatish Balay         if (imark > -1) {
846acdf5bf4SSatish Balay           for ( i=0; i<imark; i++ ) {
847bd16c2feSSatish Balay             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
848acdf5bf4SSatish Balay           }
849acdf5bf4SSatish Balay         } else {
850acdf5bf4SSatish Balay           for ( i=0; i<nzB; i++ ) {
851d9d09a02SSatish Balay             if (cmap[cworkB[i]/bs] < cstart)
852d9d09a02SSatish Balay               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
853acdf5bf4SSatish Balay             else break;
854acdf5bf4SSatish Balay           }
855acdf5bf4SSatish Balay           imark = i;
856acdf5bf4SSatish Balay         }
857d9d09a02SSatish Balay         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart*bs + cworkA[i];
858d9d09a02SSatish Balay         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
859acdf5bf4SSatish Balay       }
860acdf5bf4SSatish Balay     }
861d212a18eSSatish Balay     else {
862d212a18eSSatish Balay       if (idx) *idx = 0;
863d212a18eSSatish Balay       if (v)   *v   = 0;
864d212a18eSSatish Balay     }
865acdf5bf4SSatish Balay   }
866acdf5bf4SSatish Balay   *nz = nztot;
867acdf5bf4SSatish Balay   ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
868acdf5bf4SSatish Balay   ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
869acdf5bf4SSatish Balay   return 0;
870acdf5bf4SSatish Balay }
871acdf5bf4SSatish Balay 
8725615d1e5SSatish Balay #undef __FUNC__
8735615d1e5SSatish Balay #define __FUNC__ "MatRestoreRow_MPIBAIJ"
874acdf5bf4SSatish Balay int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
875acdf5bf4SSatish Balay {
876acdf5bf4SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
877acdf5bf4SSatish Balay   if (baij->getrowactive == PETSC_FALSE) {
878e3372554SBarry Smith     SETERRQ(1,0,"MatGetRow not called");
879acdf5bf4SSatish Balay   }
880acdf5bf4SSatish Balay   baij->getrowactive = PETSC_FALSE;
881acdf5bf4SSatish Balay   return 0;
882acdf5bf4SSatish Balay }
883acdf5bf4SSatish Balay 
8845615d1e5SSatish Balay #undef __FUNC__
8855615d1e5SSatish Balay #define __FUNC__ "MatGetBlockSize_MPIBAIJ"
8865a838052SSatish Balay static int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
8875a838052SSatish Balay {
8885a838052SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
8895a838052SSatish Balay   *bs = baij->bs;
8905a838052SSatish Balay   return 0;
8915a838052SSatish Balay }
8925a838052SSatish Balay 
8935615d1e5SSatish Balay #undef __FUNC__
8945615d1e5SSatish Balay #define __FUNC__ "MatZeroEntries_MPIBAIJ"
89558667388SSatish Balay static int MatZeroEntries_MPIBAIJ(Mat A)
89658667388SSatish Balay {
89758667388SSatish Balay   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data;
89858667388SSatish Balay   int         ierr;
89958667388SSatish Balay   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
90058667388SSatish Balay   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
90158667388SSatish Balay   return 0;
90258667388SSatish Balay }
9030ac07820SSatish Balay 
9045615d1e5SSatish Balay #undef __FUNC__
9055615d1e5SSatish Balay #define __FUNC__ "MatGetInfo_MPIBAIJ"
9064e220ebcSLois Curfman McInnes static int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
9070ac07820SSatish Balay {
9084e220ebcSLois Curfman McInnes   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data;
9094e220ebcSLois Curfman McInnes   Mat         A = a->A, B = a->B;
9107d57db60SLois Curfman McInnes   int         ierr;
9117d57db60SLois Curfman McInnes   double      isend[5], irecv[5];
9120ac07820SSatish Balay 
9134e220ebcSLois Curfman McInnes   info->rows_global    = (double)a->M;
9144e220ebcSLois Curfman McInnes   info->columns_global = (double)a->N;
9154e220ebcSLois Curfman McInnes   info->rows_local     = (double)a->m;
9164e220ebcSLois Curfman McInnes   info->columns_local  = (double)a->N;
9174e220ebcSLois Curfman McInnes   info->block_size     = (double)a->bs;
9184e220ebcSLois Curfman McInnes   ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
9194e220ebcSLois Curfman McInnes   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->memory;
9204e220ebcSLois Curfman McInnes   ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
9214e220ebcSLois Curfman McInnes   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->memory;
9220ac07820SSatish Balay   if (flag == MAT_LOCAL) {
9234e220ebcSLois Curfman McInnes     info->nz_used      = isend[0];
9244e220ebcSLois Curfman McInnes     info->nz_allocated = isend[1];
9254e220ebcSLois Curfman McInnes     info->nz_unneeded  = isend[2];
9264e220ebcSLois Curfman McInnes     info->memory       = isend[3];
9274e220ebcSLois Curfman McInnes     info->mallocs      = isend[4];
9280ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_MAX) {
9290ac07820SSatish Balay     MPI_Allreduce(isend,irecv,3,MPI_INT,MPI_MAX,matin->comm);
9304e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
9314e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
9324e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
9334e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
9344e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
9350ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_SUM) {
9360ac07820SSatish Balay     MPI_Allreduce(isend,irecv,3,MPI_INT,MPI_SUM,matin->comm);
9374e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
9384e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
9394e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
9404e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
9414e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
9420ac07820SSatish Balay   }
9434e220ebcSLois Curfman McInnes   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
9444e220ebcSLois Curfman McInnes   info->fill_ratio_needed = 0;
9454e220ebcSLois Curfman McInnes   info->factor_mallocs    = 0;
9460ac07820SSatish Balay   return 0;
9470ac07820SSatish Balay }
9480ac07820SSatish Balay 
9495615d1e5SSatish Balay #undef __FUNC__
9505615d1e5SSatish Balay #define __FUNC__ "MatSetOption_MPIBAIJ"
95158667388SSatish Balay static int MatSetOption_MPIBAIJ(Mat A,MatOption op)
95258667388SSatish Balay {
95358667388SSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
95458667388SSatish Balay 
95558667388SSatish Balay   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
95658667388SSatish Balay       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
9576da5968aSLois Curfman McInnes       op == MAT_COLUMNS_UNSORTED ||
958b1fbbac0SLois Curfman McInnes       op == MAT_COLUMNS_SORTED) {
959b1fbbac0SLois Curfman McInnes         MatSetOption(a->A,op);
960b1fbbac0SLois Curfman McInnes         MatSetOption(a->B,op);
961b1fbbac0SLois Curfman McInnes   } else if (op == MAT_ROW_ORIENTED) {
962aeafbbfcSLois Curfman McInnes         a->roworiented = 1;
96358667388SSatish Balay         MatSetOption(a->A,op);
96458667388SSatish Balay         MatSetOption(a->B,op);
965b1fbbac0SLois Curfman McInnes   } else if (op == MAT_ROWS_SORTED ||
9666da5968aSLois Curfman McInnes              op == MAT_ROWS_UNSORTED ||
96758667388SSatish Balay              op == MAT_SYMMETRIC ||
96858667388SSatish Balay              op == MAT_STRUCTURALLY_SYMMETRIC ||
96958667388SSatish Balay              op == MAT_YES_NEW_DIAGONALS)
97058667388SSatish Balay     PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
97158667388SSatish Balay   else if (op == MAT_COLUMN_ORIENTED) {
97258667388SSatish Balay     a->roworiented = 0;
97358667388SSatish Balay     MatSetOption(a->A,op);
97458667388SSatish Balay     MatSetOption(a->B,op);
97590f02eecSBarry Smith   } else if (op == MAT_IGNORE_OFF_PROCESSOR_ENTRIES) {
97690f02eecSBarry Smith     a->donotstash = 1;
97790f02eecSBarry Smith   } else if (op == MAT_NO_NEW_DIAGONALS)
978e3372554SBarry Smith     {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");}
97958667388SSatish Balay   else
980e3372554SBarry Smith     {SETERRQ(PETSC_ERR_SUP,0,"unknown option");}
98158667388SSatish Balay   return 0;
98258667388SSatish Balay }
98358667388SSatish Balay 
9845615d1e5SSatish Balay #undef __FUNC__
9855615d1e5SSatish Balay #define __FUNC__ "MatTranspose_MPIBAIJ("
9860ac07820SSatish Balay static int MatTranspose_MPIBAIJ(Mat A,Mat *matout)
9870ac07820SSatish Balay {
9880ac07820SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data;
9890ac07820SSatish Balay   Mat_SeqBAIJ *Aloc;
9900ac07820SSatish Balay   Mat        B;
9910ac07820SSatish Balay   int        ierr,M=baij->M,N=baij->N,*ai,*aj,row,i,*rvals,j,k,col;
9920ac07820SSatish Balay   int        bs=baij->bs,mbs=baij->mbs;
9930ac07820SSatish Balay   Scalar     *a;
9940ac07820SSatish Balay 
9950ac07820SSatish Balay   if (matout == PETSC_NULL && M != N)
996e3372554SBarry Smith     SETERRQ(1,0,"Square matrix only for in-place");
9970ac07820SSatish Balay   ierr = MatCreateMPIBAIJ(A->comm,baij->bs,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);
9980ac07820SSatish Balay   CHKERRQ(ierr);
9990ac07820SSatish Balay 
10000ac07820SSatish Balay   /* copy over the A part */
10010ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*) baij->A->data;
10020ac07820SSatish Balay   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
10030ac07820SSatish Balay   row = baij->rstart;
10040ac07820SSatish Balay   rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
10050ac07820SSatish Balay 
10060ac07820SSatish Balay   for ( i=0; i<mbs; i++ ) {
10070ac07820SSatish Balay     rvals[0] = bs*(baij->rstart + i);
10080ac07820SSatish Balay     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
10090ac07820SSatish Balay     for ( j=ai[i]; j<ai[i+1]; j++ ) {
10100ac07820SSatish Balay       col = (baij->cstart+aj[j])*bs;
10110ac07820SSatish Balay       for (k=0; k<bs; k++ ) {
10120ac07820SSatish Balay         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
10130ac07820SSatish Balay         col++; a += bs;
10140ac07820SSatish Balay       }
10150ac07820SSatish Balay     }
10160ac07820SSatish Balay   }
10170ac07820SSatish Balay   /* copy over the B part */
10180ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*) baij->B->data;
10190ac07820SSatish Balay   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
10200ac07820SSatish Balay   row = baij->rstart*bs;
10210ac07820SSatish Balay   for ( i=0; i<mbs; i++ ) {
10220ac07820SSatish Balay     rvals[0] = bs*(baij->rstart + i);
10230ac07820SSatish Balay     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
10240ac07820SSatish Balay     for ( j=ai[i]; j<ai[i+1]; j++ ) {
10250ac07820SSatish Balay       col = baij->garray[aj[j]]*bs;
10260ac07820SSatish Balay       for (k=0; k<bs; k++ ) {
10270ac07820SSatish Balay         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
10280ac07820SSatish Balay         col++; a += bs;
10290ac07820SSatish Balay       }
10300ac07820SSatish Balay     }
10310ac07820SSatish Balay   }
10320ac07820SSatish Balay   PetscFree(rvals);
10330ac07820SSatish Balay   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
10340ac07820SSatish Balay   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
10350ac07820SSatish Balay 
10360ac07820SSatish Balay   if (matout != PETSC_NULL) {
10370ac07820SSatish Balay     *matout = B;
10380ac07820SSatish Balay   } else {
10390ac07820SSatish Balay     /* This isn't really an in-place transpose .... but free data structures from baij */
10400ac07820SSatish Balay     PetscFree(baij->rowners);
10410ac07820SSatish Balay     ierr = MatDestroy(baij->A); CHKERRQ(ierr);
10420ac07820SSatish Balay     ierr = MatDestroy(baij->B); CHKERRQ(ierr);
10430ac07820SSatish Balay     if (baij->colmap) PetscFree(baij->colmap);
10440ac07820SSatish Balay     if (baij->garray) PetscFree(baij->garray);
10450ac07820SSatish Balay     if (baij->lvec) VecDestroy(baij->lvec);
10460ac07820SSatish Balay     if (baij->Mvctx) VecScatterDestroy(baij->Mvctx);
10470ac07820SSatish Balay     PetscFree(baij);
10480ac07820SSatish Balay     PetscMemcpy(A,B,sizeof(struct _Mat));
10490ac07820SSatish Balay     PetscHeaderDestroy(B);
10500ac07820SSatish Balay   }
10510ac07820SSatish Balay   return 0;
10520ac07820SSatish Balay }
10530e95ebc0SSatish Balay 
10545615d1e5SSatish Balay #undef __FUNC__
10555615d1e5SSatish Balay #define __FUNC__ "MatDiagonalScale_MPIBAIJ"
10560e95ebc0SSatish Balay int MatDiagonalScale_MPIBAIJ(Mat A,Vec ll,Vec rr)
10570e95ebc0SSatish Balay {
10580e95ebc0SSatish Balay   Mat a = ((Mat_MPIBAIJ *) A->data)->A;
10590e95ebc0SSatish Balay   Mat b = ((Mat_MPIBAIJ *) A->data)->B;
10600e95ebc0SSatish Balay   int ierr,s1,s2,s3;
10610e95ebc0SSatish Balay 
10620e95ebc0SSatish Balay   if (ll)  {
10630e95ebc0SSatish Balay     ierr = VecGetLocalSize(ll,&s1); CHKERRQ(ierr);
10640e95ebc0SSatish Balay     ierr = MatGetLocalSize(A,&s2,&s3); CHKERRQ(ierr);
1065e3372554SBarry Smith     if (s1!=s2) SETERRQ(1,0,"non-conforming local sizes");
10660e95ebc0SSatish Balay     ierr = MatDiagonalScale(a,ll,0); CHKERRQ(ierr);
10670e95ebc0SSatish Balay     ierr = MatDiagonalScale(b,ll,0); CHKERRQ(ierr);
10680e95ebc0SSatish Balay   }
1069e3372554SBarry Smith   if (rr) SETERRQ(1,0,"not supported for right vector");
10700e95ebc0SSatish Balay   return 0;
10710e95ebc0SSatish Balay }
10720e95ebc0SSatish Balay 
10730ac07820SSatish Balay /* the code does not do the diagonal entries correctly unless the
10740ac07820SSatish Balay    matrix is square and the column and row owerships are identical.
10750ac07820SSatish Balay    This is a BUG. The only way to fix it seems to be to access
10760ac07820SSatish Balay    baij->A and baij->B directly and not through the MatZeroRows()
10770ac07820SSatish Balay    routine.
10780ac07820SSatish Balay */
10795615d1e5SSatish Balay #undef __FUNC__
10805615d1e5SSatish Balay #define __FUNC__ "MatZeroRows_MPIBAIJ"
10810ac07820SSatish Balay static int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag)
10820ac07820SSatish Balay {
10830ac07820SSatish Balay   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ *) A->data;
10840ac07820SSatish Balay   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
10850ac07820SSatish Balay   int            *procs,*nprocs,j,found,idx,nsends,*work;
10860ac07820SSatish Balay   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
10870ac07820SSatish Balay   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
10880ac07820SSatish Balay   int            *lens,imdex,*lrows,*values,bs=l->bs;
10890ac07820SSatish Balay   MPI_Comm       comm = A->comm;
10900ac07820SSatish Balay   MPI_Request    *send_waits,*recv_waits;
10910ac07820SSatish Balay   MPI_Status     recv_status,*send_status;
10920ac07820SSatish Balay   IS             istmp;
10930ac07820SSatish Balay 
10940ac07820SSatish Balay   ierr = ISGetSize(is,&N); CHKERRQ(ierr);
10950ac07820SSatish Balay   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
10960ac07820SSatish Balay 
10970ac07820SSatish Balay   /*  first count number of contributors to each processor */
10980ac07820SSatish Balay   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
10990ac07820SSatish Balay   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
11000ac07820SSatish Balay   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
11010ac07820SSatish Balay   for ( i=0; i<N; i++ ) {
11020ac07820SSatish Balay     idx = rows[i];
11030ac07820SSatish Balay     found = 0;
11040ac07820SSatish Balay     for ( j=0; j<size; j++ ) {
11050ac07820SSatish Balay       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
11060ac07820SSatish Balay         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
11070ac07820SSatish Balay       }
11080ac07820SSatish Balay     }
1109e3372554SBarry Smith     if (!found) SETERRQ(1,0,"Index out of range");
11100ac07820SSatish Balay   }
11110ac07820SSatish Balay   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
11120ac07820SSatish Balay 
11130ac07820SSatish Balay   /* inform other processors of number of messages and max length*/
11140ac07820SSatish Balay   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
11150ac07820SSatish Balay   MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
11160ac07820SSatish Balay   nrecvs = work[rank];
11170ac07820SSatish Balay   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
11180ac07820SSatish Balay   nmax = work[rank];
11190ac07820SSatish Balay   PetscFree(work);
11200ac07820SSatish Balay 
11210ac07820SSatish Balay   /* post receives:   */
11220ac07820SSatish Balay   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
11230ac07820SSatish Balay   CHKPTRQ(rvalues);
11240ac07820SSatish Balay   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
11250ac07820SSatish Balay   CHKPTRQ(recv_waits);
11260ac07820SSatish Balay   for ( i=0; i<nrecvs; i++ ) {
11270ac07820SSatish Balay     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
11280ac07820SSatish Balay   }
11290ac07820SSatish Balay 
11300ac07820SSatish Balay   /* do sends:
11310ac07820SSatish Balay       1) starts[i] gives the starting index in svalues for stuff going to
11320ac07820SSatish Balay          the ith processor
11330ac07820SSatish Balay   */
11340ac07820SSatish Balay   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
11350ac07820SSatish Balay   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
11360ac07820SSatish Balay   CHKPTRQ(send_waits);
11370ac07820SSatish Balay   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
11380ac07820SSatish Balay   starts[0] = 0;
11390ac07820SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
11400ac07820SSatish Balay   for ( i=0; i<N; i++ ) {
11410ac07820SSatish Balay     svalues[starts[owner[i]]++] = rows[i];
11420ac07820SSatish Balay   }
11430ac07820SSatish Balay   ISRestoreIndices(is,&rows);
11440ac07820SSatish Balay 
11450ac07820SSatish Balay   starts[0] = 0;
11460ac07820SSatish Balay   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
11470ac07820SSatish Balay   count = 0;
11480ac07820SSatish Balay   for ( i=0; i<size; i++ ) {
11490ac07820SSatish Balay     if (procs[i]) {
11500ac07820SSatish Balay       MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
11510ac07820SSatish Balay     }
11520ac07820SSatish Balay   }
11530ac07820SSatish Balay   PetscFree(starts);
11540ac07820SSatish Balay 
11550ac07820SSatish Balay   base = owners[rank]*bs;
11560ac07820SSatish Balay 
11570ac07820SSatish Balay   /*  wait on receives */
11580ac07820SSatish Balay   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
11590ac07820SSatish Balay   source = lens + nrecvs;
11600ac07820SSatish Balay   count  = nrecvs; slen = 0;
11610ac07820SSatish Balay   while (count) {
11620ac07820SSatish Balay     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
11630ac07820SSatish Balay     /* unpack receives into our local space */
11640ac07820SSatish Balay     MPI_Get_count(&recv_status,MPI_INT,&n);
11650ac07820SSatish Balay     source[imdex]  = recv_status.MPI_SOURCE;
11660ac07820SSatish Balay     lens[imdex]  = n;
11670ac07820SSatish Balay     slen += n;
11680ac07820SSatish Balay     count--;
11690ac07820SSatish Balay   }
11700ac07820SSatish Balay   PetscFree(recv_waits);
11710ac07820SSatish Balay 
11720ac07820SSatish Balay   /* move the data into the send scatter */
11730ac07820SSatish Balay   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
11740ac07820SSatish Balay   count = 0;
11750ac07820SSatish Balay   for ( i=0; i<nrecvs; i++ ) {
11760ac07820SSatish Balay     values = rvalues + i*nmax;
11770ac07820SSatish Balay     for ( j=0; j<lens[i]; j++ ) {
11780ac07820SSatish Balay       lrows[count++] = values[j] - base;
11790ac07820SSatish Balay     }
11800ac07820SSatish Balay   }
11810ac07820SSatish Balay   PetscFree(rvalues); PetscFree(lens);
11820ac07820SSatish Balay   PetscFree(owner); PetscFree(nprocs);
11830ac07820SSatish Balay 
11840ac07820SSatish Balay   /* actually zap the local rows */
1185537820f0SBarry Smith   ierr = ISCreateGeneral(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
11860ac07820SSatish Balay   PLogObjectParent(A,istmp);
11870ac07820SSatish Balay   PetscFree(lrows);
11880ac07820SSatish Balay   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
11890ac07820SSatish Balay   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
11900ac07820SSatish Balay   ierr = ISDestroy(istmp); CHKERRQ(ierr);
11910ac07820SSatish Balay 
11920ac07820SSatish Balay   /* wait on sends */
11930ac07820SSatish Balay   if (nsends) {
11940ac07820SSatish Balay     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
11950ac07820SSatish Balay     CHKPTRQ(send_status);
11960ac07820SSatish Balay     MPI_Waitall(nsends,send_waits,send_status);
11970ac07820SSatish Balay     PetscFree(send_status);
11980ac07820SSatish Balay   }
11990ac07820SSatish Balay   PetscFree(send_waits); PetscFree(svalues);
12000ac07820SSatish Balay 
12010ac07820SSatish Balay   return 0;
12020ac07820SSatish Balay }
1203ba4ca20aSSatish Balay extern int MatPrintHelp_SeqBAIJ(Mat);
12045615d1e5SSatish Balay #undef __FUNC__
12055615d1e5SSatish Balay #define __FUNC__ "MatPrintHelp_MPIBAIJ"
1206ba4ca20aSSatish Balay static int MatPrintHelp_MPIBAIJ(Mat A)
1207ba4ca20aSSatish Balay {
1208ba4ca20aSSatish Balay   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
1209ba4ca20aSSatish Balay 
1210ba4ca20aSSatish Balay   if (!a->rank) return MatPrintHelp_SeqBAIJ(a->A);
1211ba4ca20aSSatish Balay   else return 0;
1212ba4ca20aSSatish Balay }
12130ac07820SSatish Balay 
12145615d1e5SSatish Balay #undef __FUNC__
12155615d1e5SSatish Balay #define __FUNC__ "MatSetUnfactored_MPIBAIJ"
1216bb5a7306SBarry Smith static int MatSetUnfactored_MPIBAIJ(Mat A)
1217bb5a7306SBarry Smith {
1218bb5a7306SBarry Smith   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
1219bb5a7306SBarry Smith   int         ierr;
1220bb5a7306SBarry Smith   ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
1221bb5a7306SBarry Smith   return 0;
1222bb5a7306SBarry Smith }
1223bb5a7306SBarry Smith 
12240ac07820SSatish Balay static int MatConvertSameType_MPIBAIJ(Mat,Mat *,int);
12250ac07820SSatish Balay 
122679bdfe76SSatish Balay /* -------------------------------------------------------------------*/
122779bdfe76SSatish Balay static struct _MatOps MatOps = {
1228bd16c2feSSatish Balay   MatSetValues_MPIBAIJ,MatGetRow_MPIBAIJ,MatRestoreRow_MPIBAIJ,MatMult_MPIBAIJ,
12294c50302cSBarry Smith   MatMultAdd_MPIBAIJ,MatMultTrans_MPIBAIJ,MatMultTransAdd_MPIBAIJ,0,
12304c50302cSBarry Smith   0,0,0,0,
12310ac07820SSatish Balay   0,0,MatTranspose_MPIBAIJ,MatGetInfo_MPIBAIJ,
12320e95ebc0SSatish Balay   0,MatGetDiagonal_MPIBAIJ,MatDiagonalScale_MPIBAIJ,MatNorm_MPIBAIJ,
123358667388SSatish Balay   MatAssemblyBegin_MPIBAIJ,MatAssemblyEnd_MPIBAIJ,0,MatSetOption_MPIBAIJ,
12344c50302cSBarry Smith   MatZeroEntries_MPIBAIJ,MatZeroRows_MPIBAIJ,0,
12354c50302cSBarry Smith   0,0,0,MatGetSize_MPIBAIJ,
12364c50302cSBarry Smith   MatGetLocalSize_MPIBAIJ,MatGetOwnershipRange_MPIBAIJ,0,0,
1237905e6a2fSBarry Smith   0,0,0,MatConvertSameType_MPIBAIJ,0,0,
1238d212a18eSSatish Balay   0,0,0,MatGetSubMatrices_MPIBAIJ,
1239ba4ca20aSSatish Balay   MatIncreaseOverlap_MPIBAIJ,MatGetValues_MPIBAIJ,0,MatPrintHelp_MPIBAIJ,
1240bb5a7306SBarry Smith   MatScale_MPIBAIJ,0,0,0,MatGetBlockSize_MPIBAIJ,
1241bb5a7306SBarry Smith   0,0,0,0,0,0,MatSetUnfactored_MPIBAIJ};
124279bdfe76SSatish Balay 
124379bdfe76SSatish Balay 
12445615d1e5SSatish Balay #undef __FUNC__
12455615d1e5SSatish Balay #define __FUNC__ "MatCreateMPIBAIJ"
124679bdfe76SSatish Balay /*@C
124779bdfe76SSatish Balay    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
124879bdfe76SSatish Balay    (block compressed row).  For good matrix assembly performance
124979bdfe76SSatish Balay    the user should preallocate the matrix storage by setting the parameters
125079bdfe76SSatish Balay    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
125179bdfe76SSatish Balay    performance can be increased by more than a factor of 50.
125279bdfe76SSatish Balay 
125379bdfe76SSatish Balay    Input Parameters:
125479bdfe76SSatish Balay .  comm - MPI communicator
125579bdfe76SSatish Balay .  bs   - size of blockk
125679bdfe76SSatish Balay .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
125792e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
125892e8d321SLois Curfman McInnes            y vector for the matrix-vector product y = Ax.
125992e8d321SLois Curfman McInnes .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
126092e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
126192e8d321SLois Curfman McInnes            x vector for the matrix-vector product y = Ax.
126279bdfe76SSatish Balay .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
126392e8d321SLois Curfman McInnes .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
126479bdfe76SSatish Balay .  d_nz  - number of block nonzeros per block row in diagonal portion of local
126579bdfe76SSatish Balay            submatrix  (same for all local rows)
126692e8d321SLois Curfman McInnes .  d_nzz - array containing the number of block nonzeros in the various block rows
126792e8d321SLois Curfman McInnes            of the in diagonal portion of the local (possibly different for each block
126892e8d321SLois Curfman McInnes            row) or PETSC_NULL.  You must leave room for the diagonal entry even if
126992e8d321SLois Curfman McInnes            it is zero.
127092e8d321SLois Curfman McInnes .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
127179bdfe76SSatish Balay            submatrix (same for all local rows).
127292e8d321SLois Curfman McInnes .  o_nzz - array containing the number of nonzeros in the various block rows of the
127392e8d321SLois Curfman McInnes            off-diagonal portion of the local submatrix (possibly different for
127492e8d321SLois Curfman McInnes            each block row) or PETSC_NULL.
127579bdfe76SSatish Balay 
127679bdfe76SSatish Balay    Output Parameter:
127779bdfe76SSatish Balay .  A - the matrix
127879bdfe76SSatish Balay 
127979bdfe76SSatish Balay    Notes:
128079bdfe76SSatish Balay    The user MUST specify either the local or global matrix dimensions
128179bdfe76SSatish Balay    (possibly both).
128279bdfe76SSatish Balay 
128379bdfe76SSatish Balay    Storage Information:
128479bdfe76SSatish Balay    For a square global matrix we define each processor's diagonal portion
128579bdfe76SSatish Balay    to be its local rows and the corresponding columns (a square submatrix);
128679bdfe76SSatish Balay    each processor's off-diagonal portion encompasses the remainder of the
128779bdfe76SSatish Balay    local matrix (a rectangular submatrix).
128879bdfe76SSatish Balay 
128979bdfe76SSatish Balay    The user can specify preallocated storage for the diagonal part of
129079bdfe76SSatish Balay    the local submatrix with either d_nz or d_nnz (not both).  Set
129179bdfe76SSatish Balay    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
129279bdfe76SSatish Balay    memory allocation.  Likewise, specify preallocated storage for the
129379bdfe76SSatish Balay    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
129479bdfe76SSatish Balay 
129579bdfe76SSatish Balay    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
129679bdfe76SSatish Balay    the figure below we depict these three local rows and all columns (0-11).
129779bdfe76SSatish Balay 
129879bdfe76SSatish Balay $          0 1 2 3 4 5 6 7 8 9 10 11
129979bdfe76SSatish Balay $         -------------------
130079bdfe76SSatish Balay $  row 3  |  o o o d d d o o o o o o
130179bdfe76SSatish Balay $  row 4  |  o o o d d d o o o o o o
130279bdfe76SSatish Balay $  row 5  |  o o o d d d o o o o o o
130379bdfe76SSatish Balay $         -------------------
130479bdfe76SSatish Balay $
130579bdfe76SSatish Balay 
130679bdfe76SSatish Balay    Thus, any entries in the d locations are stored in the d (diagonal)
130779bdfe76SSatish Balay    submatrix, and any entries in the o locations are stored in the
130879bdfe76SSatish Balay    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
130957b952d6SSatish Balay    stored simply in the MATSEQBAIJ format for compressed row storage.
131079bdfe76SSatish Balay 
131179bdfe76SSatish Balay    Now d_nz should indicate the number of nonzeros per row in the d matrix,
131279bdfe76SSatish Balay    and o_nz should indicate the number of nonzeros per row in the o matrix.
131379bdfe76SSatish Balay    In general, for PDE problems in which most nonzeros are near the diagonal,
131492e8d321SLois Curfman McInnes    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
131592e8d321SLois Curfman McInnes    or you will get TERRIBLE performance; see the users' manual chapter on
13166da5968aSLois Curfman McInnes    matrices.
131779bdfe76SSatish Balay 
131892e8d321SLois Curfman McInnes .keywords: matrix, block, aij, compressed row, sparse, parallel
131979bdfe76SSatish Balay 
132079bdfe76SSatish Balay .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues()
132179bdfe76SSatish Balay @*/
132279bdfe76SSatish Balay int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,
132379bdfe76SSatish Balay                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
132479bdfe76SSatish Balay {
132579bdfe76SSatish Balay   Mat          B;
132679bdfe76SSatish Balay   Mat_MPIBAIJ  *b;
13274c50302cSBarry Smith   int          ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size;
132879bdfe76SSatish Balay 
1329e3372554SBarry Smith   if (bs < 1) SETERRQ(1,0,"invalid block size specified");
133079bdfe76SSatish Balay   *A = 0;
133179bdfe76SSatish Balay   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIBAIJ,comm);
133279bdfe76SSatish Balay   PLogObjectCreate(B);
133379bdfe76SSatish Balay   B->data       = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b);
133479bdfe76SSatish Balay   PetscMemzero(b,sizeof(Mat_MPIBAIJ));
133579bdfe76SSatish Balay   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
13364c50302cSBarry Smith   MPI_Comm_size(comm,&size);
13374c50302cSBarry Smith   if (size == 1) {
13384c50302cSBarry Smith     B->ops.getrowij          = MatGetRowIJ_MPIBAIJ;
13394c50302cSBarry Smith     B->ops.restorerowij      = MatRestoreRowIJ_MPIBAIJ;
13404c50302cSBarry Smith     B->ops.lufactorsymbolic  = MatLUFactorSymbolic_MPIBAIJ;
13414c50302cSBarry Smith     B->ops.lufactornumeric   = MatLUFactorNumeric_MPIBAIJ;
13424c50302cSBarry Smith     B->ops.lufactor          = MatLUFactor_MPIBAIJ;
13434c50302cSBarry Smith     B->ops.solve             = MatSolve_MPIBAIJ;
13444c50302cSBarry Smith     B->ops.solveadd          = MatSolveAdd_MPIBAIJ;
13454c50302cSBarry Smith     B->ops.solvetrans        = MatSolveTrans_MPIBAIJ;
13464c50302cSBarry Smith     B->ops.solvetransadd     = MatSolveTransAdd_MPIBAIJ;
13474c50302cSBarry Smith     B->ops.ilufactorsymbolic = MatILUFactorSymbolic_MPIBAIJ;
13484c50302cSBarry Smith   }
13494c50302cSBarry Smith 
135079bdfe76SSatish Balay   B->destroy    = MatDestroy_MPIBAIJ;
135179bdfe76SSatish Balay   B->view       = MatView_MPIBAIJ;
135290f02eecSBarry Smith   B->mapping    = 0;
135379bdfe76SSatish Balay   B->factor     = 0;
135479bdfe76SSatish Balay   B->assembled  = PETSC_FALSE;
135579bdfe76SSatish Balay 
135679bdfe76SSatish Balay   b->insertmode = NOT_SET_VALUES;
135779bdfe76SSatish Balay   MPI_Comm_rank(comm,&b->rank);
135879bdfe76SSatish Balay   MPI_Comm_size(comm,&b->size);
135979bdfe76SSatish Balay 
136079bdfe76SSatish Balay   if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
1361e3372554SBarry Smith     SETERRQ(1,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
1362e3372554SBarry Smith   if ( M == PETSC_DECIDE && m == PETSC_DECIDE) SETERRQ(1,0,"either M or m should be specified");
1363e3372554SBarry Smith   if ( M == PETSC_DECIDE && n == PETSC_DECIDE)SETERRQ(1,0,"either N or n should be specified");
1364cee3aa6bSSatish Balay   if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE;
1365cee3aa6bSSatish Balay   if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE;
136679bdfe76SSatish Balay 
136779bdfe76SSatish Balay   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
136879bdfe76SSatish Balay     work[0] = m; work[1] = n;
136979bdfe76SSatish Balay     mbs = m/bs; nbs = n/bs;
137079bdfe76SSatish Balay     MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
137179bdfe76SSatish Balay     if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;}
137279bdfe76SSatish Balay     if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;}
137379bdfe76SSatish Balay   }
137479bdfe76SSatish Balay   if (m == PETSC_DECIDE) {
137579bdfe76SSatish Balay     Mbs = M/bs;
1376e3372554SBarry Smith     if (Mbs*bs != M) SETERRQ(1,0,"No of global rows must be divisible by blocksize");
137779bdfe76SSatish Balay     mbs = Mbs/b->size + ((Mbs % b->size) > b->rank);
137879bdfe76SSatish Balay     m   = mbs*bs;
137979bdfe76SSatish Balay   }
138079bdfe76SSatish Balay   if (n == PETSC_DECIDE) {
138179bdfe76SSatish Balay     Nbs = N/bs;
1382e3372554SBarry Smith     if (Nbs*bs != N) SETERRQ(1,0,"No of global cols must be divisible by blocksize");
138379bdfe76SSatish Balay     nbs = Nbs/b->size + ((Nbs % b->size) > b->rank);
138479bdfe76SSatish Balay     n   = nbs*bs;
138579bdfe76SSatish Balay   }
1386e3372554SBarry Smith   if (mbs*bs != m || nbs*bs != n) SETERRQ(1,0,"No of local rows, cols must be divisible by blocksize");
138779bdfe76SSatish Balay 
138879bdfe76SSatish Balay   b->m = m; B->m = m;
138979bdfe76SSatish Balay   b->n = n; B->n = n;
139079bdfe76SSatish Balay   b->N = N; B->N = N;
139179bdfe76SSatish Balay   b->M = M; B->M = M;
139279bdfe76SSatish Balay   b->bs  = bs;
139379bdfe76SSatish Balay   b->bs2 = bs*bs;
139479bdfe76SSatish Balay   b->mbs = mbs;
139579bdfe76SSatish Balay   b->nbs = nbs;
139679bdfe76SSatish Balay   b->Mbs = Mbs;
139779bdfe76SSatish Balay   b->Nbs = Nbs;
139879bdfe76SSatish Balay 
139979bdfe76SSatish Balay   /* build local table of row and column ownerships */
140079bdfe76SSatish Balay   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
140179bdfe76SSatish Balay   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIBAIJ));
14020ac07820SSatish Balay   b->cowners = b->rowners + b->size + 2;
140379bdfe76SSatish Balay   MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);
140479bdfe76SSatish Balay   b->rowners[0] = 0;
140579bdfe76SSatish Balay   for ( i=2; i<=b->size; i++ ) {
140679bdfe76SSatish Balay     b->rowners[i] += b->rowners[i-1];
140779bdfe76SSatish Balay   }
140879bdfe76SSatish Balay   b->rstart    = b->rowners[b->rank];
140979bdfe76SSatish Balay   b->rend      = b->rowners[b->rank+1];
14104fa0d573SSatish Balay   b->rstart_bs = b->rstart * bs;
14114fa0d573SSatish Balay   b->rend_bs   = b->rend * bs;
14124fa0d573SSatish Balay 
141379bdfe76SSatish Balay   MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);
141479bdfe76SSatish Balay   b->cowners[0] = 0;
141579bdfe76SSatish Balay   for ( i=2; i<=b->size; i++ ) {
141679bdfe76SSatish Balay     b->cowners[i] += b->cowners[i-1];
141779bdfe76SSatish Balay   }
141879bdfe76SSatish Balay   b->cstart    = b->cowners[b->rank];
141979bdfe76SSatish Balay   b->cend      = b->cowners[b->rank+1];
14204fa0d573SSatish Balay   b->cstart_bs = b->cstart * bs;
14214fa0d573SSatish Balay   b->cend_bs   = b->cend * bs;
142279bdfe76SSatish Balay 
142379bdfe76SSatish Balay   if (d_nz == PETSC_DEFAULT) d_nz = 5;
142479bdfe76SSatish Balay   ierr = MatCreateSeqBAIJ(MPI_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
142579bdfe76SSatish Balay   PLogObjectParent(B,b->A);
142679bdfe76SSatish Balay   if (o_nz == PETSC_DEFAULT) o_nz = 0;
142779bdfe76SSatish Balay   ierr = MatCreateSeqBAIJ(MPI_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
142879bdfe76SSatish Balay   PLogObjectParent(B,b->B);
142979bdfe76SSatish Balay 
143079bdfe76SSatish Balay   /* build cache for off array entries formed */
143179bdfe76SSatish Balay   ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
143290f02eecSBarry Smith   b->donotstash  = 0;
143379bdfe76SSatish Balay   b->colmap      = 0;
143479bdfe76SSatish Balay   b->garray      = 0;
143579bdfe76SSatish Balay   b->roworiented = 1;
143679bdfe76SSatish Balay 
143779bdfe76SSatish Balay   /* stuff used for matrix vector multiply */
143879bdfe76SSatish Balay   b->lvec      = 0;
143979bdfe76SSatish Balay   b->Mvctx     = 0;
144079bdfe76SSatish Balay 
144179bdfe76SSatish Balay   /* stuff for MatGetRow() */
144279bdfe76SSatish Balay   b->rowindices   = 0;
144379bdfe76SSatish Balay   b->rowvalues    = 0;
144479bdfe76SSatish Balay   b->getrowactive = PETSC_FALSE;
144579bdfe76SSatish Balay 
144679bdfe76SSatish Balay   *A = B;
144779bdfe76SSatish Balay   return 0;
144879bdfe76SSatish Balay }
1449026e39d0SSatish Balay 
14505615d1e5SSatish Balay #undef __FUNC__
14515615d1e5SSatish Balay #define __FUNC__ "MatConvertSameType_MPIBAIJ"
14520ac07820SSatish Balay static int MatConvertSameType_MPIBAIJ(Mat matin,Mat *newmat,int cpvalues)
14530ac07820SSatish Balay {
14540ac07820SSatish Balay   Mat        mat;
14550ac07820SSatish Balay   Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data;
14560ac07820SSatish Balay   int        ierr, len=0, flg;
14570ac07820SSatish Balay 
14580ac07820SSatish Balay   *newmat       = 0;
14590ac07820SSatish Balay   PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIBAIJ,matin->comm);
14600ac07820SSatish Balay   PLogObjectCreate(mat);
14610ac07820SSatish Balay   mat->data       = (void *) (a = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(a);
14620ac07820SSatish Balay   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
14630ac07820SSatish Balay   mat->destroy    = MatDestroy_MPIBAIJ;
14640ac07820SSatish Balay   mat->view       = MatView_MPIBAIJ;
14650ac07820SSatish Balay   mat->factor     = matin->factor;
14660ac07820SSatish Balay   mat->assembled  = PETSC_TRUE;
14670ac07820SSatish Balay 
14680ac07820SSatish Balay   a->m = mat->m   = oldmat->m;
14690ac07820SSatish Balay   a->n = mat->n   = oldmat->n;
14700ac07820SSatish Balay   a->M = mat->M   = oldmat->M;
14710ac07820SSatish Balay   a->N = mat->N   = oldmat->N;
14720ac07820SSatish Balay 
14730ac07820SSatish Balay   a->bs  = oldmat->bs;
14740ac07820SSatish Balay   a->bs2 = oldmat->bs2;
14750ac07820SSatish Balay   a->mbs = oldmat->mbs;
14760ac07820SSatish Balay   a->nbs = oldmat->nbs;
14770ac07820SSatish Balay   a->Mbs = oldmat->Mbs;
14780ac07820SSatish Balay   a->Nbs = oldmat->Nbs;
14790ac07820SSatish Balay 
14800ac07820SSatish Balay   a->rstart       = oldmat->rstart;
14810ac07820SSatish Balay   a->rend         = oldmat->rend;
14820ac07820SSatish Balay   a->cstart       = oldmat->cstart;
14830ac07820SSatish Balay   a->cend         = oldmat->cend;
14840ac07820SSatish Balay   a->size         = oldmat->size;
14850ac07820SSatish Balay   a->rank         = oldmat->rank;
14860ac07820SSatish Balay   a->insertmode   = NOT_SET_VALUES;
14870ac07820SSatish Balay   a->rowvalues    = 0;
14880ac07820SSatish Balay   a->getrowactive = PETSC_FALSE;
14890ac07820SSatish Balay 
14900ac07820SSatish Balay   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
14910ac07820SSatish Balay   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIBAIJ));
14920ac07820SSatish Balay   a->cowners = a->rowners + a->size + 2;
14930ac07820SSatish Balay   PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
14940ac07820SSatish Balay   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
14950ac07820SSatish Balay   if (oldmat->colmap) {
14960ac07820SSatish Balay     a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap);
14970ac07820SSatish Balay     PLogObjectMemory(mat,(a->Nbs)*sizeof(int));
14980ac07820SSatish Balay     PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));
14990ac07820SSatish Balay   } else a->colmap = 0;
15000ac07820SSatish Balay   if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) {
15010ac07820SSatish Balay     a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray);
15020ac07820SSatish Balay     PLogObjectMemory(mat,len*sizeof(int));
15030ac07820SSatish Balay     PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
15040ac07820SSatish Balay   } else a->garray = 0;
15050ac07820SSatish Balay 
15060ac07820SSatish Balay   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
15070ac07820SSatish Balay   PLogObjectParent(mat,a->lvec);
15080ac07820SSatish Balay   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
15090ac07820SSatish Balay   PLogObjectParent(mat,a->Mvctx);
15100ac07820SSatish Balay   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
15110ac07820SSatish Balay   PLogObjectParent(mat,a->A);
15120ac07820SSatish Balay   ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
15130ac07820SSatish Balay   PLogObjectParent(mat,a->B);
15140ac07820SSatish Balay   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
15150ac07820SSatish Balay   if (flg) {
15160ac07820SSatish Balay     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
15170ac07820SSatish Balay   }
15180ac07820SSatish Balay   *newmat = mat;
15190ac07820SSatish Balay   return 0;
15200ac07820SSatish Balay }
152157b952d6SSatish Balay 
152257b952d6SSatish Balay #include "sys.h"
152357b952d6SSatish Balay 
15245615d1e5SSatish Balay #undef __FUNC__
15255615d1e5SSatish Balay #define __FUNC__ "MatLoad_MPIBAIJ"
152657b952d6SSatish Balay int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat)
152757b952d6SSatish Balay {
152857b952d6SSatish Balay   Mat          A;
152957b952d6SSatish Balay   int          i, nz, ierr, j,rstart, rend, fd;
153057b952d6SSatish Balay   Scalar       *vals,*buf;
153157b952d6SSatish Balay   MPI_Comm     comm = ((PetscObject)viewer)->comm;
153257b952d6SSatish Balay   MPI_Status   status;
1533cee3aa6bSSatish Balay   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
153457b952d6SSatish Balay   int          *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf;
153557b952d6SSatish Balay   int          flg,tag = ((PetscObject)viewer)->tag,bs=1,bs2,Mbs,mbs,extra_rows;
153657b952d6SSatish Balay   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
153757b952d6SSatish Balay   int          dcount,kmax,k,nzcount,tmp;
153857b952d6SSatish Balay 
153957b952d6SSatish Balay 
154057b952d6SSatish Balay   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
154157b952d6SSatish Balay   bs2  = bs*bs;
154257b952d6SSatish Balay 
154357b952d6SSatish Balay   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
154457b952d6SSatish Balay   if (!rank) {
154557b952d6SSatish Balay     ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
154657b952d6SSatish Balay     ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr);
1547e3372554SBarry Smith     if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not matrix object");
154857b952d6SSatish Balay   }
154957b952d6SSatish Balay 
155057b952d6SSatish Balay   MPI_Bcast(header+1,3,MPI_INT,0,comm);
155157b952d6SSatish Balay   M = header[1]; N = header[2];
155257b952d6SSatish Balay 
1553e3372554SBarry Smith   if (M != N) SETERRQ(1,0,"Can only do square matrices");
155457b952d6SSatish Balay 
155557b952d6SSatish Balay   /*
155657b952d6SSatish Balay      This code adds extra rows to make sure the number of rows is
155757b952d6SSatish Balay      divisible by the blocksize
155857b952d6SSatish Balay   */
155957b952d6SSatish Balay   Mbs        = M/bs;
156057b952d6SSatish Balay   extra_rows = bs - M + bs*(Mbs);
156157b952d6SSatish Balay   if (extra_rows == bs) extra_rows = 0;
156257b952d6SSatish Balay   else                  Mbs++;
156357b952d6SSatish Balay   if (extra_rows &&!rank) {
1564b0267e0aSLois Curfman McInnes     PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
156557b952d6SSatish Balay   }
1566537820f0SBarry Smith 
156757b952d6SSatish Balay   /* determine ownership of all rows */
156857b952d6SSatish Balay   mbs = Mbs/size + ((Mbs % size) > rank);
156957b952d6SSatish Balay   m   = mbs * bs;
1570cee3aa6bSSatish Balay   rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners);
1571cee3aa6bSSatish Balay   browners = rowners + size + 1;
157257b952d6SSatish Balay   MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);
157357b952d6SSatish Balay   rowners[0] = 0;
1574cee3aa6bSSatish Balay   for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1];
1575cee3aa6bSSatish Balay   for ( i=0; i<=size;  i++ ) browners[i] = rowners[i]*bs;
157657b952d6SSatish Balay   rstart = rowners[rank];
157757b952d6SSatish Balay   rend   = rowners[rank+1];
157857b952d6SSatish Balay 
157957b952d6SSatish Balay   /* distribute row lengths to all processors */
158057b952d6SSatish Balay   locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens);
158157b952d6SSatish Balay   if (!rank) {
158257b952d6SSatish Balay     rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths);
158357b952d6SSatish Balay     ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
158457b952d6SSatish Balay     for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
158557b952d6SSatish Balay     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
1586cee3aa6bSSatish Balay     for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i];
1587cee3aa6bSSatish Balay     MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);
158857b952d6SSatish Balay     PetscFree(sndcounts);
158957b952d6SSatish Balay   }
159057b952d6SSatish Balay   else {
159157b952d6SSatish Balay     MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);
159257b952d6SSatish Balay   }
159357b952d6SSatish Balay 
159457b952d6SSatish Balay   if (!rank) {
159557b952d6SSatish Balay     /* calculate the number of nonzeros on each processor */
159657b952d6SSatish Balay     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
159757b952d6SSatish Balay     PetscMemzero(procsnz,size*sizeof(int));
159857b952d6SSatish Balay     for ( i=0; i<size; i++ ) {
159957b952d6SSatish Balay       for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) {
160057b952d6SSatish Balay         procsnz[i] += rowlengths[j];
160157b952d6SSatish Balay       }
160257b952d6SSatish Balay     }
160357b952d6SSatish Balay     PetscFree(rowlengths);
160457b952d6SSatish Balay 
160557b952d6SSatish Balay     /* determine max buffer needed and allocate it */
160657b952d6SSatish Balay     maxnz = 0;
160757b952d6SSatish Balay     for ( i=0; i<size; i++ ) {
160857b952d6SSatish Balay       maxnz = PetscMax(maxnz,procsnz[i]);
160957b952d6SSatish Balay     }
161057b952d6SSatish Balay     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
161157b952d6SSatish Balay 
161257b952d6SSatish Balay     /* read in my part of the matrix column indices  */
161357b952d6SSatish Balay     nz = procsnz[0];
161457b952d6SSatish Balay     ibuf = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
161557b952d6SSatish Balay     mycols = ibuf;
1616cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
161757b952d6SSatish Balay     ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr);
1618cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
1619cee3aa6bSSatish Balay 
162057b952d6SSatish Balay     /* read in every ones (except the last) and ship off */
162157b952d6SSatish Balay     for ( i=1; i<size-1; i++ ) {
162257b952d6SSatish Balay       nz = procsnz[i];
162357b952d6SSatish Balay       ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
162457b952d6SSatish Balay       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
162557b952d6SSatish Balay     }
162657b952d6SSatish Balay     /* read in the stuff for the last proc */
162757b952d6SSatish Balay     if ( size != 1 ) {
162857b952d6SSatish Balay       nz = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
162957b952d6SSatish Balay       ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
163057b952d6SSatish Balay       for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i;
1631cee3aa6bSSatish Balay       MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);
163257b952d6SSatish Balay     }
163357b952d6SSatish Balay     PetscFree(cols);
163457b952d6SSatish Balay   }
163557b952d6SSatish Balay   else {
163657b952d6SSatish Balay     /* determine buffer space needed for message */
163757b952d6SSatish Balay     nz = 0;
163857b952d6SSatish Balay     for ( i=0; i<m; i++ ) {
163957b952d6SSatish Balay       nz += locrowlens[i];
164057b952d6SSatish Balay     }
164157b952d6SSatish Balay     ibuf = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
164257b952d6SSatish Balay     mycols = ibuf;
164357b952d6SSatish Balay     /* receive message of column indices*/
164457b952d6SSatish Balay     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
164557b952d6SSatish Balay     MPI_Get_count(&status,MPI_INT,&maxnz);
1646e3372554SBarry Smith     if (maxnz != nz) SETERRQ(1,0,"something is wrong with file");
164757b952d6SSatish Balay   }
164857b952d6SSatish Balay 
164957b952d6SSatish Balay   /* loop over local rows, determining number of off diagonal entries */
1650cee3aa6bSSatish Balay   dlens  = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens);
1651cee3aa6bSSatish Balay   odlens = dlens + (rend-rstart);
165257b952d6SSatish Balay   mask   = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask);
1653cee3aa6bSSatish Balay   PetscMemzero(mask,3*Mbs*sizeof(int));
165457b952d6SSatish Balay   masked1 = mask    + Mbs;
165557b952d6SSatish Balay   masked2 = masked1 + Mbs;
165657b952d6SSatish Balay   rowcount = 0; nzcount = 0;
165757b952d6SSatish Balay   for ( i=0; i<mbs; i++ ) {
165857b952d6SSatish Balay     dcount  = 0;
165957b952d6SSatish Balay     odcount = 0;
166057b952d6SSatish Balay     for ( j=0; j<bs; j++ ) {
166157b952d6SSatish Balay       kmax = locrowlens[rowcount];
166257b952d6SSatish Balay       for ( k=0; k<kmax; k++ ) {
166357b952d6SSatish Balay         tmp = mycols[nzcount++]/bs;
166457b952d6SSatish Balay         if (!mask[tmp]) {
166557b952d6SSatish Balay           mask[tmp] = 1;
166657b952d6SSatish Balay           if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp;
166757b952d6SSatish Balay           else masked1[dcount++] = tmp;
166857b952d6SSatish Balay         }
166957b952d6SSatish Balay       }
167057b952d6SSatish Balay       rowcount++;
167157b952d6SSatish Balay     }
1672cee3aa6bSSatish Balay 
167357b952d6SSatish Balay     dlens[i]  = dcount;
167457b952d6SSatish Balay     odlens[i] = odcount;
1675cee3aa6bSSatish Balay 
167657b952d6SSatish Balay     /* zero out the mask elements we set */
167757b952d6SSatish Balay     for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0;
167857b952d6SSatish Balay     for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0;
167957b952d6SSatish Balay   }
1680cee3aa6bSSatish Balay 
168157b952d6SSatish Balay   /* create our matrix */
1682537820f0SBarry Smith   ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);
1683537820f0SBarry Smith          CHKERRQ(ierr);
168457b952d6SSatish Balay   A = *newmat;
16856d4a8577SBarry Smith   MatSetOption(A,MAT_COLUMNS_SORTED);
168657b952d6SSatish Balay 
168757b952d6SSatish Balay   if (!rank) {
168857b952d6SSatish Balay     buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf);
168957b952d6SSatish Balay     /* read in my part of the matrix numerical values  */
169057b952d6SSatish Balay     nz = procsnz[0];
169157b952d6SSatish Balay     vals = buf;
1692cee3aa6bSSatish Balay     mycols = ibuf;
1693cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
169457b952d6SSatish Balay     ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
1695cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
1696537820f0SBarry Smith 
169757b952d6SSatish Balay     /* insert into matrix */
169857b952d6SSatish Balay     jj      = rstart*bs;
169957b952d6SSatish Balay     for ( i=0; i<m; i++ ) {
170057b952d6SSatish Balay       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
170157b952d6SSatish Balay       mycols += locrowlens[i];
170257b952d6SSatish Balay       vals   += locrowlens[i];
170357b952d6SSatish Balay       jj++;
170457b952d6SSatish Balay     }
170557b952d6SSatish Balay     /* read in other processors (except the last one) and ship out */
170657b952d6SSatish Balay     for ( i=1; i<size-1; i++ ) {
170757b952d6SSatish Balay       nz = procsnz[i];
170857b952d6SSatish Balay       vals = buf;
170957b952d6SSatish Balay       ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
171057b952d6SSatish Balay       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
171157b952d6SSatish Balay     }
171257b952d6SSatish Balay     /* the last proc */
171357b952d6SSatish Balay     if ( size != 1 ){
171457b952d6SSatish Balay       nz = procsnz[i] - extra_rows;
1715cee3aa6bSSatish Balay       vals = buf;
171657b952d6SSatish Balay       ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
171757b952d6SSatish Balay       for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0;
1718cee3aa6bSSatish Balay       MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);
171957b952d6SSatish Balay     }
172057b952d6SSatish Balay     PetscFree(procsnz);
172157b952d6SSatish Balay   }
172257b952d6SSatish Balay   else {
172357b952d6SSatish Balay     /* receive numeric values */
172457b952d6SSatish Balay     buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf);
172557b952d6SSatish Balay 
172657b952d6SSatish Balay     /* receive message of values*/
172757b952d6SSatish Balay     vals = buf;
1728cee3aa6bSSatish Balay     mycols = ibuf;
172957b952d6SSatish Balay     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
173057b952d6SSatish Balay     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1731e3372554SBarry Smith     if (maxnz != nz) SETERRQ(1,0,"something is wrong with file");
173257b952d6SSatish Balay 
173357b952d6SSatish Balay     /* insert into matrix */
173457b952d6SSatish Balay     jj      = rstart*bs;
1735cee3aa6bSSatish Balay     for ( i=0; i<m; i++ ) {
173657b952d6SSatish Balay       ierr    = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
173757b952d6SSatish Balay       mycols += locrowlens[i];
173857b952d6SSatish Balay       vals   += locrowlens[i];
173957b952d6SSatish Balay       jj++;
174057b952d6SSatish Balay     }
174157b952d6SSatish Balay   }
174257b952d6SSatish Balay   PetscFree(locrowlens);
174357b952d6SSatish Balay   PetscFree(buf);
174457b952d6SSatish Balay   PetscFree(ibuf);
174557b952d6SSatish Balay   PetscFree(rowners);
174657b952d6SSatish Balay   PetscFree(dlens);
1747cee3aa6bSSatish Balay   PetscFree(mask);
17486d4a8577SBarry Smith   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
17496d4a8577SBarry Smith   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
175057b952d6SSatish Balay   return 0;
175157b952d6SSatish Balay }
175257b952d6SSatish Balay 
175357b952d6SSatish Balay 
1754