xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision c98fd787ce49cfe1bba58d59168f91d3fdf7ec65)
179bdfe76SSatish Balay 
2c6db04a5SJed Brown #include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I  "petscmat.h"  I*/
3c5d9258eSSatish Balay 
4c6db04a5SJed Brown #include <petscblaslapack.h>
565a92638SMatthew G. Knepley #include <petscsf.h>
679bdfe76SSatish Balay 
74a2ae208SSatish Balay #undef __FUNCT__
8985db425SBarry Smith #define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
9985db425SBarry Smith PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
107843d17aSBarry Smith {
117843d17aSBarry Smith   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
12dfbe8321SBarry Smith   PetscErrorCode ierr;
13985db425SBarry Smith   PetscInt       i,*idxb = 0;
1487828ca2SBarry Smith   PetscScalar    *va,*vb;
157843d17aSBarry Smith   Vec            vtmp;
167843d17aSBarry Smith 
177843d17aSBarry Smith   PetscFunctionBegin;
18985db425SBarry Smith   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
191ebc52fbSHong Zhang   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
20985db425SBarry Smith   if (idx) {
2126fbe8dcSKarl Rupp     for (i=0; i<A->rmap->n; i++) {
2226fbe8dcSKarl Rupp       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2326fbe8dcSKarl Rupp     }
24985db425SBarry Smith   }
257843d17aSBarry Smith 
26d0f46423SBarry Smith   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
27785e854fSJed Brown   if (idx) {ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);}
28985db425SBarry Smith   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
291ebc52fbSHong Zhang   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
307843d17aSBarry Smith 
31d0f46423SBarry Smith   for (i=0; i<A->rmap->n; i++) {
3226fbe8dcSKarl Rupp     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
3326fbe8dcSKarl Rupp       va[i] = vb[i];
3426fbe8dcSKarl Rupp       if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);
3526fbe8dcSKarl Rupp     }
367843d17aSBarry Smith   }
377843d17aSBarry Smith 
381ebc52fbSHong Zhang   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
391ebc52fbSHong Zhang   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
40c31cb41cSBarry Smith   ierr = PetscFree(idxb);CHKERRQ(ierr);
416bf464f9SBarry Smith   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
427843d17aSBarry Smith   PetscFunctionReturn(0);
437843d17aSBarry Smith }
447843d17aSBarry Smith 
454a2ae208SSatish Balay #undef __FUNCT__
464a2ae208SSatish Balay #define __FUNCT__ "MatStoreValues_MPIBAIJ"
477087cfbeSBarry Smith PetscErrorCode  MatStoreValues_MPIBAIJ(Mat mat)
487fc3c18eSBarry Smith {
497fc3c18eSBarry Smith   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
50dfbe8321SBarry Smith   PetscErrorCode ierr;
517fc3c18eSBarry Smith 
527fc3c18eSBarry Smith   PetscFunctionBegin;
537fc3c18eSBarry Smith   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
547fc3c18eSBarry Smith   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
557fc3c18eSBarry Smith   PetscFunctionReturn(0);
567fc3c18eSBarry Smith }
577fc3c18eSBarry Smith 
584a2ae208SSatish Balay #undef __FUNCT__
594a2ae208SSatish Balay #define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
607087cfbeSBarry Smith PetscErrorCode  MatRetrieveValues_MPIBAIJ(Mat mat)
617fc3c18eSBarry Smith {
627fc3c18eSBarry Smith   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
63dfbe8321SBarry Smith   PetscErrorCode ierr;
647fc3c18eSBarry Smith 
657fc3c18eSBarry Smith   PetscFunctionBegin;
667fc3c18eSBarry Smith   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
677fc3c18eSBarry Smith   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
687fc3c18eSBarry Smith   PetscFunctionReturn(0);
697fc3c18eSBarry Smith }
707fc3c18eSBarry Smith 
71537820f0SBarry Smith /*
72537820f0SBarry Smith      Local utility routine that creates a mapping from the global column
7357b952d6SSatish Balay    number to the local number in the off-diagonal part of the local
74e06f6af7SJed Brown    storage of the matrix.  This is done in a non scalable way since the
7557b952d6SSatish Balay    length of colmap equals the global matrix length.
7657b952d6SSatish Balay */
774a2ae208SSatish Balay #undef __FUNCT__
78ab9863d7SBarry Smith #define __FUNCT__ "MatCreateColmap_MPIBAIJ_Private"
79ab9863d7SBarry Smith PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
8057b952d6SSatish Balay {
8157b952d6SSatish Balay   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
8257b952d6SSatish Balay   Mat_SeqBAIJ    *B    = (Mat_SeqBAIJ*)baij->B->data;
836849ba73SBarry Smith   PetscErrorCode ierr;
84d0f46423SBarry Smith   PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;
8557b952d6SSatish Balay 
86d64ed03dSBarry Smith   PetscFunctionBegin;
87aa482453SBarry Smith #if defined(PETSC_USE_CTABLE)
88e23dfa41SBarry Smith   ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
8948e59246SSatish Balay   for (i=0; i<nbs; i++) {
903861aac3SJed Brown     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr);
9148e59246SSatish Balay   }
9248e59246SSatish Balay #else
93854ce69bSBarry Smith   ierr = PetscMalloc1(baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
943bb1ff40SBarry Smith   ierr = PetscLogObjectMemory((PetscObject)mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
95b24ad042SBarry Smith   ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
96928fc39bSSatish Balay   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
9748e59246SSatish Balay #endif
983a40ed3dSBarry Smith   PetscFunctionReturn(0);
9957b952d6SSatish Balay }
10057b952d6SSatish Balay 
/*
   MatSetValues_SeqBAIJ_A_Private - sets (or, for ADD_VALUES, adds) the single
   scalar `value` at point position (row,col) in the block storage described by
   aj/ai/aa/ailen/aimax.

   This is a statement macro, not a function.  It reads and writes locals of the
   function it is expanded in (bs, bs2, A, a, ai, aj, aa, aimax, ailen, rp, ap,
   bap, brow, bcol, ridx, cidx, low, high, t, _i, ii, N, nrow, rmax, ierr) and
   it defines the label a_noinsert, so it can be expanded only once per function.
*/
#define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
  { \
    /* block row/column, and the position inside the bs x bs block */ \
    brow = row/bs; \
    bcol = col/bs; \
    ridx = row % bs; \
    cidx = col % bs; \
    /* block-column indices and values of block row brow */ \
    rp   = aj + ai[brow]; \
    ap   = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; \
    nrow = ailen[brow]; \
    /* bisect until at most three candidates remain, then scan them */ \
    low  = 0; \
    high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        /* block exists; the entry sits at column cidx, row ridx (column-major) */ \
        bap = ap +  bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value;  \
        goto a_noinsert; \
      } \
    } \
    /* block is new: nonew == 1 drops it silently, nonew == -1 reports an error */ \
    if (a->nonew == 1) goto a_noinsert; \
    if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow - 1;  \
    nrow++; \
    /* shift up all the later entries in this row to open slot _i */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    /* a slot vacated by the shift still holds the old block: clear it */ \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
a_noinsert:; \
    ailen[brow] = nrow; \
  }
13957b952d6SSatish Balay 
/*
   MatSetValues_SeqBAIJ_B_Private - sets (or, for ADD_VALUES, adds) the single
   scalar `value` at point position (row,col) in the block storage described by
   bj/bi/ba/bilen/bimax.

   This is a statement macro, not a function.  It reads and writes locals of the
   function it is expanded in (bs, bs2, B, b, bi, bj, ba, bimax, bilen, rp, ap,
   bap, brow, bcol, ridx, cidx, low, high, t, _i, ii, N, nrow, rmax, ierr) and
   it defines the label b_noinsert, so it can be expanded only once per function.
*/
#define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
  { \
    /* block row/column, and the position inside the bs x bs block */ \
    brow = row/bs; \
    bcol = col/bs; \
    ridx = row % bs; \
    cidx = col % bs; \
    /* block-column indices and values of block row brow */ \
    rp   = bj + bi[brow]; \
    ap   = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; \
    nrow = bilen[brow]; \
    /* bisect until at most three candidates remain, then scan them */ \
    low  = 0; \
    high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        /* block exists; the entry sits at column cidx, row ridx (column-major) */ \
        bap = ap +  bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value;  \
        goto b_noinsert; \
      } \
    } \
    /* block is new: nonew == 1 drops it silently, nonew == -1 reports an error */ \
    if (b->nonew == 1) goto b_noinsert; \
    if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    N = nrow - 1;  \
    nrow++; \
    /* shift up all the later entries in this row to open slot _i */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    /* a slot vacated by the shift still holds the old block: clear it */ \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
b_noinsert:; \
    bilen[brow] = nrow; \
  }
177ac7a638eSSatish Balay 
1784a2ae208SSatish Balay #undef __FUNCT__
1794a2ae208SSatish Balay #define __FUNCT__ "MatSetValues_MPIBAIJ"
180b24ad042SBarry Smith PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
18157b952d6SSatish Balay {
18257b952d6SSatish Balay   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
18393fea6afSBarry Smith   MatScalar      value;
184ace3abfcSBarry Smith   PetscBool      roworiented = baij->roworiented;
185dfbe8321SBarry Smith   PetscErrorCode ierr;
186b24ad042SBarry Smith   PetscInt       i,j,row,col;
187d0f46423SBarry Smith   PetscInt       rstart_orig=mat->rmap->rstart;
188d0f46423SBarry Smith   PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
189d0f46423SBarry Smith   PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;
19057b952d6SSatish Balay 
191eada6651SSatish Balay   /* Some Variables required in the macro */
19280c1aa95SSatish Balay   Mat         A     = baij->A;
19380c1aa95SSatish Balay   Mat_SeqBAIJ *a    = (Mat_SeqBAIJ*)(A)->data;
194b24ad042SBarry Smith   PetscInt    *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
1953eda8832SBarry Smith   MatScalar   *aa   =a->a;
196ac7a638eSSatish Balay 
197ac7a638eSSatish Balay   Mat         B     = baij->B;
198ac7a638eSSatish Balay   Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
199b24ad042SBarry Smith   PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
2003eda8832SBarry Smith   MatScalar   *ba   =b->a;
201ac7a638eSSatish Balay 
202b24ad042SBarry Smith   PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
203b24ad042SBarry Smith   PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
2043eda8832SBarry Smith   MatScalar *ap,*bap;
20580c1aa95SSatish Balay 
206d64ed03dSBarry Smith   PetscFunctionBegin;
20757b952d6SSatish Balay   for (i=0; i<m; i++) {
2085ef9f2a5SBarry Smith     if (im[i] < 0) continue;
2092515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
210e32f2f54SBarry Smith     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
211639f9d9dSBarry Smith #endif
21257b952d6SSatish Balay     if (im[i] >= rstart_orig && im[i] < rend_orig) {
21357b952d6SSatish Balay       row = im[i] - rstart_orig;
21457b952d6SSatish Balay       for (j=0; j<n; j++) {
21557b952d6SSatish Balay         if (in[j] >= cstart_orig && in[j] < cend_orig) {
21657b952d6SSatish Balay           col = in[j] - cstart_orig;
217db4deed7SKarl Rupp           if (roworiented) value = v[i*n+j];
218db4deed7SKarl Rupp           else             value = v[i+j*m];
219f5e9677aSSatish Balay           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
22080c1aa95SSatish Balay           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
22173959e64SBarry Smith         } else if (in[j] < 0) continue;
2222515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
223660746e0SBarry Smith         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
224639f9d9dSBarry Smith #endif
22557b952d6SSatish Balay         else {
22657b952d6SSatish Balay           if (mat->was_assembled) {
227905e6a2fSBarry Smith             if (!baij->colmap) {
228ab9863d7SBarry Smith               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
229905e6a2fSBarry Smith             }
230aa482453SBarry Smith #if defined(PETSC_USE_CTABLE)
2310f5bd95cSBarry Smith             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
232bba1ac68SSatish Balay             col  = col - 1;
23348e59246SSatish Balay #else
234bba1ac68SSatish Balay             col = baij->colmap[in[j]/bs] - 1;
23548e59246SSatish Balay #endif
236c9ef50b2SBarry Smith             if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
237ab9863d7SBarry Smith               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
2388295de27SSatish Balay               col  =  in[j];
2399bf004c3SSatish Balay               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
2409bf004c3SSatish Balay               B    = baij->B;
2419bf004c3SSatish Balay               b    = (Mat_SeqBAIJ*)(B)->data;
2429bf004c3SSatish Balay               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
2439bf004c3SSatish Balay               ba   =b->a;
244c9ef50b2SBarry Smith             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
245c9ef50b2SBarry Smith             else col += in[j]%bs;
2468295de27SSatish Balay           } else col = in[j];
247db4deed7SKarl Rupp           if (roworiented) value = v[i*n+j];
248db4deed7SKarl Rupp           else             value = v[i+j*m];
24990da58bdSSatish Balay           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
25090da58bdSSatish Balay           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
25157b952d6SSatish Balay         }
25257b952d6SSatish Balay       }
253d64ed03dSBarry Smith     } else {
2544cb17eb5SBarry Smith       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
25590f02eecSBarry Smith       if (!baij->donotstash) {
2565080c13bSMatthew G Knepley         mat->assembled = PETSC_FALSE;
257ff2fd236SBarry Smith         if (roworiented) {
258b400d20cSBarry Smith           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
259ff2fd236SBarry Smith         } else {
260b400d20cSBarry Smith           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
26157b952d6SSatish Balay         }
26257b952d6SSatish Balay       }
26357b952d6SSatish Balay     }
26490f02eecSBarry Smith   }
2653a40ed3dSBarry Smith   PetscFunctionReturn(0);
26657b952d6SSatish Balay }
26757b952d6SSatish Balay 
2684a2ae208SSatish Balay #undef __FUNCT__
26997e5c40aSBarry Smith #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
27097e5c40aSBarry Smith PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
271ab26458aSBarry Smith {
272ab26458aSBarry Smith   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
273dd6ea824SBarry Smith   const PetscScalar *value;
274f15d580aSBarry Smith   MatScalar         *barray     = baij->barray;
275ace3abfcSBarry Smith   PetscBool         roworiented = baij->roworiented;
276dfbe8321SBarry Smith   PetscErrorCode    ierr;
277899cda47SBarry Smith   PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
278899cda47SBarry Smith   PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
279d0f46423SBarry Smith   PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
280ab26458aSBarry Smith 
281b16ae2b1SBarry Smith   PetscFunctionBegin;
28230793edcSSatish Balay   if (!barray) {
283785e854fSJed Brown     ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
28482502324SSatish Balay     baij->barray = barray;
28530793edcSSatish Balay   }
28630793edcSSatish Balay 
28726fbe8dcSKarl Rupp   if (roworiented) stepval = (n-1)*bs;
28826fbe8dcSKarl Rupp   else stepval = (m-1)*bs;
28926fbe8dcSKarl Rupp 
290ab26458aSBarry Smith   for (i=0; i<m; i++) {
2915ef9f2a5SBarry Smith     if (im[i] < 0) continue;
2922515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
293e32f2f54SBarry Smith     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
294ab26458aSBarry Smith #endif
295ab26458aSBarry Smith     if (im[i] >= rstart && im[i] < rend) {
296ab26458aSBarry Smith       row = im[i] - rstart;
297ab26458aSBarry Smith       for (j=0; j<n; j++) {
29815b57d14SSatish Balay         /* If NumCol = 1 then a copy is not required */
29915b57d14SSatish Balay         if ((roworiented) && (n == 1)) {
300f15d580aSBarry Smith           barray = (MatScalar*)v + i*bs2;
30115b57d14SSatish Balay         } else if ((!roworiented) && (m == 1)) {
302f15d580aSBarry Smith           barray = (MatScalar*)v + j*bs2;
30315b57d14SSatish Balay         } else { /* Here a copy is required */
304ab26458aSBarry Smith           if (roworiented) {
30553ef36baSBarry Smith             value = v + (i*(stepval+bs) + j)*bs;
306ab26458aSBarry Smith           } else {
30753ef36baSBarry Smith             value = v + (j*(stepval+bs) + i)*bs;
308abef11f7SSatish Balay           }
30953ef36baSBarry Smith           for (ii=0; ii<bs; ii++,value+=bs+stepval) {
31026fbe8dcSKarl Rupp             for (jj=0; jj<bs; jj++) barray[jj] = value[jj];
31153ef36baSBarry Smith             barray += bs;
31247513183SBarry Smith           }
31330793edcSSatish Balay           barray -= bs2;
31415b57d14SSatish Balay         }
315abef11f7SSatish Balay 
316abef11f7SSatish Balay         if (in[j] >= cstart && in[j] < cend) {
317abef11f7SSatish Balay           col  = in[j] - cstart;
31897e5c40aSBarry Smith           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
31926fbe8dcSKarl Rupp         } else if (in[j] < 0) continue;
3202515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
321cb9801acSJed Brown         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
322ab26458aSBarry Smith #endif
323ab26458aSBarry Smith         else {
324ab26458aSBarry Smith           if (mat->was_assembled) {
325ab26458aSBarry Smith             if (!baij->colmap) {
326ab9863d7SBarry Smith               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
327ab26458aSBarry Smith             }
328a5eb4965SSatish Balay 
3292515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
330aa482453SBarry Smith #if defined(PETSC_USE_CTABLE)
331b24ad042SBarry Smith             { PetscInt data;
3320f5bd95cSBarry Smith               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
333e32f2f54SBarry Smith               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
334fa46199cSSatish Balay             }
33548e59246SSatish Balay #else
336e32f2f54SBarry Smith             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
337a5eb4965SSatish Balay #endif
33848e59246SSatish Balay #endif
339aa482453SBarry Smith #if defined(PETSC_USE_CTABLE)
3400f5bd95cSBarry Smith             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
341fa46199cSSatish Balay             col  = (col - 1)/bs;
34248e59246SSatish Balay #else
343a5eb4965SSatish Balay             col = (baij->colmap[in[j]] - 1)/bs;
34448e59246SSatish Balay #endif
3450e9bae81SBarry Smith             if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
346ab9863d7SBarry Smith               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
347ab26458aSBarry Smith               col  =  in[j];
3480e9bae81SBarry Smith             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", bs*im[i], bs*in[j]);
349db4deed7SKarl Rupp           } else col = in[j];
35097e5c40aSBarry Smith           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
351ab26458aSBarry Smith         }
352ab26458aSBarry Smith       }
353d64ed03dSBarry Smith     } else {
3544cb17eb5SBarry Smith       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
355ab26458aSBarry Smith       if (!baij->donotstash) {
356ff2fd236SBarry Smith         if (roworiented) {
3576fa18ffdSBarry Smith           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
358ff2fd236SBarry Smith         } else {
3596fa18ffdSBarry Smith           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
360ff2fd236SBarry Smith         }
361abef11f7SSatish Balay       }
362ab26458aSBarry Smith     }
363ab26458aSBarry Smith   }
3643a40ed3dSBarry Smith   PetscFunctionReturn(0);
365ab26458aSBarry Smith }
3666fa18ffdSBarry Smith 
/* Multiplier of the multiplicative hash below; 0.6180339887 is (sqrt(5)-1)/2 to ten digits */
#define HASH_KEY 0.6180339887
/*
   HASH - maps an integer key to a slot number: size times the fractional part
   of key*HASH_KEY, truncated to PetscInt.  For non-negative keys the result
   lies in [0,size).

   tmp must be a floating-point lvalue supplied by the caller; it is overwritten
   with key*HASH_KEY.  The fractional part is taken by subtracting the
   truncated value, which gives the same result as
   (PetscInt)((size)*fmod((key)*HASH_KEY,1)) as long as the product fits in a
   PetscInt.
*/
#define HASH(size,key,tmp) ((tmp) = (key)*HASH_KEY,(PetscInt)((size)*((tmp)-(PetscInt)(tmp))))
3714a2ae208SSatish Balay #undef __FUNCT__
37297e5c40aSBarry Smith #define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
37397e5c40aSBarry Smith PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
3740bdbc534SSatish Balay {
3750bdbc534SSatish Balay   Mat_MPIBAIJ    *baij       = (Mat_MPIBAIJ*)mat->data;
376ace3abfcSBarry Smith   PetscBool      roworiented = baij->roworiented;
377dfbe8321SBarry Smith   PetscErrorCode ierr;
378b24ad042SBarry Smith   PetscInt       i,j,row,col;
379d0f46423SBarry Smith   PetscInt       rstart_orig=mat->rmap->rstart;
380d0f46423SBarry Smith   PetscInt       rend_orig  =mat->rmap->rend,Nbs=baij->Nbs;
381d0f46423SBarry Smith   PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
382329f5518SBarry Smith   PetscReal      tmp;
3833eda8832SBarry Smith   MatScalar      **HD = baij->hd,value;
3842515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
385b24ad042SBarry Smith   PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
3864a15367fSSatish Balay #endif
3870bdbc534SSatish Balay 
3880bdbc534SSatish Balay   PetscFunctionBegin;
3890bdbc534SSatish Balay   for (i=0; i<m; i++) {
3902515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
391e32f2f54SBarry Smith     if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
392e32f2f54SBarry Smith     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
3930bdbc534SSatish Balay #endif
3940bdbc534SSatish Balay     row = im[i];
395c2760754SSatish Balay     if (row >= rstart_orig && row < rend_orig) {
3960bdbc534SSatish Balay       for (j=0; j<n; j++) {
3970bdbc534SSatish Balay         col = in[j];
398db4deed7SKarl Rupp         if (roworiented) value = v[i*n+j];
399db4deed7SKarl Rupp         else             value = v[i+j*m];
400b24ad042SBarry Smith         /* Look up PetscInto the Hash Table */
401c2760754SSatish Balay         key = (row/bs)*Nbs+(col/bs)+1;
402c2760754SSatish Balay         h1  = HASH(size,key,tmp);
4030bdbc534SSatish Balay 
404c2760754SSatish Balay 
405c2760754SSatish Balay         idx = h1;
4062515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
407187ce0cbSSatish Balay         insert_ct++;
408187ce0cbSSatish Balay         total_ct++;
409187ce0cbSSatish Balay         if (HT[idx] != key) {
410187ce0cbSSatish Balay           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
411187ce0cbSSatish Balay           if (idx == size) {
412187ce0cbSSatish Balay             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
413f23aa3ddSBarry Smith             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
414187ce0cbSSatish Balay           }
415187ce0cbSSatish Balay         }
416187ce0cbSSatish Balay #else
417c2760754SSatish Balay         if (HT[idx] != key) {
418c2760754SSatish Balay           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
419c2760754SSatish Balay           if (idx == size) {
420c2760754SSatish Balay             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
421f23aa3ddSBarry Smith             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
422c2760754SSatish Balay           }
423c2760754SSatish Balay         }
424187ce0cbSSatish Balay #endif
425c2760754SSatish Balay         /* A HASH table entry is found, so insert the values at the correct address */
426c2760754SSatish Balay         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
427c2760754SSatish Balay         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
4280bdbc534SSatish Balay       }
42926fbe8dcSKarl Rupp     } else if (!baij->donotstash) {
430ff2fd236SBarry Smith       if (roworiented) {
431b400d20cSBarry Smith         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
432ff2fd236SBarry Smith       } else {
433b400d20cSBarry Smith         ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
4340bdbc534SSatish Balay       }
4350bdbc534SSatish Balay     }
4360bdbc534SSatish Balay   }
4372515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
438187ce0cbSSatish Balay   baij->ht_total_ct  = total_ct;
439187ce0cbSSatish Balay   baij->ht_insert_ct = insert_ct;
440187ce0cbSSatish Balay #endif
4410bdbc534SSatish Balay   PetscFunctionReturn(0);
4420bdbc534SSatish Balay }
4430bdbc534SSatish Balay 
4444a2ae208SSatish Balay #undef __FUNCT__
44597e5c40aSBarry Smith #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
44697e5c40aSBarry Smith PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
4470bdbc534SSatish Balay {
4480bdbc534SSatish Balay   Mat_MPIBAIJ       *baij       = (Mat_MPIBAIJ*)mat->data;
449ace3abfcSBarry Smith   PetscBool         roworiented = baij->roworiented;
450dfbe8321SBarry Smith   PetscErrorCode    ierr;
451b24ad042SBarry Smith   PetscInt          i,j,ii,jj,row,col;
452899cda47SBarry Smith   PetscInt          rstart=baij->rstartbs;
453d0f46423SBarry Smith   PetscInt          rend  =mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
454b24ad042SBarry Smith   PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
455329f5518SBarry Smith   PetscReal         tmp;
4563eda8832SBarry Smith   MatScalar         **HD = baij->hd,*baij_a;
457dd6ea824SBarry Smith   const PetscScalar *v_t,*value;
4582515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
459b24ad042SBarry Smith   PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
4604a15367fSSatish Balay #endif
4610bdbc534SSatish Balay 
462d0a41580SSatish Balay   PetscFunctionBegin;
46326fbe8dcSKarl Rupp   if (roworiented) stepval = (n-1)*bs;
46426fbe8dcSKarl Rupp   else stepval = (m-1)*bs;
46526fbe8dcSKarl Rupp 
4660bdbc534SSatish Balay   for (i=0; i<m; i++) {
4672515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
468e32f2f54SBarry Smith     if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
469e32f2f54SBarry Smith     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
4700bdbc534SSatish Balay #endif
4710bdbc534SSatish Balay     row = im[i];
472ab715e2cSSatish Balay     v_t = v + i*nbs2;
473c2760754SSatish Balay     if (row >= rstart && row < rend) {
4740bdbc534SSatish Balay       for (j=0; j<n; j++) {
4750bdbc534SSatish Balay         col = in[j];
4760bdbc534SSatish Balay 
4770bdbc534SSatish Balay         /* Look up into the Hash Table */
478c2760754SSatish Balay         key = row*Nbs+col+1;
479c2760754SSatish Balay         h1  = HASH(size,key,tmp);
4800bdbc534SSatish Balay 
481c2760754SSatish Balay         idx = h1;
4822515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
483187ce0cbSSatish Balay         total_ct++;
484187ce0cbSSatish Balay         insert_ct++;
485187ce0cbSSatish Balay         if (HT[idx] != key) {
486187ce0cbSSatish Balay           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
487187ce0cbSSatish Balay           if (idx == size) {
488187ce0cbSSatish Balay             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
489f23aa3ddSBarry Smith             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
490187ce0cbSSatish Balay           }
491187ce0cbSSatish Balay         }
492187ce0cbSSatish Balay #else
493c2760754SSatish Balay         if (HT[idx] != key) {
494c2760754SSatish Balay           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
495c2760754SSatish Balay           if (idx == size) {
496c2760754SSatish Balay             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
497f23aa3ddSBarry Smith             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
498c2760754SSatish Balay           }
499c2760754SSatish Balay         }
500187ce0cbSSatish Balay #endif
501c2760754SSatish Balay         baij_a = HD[idx];
5020bdbc534SSatish Balay         if (roworiented) {
503c2760754SSatish Balay           /*value = v + i*(stepval+bs)*bs + j*bs;*/
504187ce0cbSSatish Balay           /* value = v + (i*(stepval+bs)+j)*bs; */
505187ce0cbSSatish Balay           value = v_t;
506187ce0cbSSatish Balay           v_t  += bs;
507fef45726SSatish Balay           if (addv == ADD_VALUES) {
508c2760754SSatish Balay             for (ii=0; ii<bs; ii++,value+=stepval) {
509c2760754SSatish Balay               for (jj=ii; jj<bs2; jj+=bs) {
510fef45726SSatish Balay                 baij_a[jj] += *value++;
511b4cc0f5aSSatish Balay               }
512b4cc0f5aSSatish Balay             }
513fef45726SSatish Balay           } else {
514c2760754SSatish Balay             for (ii=0; ii<bs; ii++,value+=stepval) {
515c2760754SSatish Balay               for (jj=ii; jj<bs2; jj+=bs) {
516fef45726SSatish Balay                 baij_a[jj] = *value++;
517fef45726SSatish Balay               }
518fef45726SSatish Balay             }
519fef45726SSatish Balay           }
5200bdbc534SSatish Balay         } else {
5210bdbc534SSatish Balay           value = v + j*(stepval+bs)*bs + i*bs;
522fef45726SSatish Balay           if (addv == ADD_VALUES) {
523b4cc0f5aSSatish Balay             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
5240bdbc534SSatish Balay               for (jj=0; jj<bs; jj++) {
525fef45726SSatish Balay                 baij_a[jj] += *value++;
526fef45726SSatish Balay               }
527fef45726SSatish Balay             }
528fef45726SSatish Balay           } else {
529fef45726SSatish Balay             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
530fef45726SSatish Balay               for (jj=0; jj<bs; jj++) {
531fef45726SSatish Balay                 baij_a[jj] = *value++;
532fef45726SSatish Balay               }
533b4cc0f5aSSatish Balay             }
5340bdbc534SSatish Balay           }
5350bdbc534SSatish Balay         }
5360bdbc534SSatish Balay       }
5370bdbc534SSatish Balay     } else {
5380bdbc534SSatish Balay       if (!baij->donotstash) {
5390bdbc534SSatish Balay         if (roworiented) {
5408798bf22SSatish Balay           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
5410bdbc534SSatish Balay         } else {
5428798bf22SSatish Balay           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
5430bdbc534SSatish Balay         }
5440bdbc534SSatish Balay       }
5450bdbc534SSatish Balay     }
5460bdbc534SSatish Balay   }
5472515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
548187ce0cbSSatish Balay   baij->ht_total_ct  = total_ct;
549187ce0cbSSatish Balay   baij->ht_insert_ct = insert_ct;
550187ce0cbSSatish Balay #endif
5510bdbc534SSatish Balay   PetscFunctionReturn(0);
5520bdbc534SSatish Balay }
553133cdb44SSatish Balay 
5544a2ae208SSatish Balay #undef __FUNCT__
5554a2ae208SSatish Balay #define __FUNCT__ "MatGetValues_MPIBAIJ"
556b24ad042SBarry Smith PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
557d6de1c52SSatish Balay {
558d6de1c52SSatish Balay   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
5596849ba73SBarry Smith   PetscErrorCode ierr;
560d0f46423SBarry Smith   PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
561d0f46423SBarry Smith   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
562d6de1c52SSatish Balay 
563133cdb44SSatish Balay   PetscFunctionBegin;
564d6de1c52SSatish Balay   for (i=0; i<m; i++) {
565e32f2f54SBarry Smith     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
566e32f2f54SBarry Smith     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
567d6de1c52SSatish Balay     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
568d6de1c52SSatish Balay       row = idxm[i] - bsrstart;
569d6de1c52SSatish Balay       for (j=0; j<n; j++) {
570e32f2f54SBarry Smith         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
571e32f2f54SBarry Smith         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
572d6de1c52SSatish Balay         if (idxn[j] >= bscstart && idxn[j] < bscend) {
573d6de1c52SSatish Balay           col  = idxn[j] - bscstart;
57498dd23e9SBarry Smith           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
575d64ed03dSBarry Smith         } else {
576905e6a2fSBarry Smith           if (!baij->colmap) {
577ab9863d7SBarry Smith             ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
578905e6a2fSBarry Smith           }
579aa482453SBarry Smith #if defined(PETSC_USE_CTABLE)
5800f5bd95cSBarry Smith           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
581fa46199cSSatish Balay           data--;
58248e59246SSatish Balay #else
58348e59246SSatish Balay           data = baij->colmap[idxn[j]/bs]-1;
58448e59246SSatish Balay #endif
58548e59246SSatish Balay           if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
586d9d09a02SSatish Balay           else {
58748e59246SSatish Balay             col  = data + idxn[j]%bs;
58898dd23e9SBarry Smith             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
589d6de1c52SSatish Balay           }
590d6de1c52SSatish Balay         }
591d6de1c52SSatish Balay       }
592f23aa3ddSBarry Smith     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
593d6de1c52SSatish Balay   }
5943a40ed3dSBarry Smith   PetscFunctionReturn(0);
595d6de1c52SSatish Balay }
596d6de1c52SSatish Balay 
5974a2ae208SSatish Balay #undef __FUNCT__
5984a2ae208SSatish Balay #define __FUNCT__ "MatNorm_MPIBAIJ"
599dfbe8321SBarry Smith PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
600d6de1c52SSatish Balay {
601d6de1c52SSatish Balay   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
602d6de1c52SSatish Balay   Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
603dfbe8321SBarry Smith   PetscErrorCode ierr;
604d0f46423SBarry Smith   PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
605329f5518SBarry Smith   PetscReal      sum = 0.0;
6063eda8832SBarry Smith   MatScalar      *v;
607d6de1c52SSatish Balay 
608d64ed03dSBarry Smith   PetscFunctionBegin;
609d6de1c52SSatish Balay   if (baij->size == 1) {
610064f8208SBarry Smith     ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
611d6de1c52SSatish Balay   } else {
612d6de1c52SSatish Balay     if (type == NORM_FROBENIUS) {
613d6de1c52SSatish Balay       v  = amat->a;
6148a62d963SHong Zhang       nz = amat->nz*bs2;
6158a62d963SHong Zhang       for (i=0; i<nz; i++) {
616329f5518SBarry Smith         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
617d6de1c52SSatish Balay       }
618d6de1c52SSatish Balay       v  = bmat->a;
6198a62d963SHong Zhang       nz = bmat->nz*bs2;
6208a62d963SHong Zhang       for (i=0; i<nz; i++) {
621329f5518SBarry Smith         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
622d6de1c52SSatish Balay       }
623ce94432eSBarry Smith       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
6248f1a2a5eSBarry Smith       *nrm = PetscSqrtReal(*nrm);
6258a62d963SHong Zhang     } else if (type == NORM_1) { /* max column sum */
6268a62d963SHong Zhang       PetscReal *tmp,*tmp2;
627899cda47SBarry Smith       PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
628dcca6d9dSJed Brown       ierr = PetscMalloc2(mat->cmap->N,&tmp,mat->cmap->N,&tmp2);CHKERRQ(ierr);
629d0f46423SBarry Smith       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
6308a62d963SHong Zhang       v    = amat->a; jj = amat->j;
6318a62d963SHong Zhang       for (i=0; i<amat->nz; i++) {
6328a62d963SHong Zhang         for (j=0; j<bs; j++) {
6338a62d963SHong Zhang           col = bs*(cstart + *jj) + j; /* column index */
6348a62d963SHong Zhang           for (row=0; row<bs; row++) {
6358a62d963SHong Zhang             tmp[col] += PetscAbsScalar(*v);  v++;
6368a62d963SHong Zhang           }
6378a62d963SHong Zhang         }
6388a62d963SHong Zhang         jj++;
6398a62d963SHong Zhang       }
6408a62d963SHong Zhang       v = bmat->a; jj = bmat->j;
6418a62d963SHong Zhang       for (i=0; i<bmat->nz; i++) {
6428a62d963SHong Zhang         for (j=0; j<bs; j++) {
6438a62d963SHong Zhang           col = bs*garray[*jj] + j;
6448a62d963SHong Zhang           for (row=0; row<bs; row++) {
6458a62d963SHong Zhang             tmp[col] += PetscAbsScalar(*v); v++;
6468a62d963SHong Zhang           }
6478a62d963SHong Zhang         }
6488a62d963SHong Zhang         jj++;
6498a62d963SHong Zhang       }
650ce94432eSBarry Smith       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
6518a62d963SHong Zhang       *nrm = 0.0;
652d0f46423SBarry Smith       for (j=0; j<mat->cmap->N; j++) {
6538a62d963SHong Zhang         if (tmp2[j] > *nrm) *nrm = tmp2[j];
6548a62d963SHong Zhang       }
655fca92195SBarry Smith       ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
6568a62d963SHong Zhang     } else if (type == NORM_INFINITY) { /* max row sum */
657577dd1f9SKris Buschelman       PetscReal *sums;
658785e854fSJed Brown       ierr = PetscMalloc1(bs,&sums);CHKERRQ(ierr);
6598a62d963SHong Zhang       sum  = 0.0;
6608a62d963SHong Zhang       for (j=0; j<amat->mbs; j++) {
6618a62d963SHong Zhang         for (row=0; row<bs; row++) sums[row] = 0.0;
6628a62d963SHong Zhang         v  = amat->a + bs2*amat->i[j];
6638a62d963SHong Zhang         nz = amat->i[j+1]-amat->i[j];
6648a62d963SHong Zhang         for (i=0; i<nz; i++) {
6658a62d963SHong Zhang           for (col=0; col<bs; col++) {
6668a62d963SHong Zhang             for (row=0; row<bs; row++) {
6678a62d963SHong Zhang               sums[row] += PetscAbsScalar(*v); v++;
6688a62d963SHong Zhang             }
6698a62d963SHong Zhang           }
6708a62d963SHong Zhang         }
6718a62d963SHong Zhang         v  = bmat->a + bs2*bmat->i[j];
6728a62d963SHong Zhang         nz = bmat->i[j+1]-bmat->i[j];
6738a62d963SHong Zhang         for (i=0; i<nz; i++) {
6748a62d963SHong Zhang           for (col=0; col<bs; col++) {
6758a62d963SHong Zhang             for (row=0; row<bs; row++) {
6768a62d963SHong Zhang               sums[row] += PetscAbsScalar(*v); v++;
6778a62d963SHong Zhang             }
6788a62d963SHong Zhang           }
6798a62d963SHong Zhang         }
6808a62d963SHong Zhang         for (row=0; row<bs; row++) {
6818a62d963SHong Zhang           if (sums[row] > sum) sum = sums[row];
6828a62d963SHong Zhang         }
6838a62d963SHong Zhang       }
684ce94432eSBarry Smith       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
685577dd1f9SKris Buschelman       ierr = PetscFree(sums);CHKERRQ(ierr);
686ce94432eSBarry Smith     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm yet");
687d64ed03dSBarry Smith   }
6883a40ed3dSBarry Smith   PetscFunctionReturn(0);
689d6de1c52SSatish Balay }
69057b952d6SSatish Balay 
/*
  Creates the hash table and fills it with the currently stored blocks.
  The table is created only once.
  If new entries need to be added to the matrix,
  the hash table has to be destroyed and
  recreated.
*/
6984a2ae208SSatish Balay #undef __FUNCT__
6994a2ae208SSatish Balay #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
700dfbe8321SBarry Smith PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
701596b8d2eSBarry Smith {
702596b8d2eSBarry Smith   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
703596b8d2eSBarry Smith   Mat            A     = baij->A,B=baij->B;
704596b8d2eSBarry Smith   Mat_SeqBAIJ    *a    = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data;
705b24ad042SBarry Smith   PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
7066849ba73SBarry Smith   PetscErrorCode ierr;
707fca92195SBarry Smith   PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
708899cda47SBarry Smith   PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
709b24ad042SBarry Smith   PetscInt       *HT,key;
7103eda8832SBarry Smith   MatScalar      **HD;
711329f5518SBarry Smith   PetscReal      tmp;
7126cf91177SBarry Smith #if defined(PETSC_USE_INFO)
713b24ad042SBarry Smith   PetscInt ct=0,max=0;
7144a15367fSSatish Balay #endif
715fef45726SSatish Balay 
716d64ed03dSBarry Smith   PetscFunctionBegin;
717fca92195SBarry Smith   if (baij->ht) PetscFunctionReturn(0);
718fef45726SSatish Balay 
719fca92195SBarry Smith   baij->ht_size = (PetscInt)(factor*nz);
720fca92195SBarry Smith   ht_size       = baij->ht_size;
7210bdbc534SSatish Balay 
722fef45726SSatish Balay   /* Allocate Memory for Hash Table */
7231795a4d1SJed Brown   ierr = PetscCalloc2(ht_size,&baij->hd,ht_size,&baij->ht);CHKERRQ(ierr);
724b9e4cc15SSatish Balay   HD   = baij->hd;
725a07cd24cSSatish Balay   HT   = baij->ht;
726b9e4cc15SSatish Balay 
727596b8d2eSBarry Smith   /* Loop Over A */
7280bdbc534SSatish Balay   for (i=0; i<a->mbs; i++) {
729596b8d2eSBarry Smith     for (j=ai[i]; j<ai[i+1]; j++) {
7300bdbc534SSatish Balay       row = i+rstart;
7310bdbc534SSatish Balay       col = aj[j]+cstart;
732596b8d2eSBarry Smith 
733187ce0cbSSatish Balay       key = row*Nbs + col + 1;
734fca92195SBarry Smith       h1  = HASH(ht_size,key,tmp);
735fca92195SBarry Smith       for (k=0; k<ht_size; k++) {
736fca92195SBarry Smith         if (!HT[(h1+k)%ht_size]) {
737fca92195SBarry Smith           HT[(h1+k)%ht_size] = key;
738fca92195SBarry Smith           HD[(h1+k)%ht_size] = a->a + j*bs2;
739596b8d2eSBarry Smith           break;
7406cf91177SBarry Smith #if defined(PETSC_USE_INFO)
741187ce0cbSSatish Balay         } else {
742187ce0cbSSatish Balay           ct++;
743187ce0cbSSatish Balay #endif
744596b8d2eSBarry Smith         }
745187ce0cbSSatish Balay       }
7466cf91177SBarry Smith #if defined(PETSC_USE_INFO)
747187ce0cbSSatish Balay       if (k> max) max = k;
748187ce0cbSSatish Balay #endif
749596b8d2eSBarry Smith     }
750596b8d2eSBarry Smith   }
751596b8d2eSBarry Smith   /* Loop Over B */
7520bdbc534SSatish Balay   for (i=0; i<b->mbs; i++) {
753596b8d2eSBarry Smith     for (j=bi[i]; j<bi[i+1]; j++) {
7540bdbc534SSatish Balay       row = i+rstart;
7550bdbc534SSatish Balay       col = garray[bj[j]];
756187ce0cbSSatish Balay       key = row*Nbs + col + 1;
757fca92195SBarry Smith       h1  = HASH(ht_size,key,tmp);
758fca92195SBarry Smith       for (k=0; k<ht_size; k++) {
759fca92195SBarry Smith         if (!HT[(h1+k)%ht_size]) {
760fca92195SBarry Smith           HT[(h1+k)%ht_size] = key;
761fca92195SBarry Smith           HD[(h1+k)%ht_size] = b->a + j*bs2;
762596b8d2eSBarry Smith           break;
7636cf91177SBarry Smith #if defined(PETSC_USE_INFO)
764187ce0cbSSatish Balay         } else {
765187ce0cbSSatish Balay           ct++;
766187ce0cbSSatish Balay #endif
767596b8d2eSBarry Smith         }
768187ce0cbSSatish Balay       }
7696cf91177SBarry Smith #if defined(PETSC_USE_INFO)
770187ce0cbSSatish Balay       if (k> max) max = k;
771187ce0cbSSatish Balay #endif
772596b8d2eSBarry Smith     }
773596b8d2eSBarry Smith   }
774596b8d2eSBarry Smith 
775596b8d2eSBarry Smith   /* Print Summary */
7766cf91177SBarry Smith #if defined(PETSC_USE_INFO)
777fca92195SBarry Smith   for (i=0,j=0; i<ht_size; i++) {
77826fbe8dcSKarl Rupp     if (HT[i]) j++;
779c38d4ed2SBarry Smith   }
7801e2582c4SBarry Smith   ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
781187ce0cbSSatish Balay #endif
7823a40ed3dSBarry Smith   PetscFunctionReturn(0);
783596b8d2eSBarry Smith }
78457b952d6SSatish Balay 
7854a2ae208SSatish Balay #undef __FUNCT__
7864a2ae208SSatish Balay #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
787dfbe8321SBarry Smith PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
788bbb85fb3SSatish Balay {
789bbb85fb3SSatish Balay   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
790dfbe8321SBarry Smith   PetscErrorCode ierr;
791b24ad042SBarry Smith   PetscInt       nstash,reallocs;
792bbb85fb3SSatish Balay   InsertMode     addv;
793bbb85fb3SSatish Balay 
794bbb85fb3SSatish Balay   PetscFunctionBegin;
79526fbe8dcSKarl Rupp   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
796bbb85fb3SSatish Balay 
797bbb85fb3SSatish Balay   /* make sure all processors are either in INSERTMODE or ADDMODE */
798ce94432eSBarry Smith   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
799ce94432eSBarry Smith   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
800bbb85fb3SSatish Balay   mat->insertmode = addv; /* in case this processor had no cache */
801bbb85fb3SSatish Balay 
802d0f46423SBarry Smith   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
8031e2582c4SBarry Smith   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
8048798bf22SSatish Balay   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
8051e2582c4SBarry Smith   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
80646680499SSatish Balay   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
8071e2582c4SBarry Smith   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
808bbb85fb3SSatish Balay   PetscFunctionReturn(0);
809bbb85fb3SSatish Balay }
810bbb85fb3SSatish Balay 
8114a2ae208SSatish Balay #undef __FUNCT__
8124a2ae208SSatish Balay #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
813dfbe8321SBarry Smith PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
814bbb85fb3SSatish Balay {
815bbb85fb3SSatish Balay   Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
81691c97fd4SSatish Balay   Mat_SeqBAIJ    *a   =(Mat_SeqBAIJ*)baij->A->data;
8176849ba73SBarry Smith   PetscErrorCode ierr;
818b24ad042SBarry Smith   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
819e44c0bd4SBarry Smith   PetscInt       *row,*col;
820ace3abfcSBarry Smith   PetscBool      r1,r2,r3,other_disassembled;
8213eda8832SBarry Smith   MatScalar      *val;
822bbb85fb3SSatish Balay   InsertMode     addv = mat->insertmode;
823b24ad042SBarry Smith   PetscMPIInt    n;
824bbb85fb3SSatish Balay 
825bbb85fb3SSatish Balay   PetscFunctionBegin;
8265fd66863SKarl Rupp   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
8274cb17eb5SBarry Smith   if (!baij->donotstash && !mat->nooffprocentries) {
828a2d1c673SSatish Balay     while (1) {
8298798bf22SSatish Balay       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
830a2d1c673SSatish Balay       if (!flg) break;
831a2d1c673SSatish Balay 
832bbb85fb3SSatish Balay       for (i=0; i<n;) {
833bbb85fb3SSatish Balay         /* Now identify the consecutive vals belonging to the same row */
83426fbe8dcSKarl Rupp         for (j=i,rstart=row[j]; j<n; j++) {
83526fbe8dcSKarl Rupp           if (row[j] != rstart) break;
83626fbe8dcSKarl Rupp         }
837bbb85fb3SSatish Balay         if (j < n) ncols = j-i;
838bbb85fb3SSatish Balay         else       ncols = n-i;
839bbb85fb3SSatish Balay         /* Now assemble all these values with a single function call */
84097e5c40aSBarry Smith         ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
841bbb85fb3SSatish Balay         i    = j;
842bbb85fb3SSatish Balay       }
843bbb85fb3SSatish Balay     }
8448798bf22SSatish Balay     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
845a2d1c673SSatish Balay     /* Now process the block-stash. Since the values are stashed column-oriented,
846a2d1c673SSatish Balay        set the roworiented flag to column oriented, and after MatSetValues()
847a2d1c673SSatish Balay        restore the original flags */
848a2d1c673SSatish Balay     r1 = baij->roworiented;
849a2d1c673SSatish Balay     r2 = a->roworiented;
85091c97fd4SSatish Balay     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
85126fbe8dcSKarl Rupp 
8527c922b88SBarry Smith     baij->roworiented = PETSC_FALSE;
8537c922b88SBarry Smith     a->roworiented    = PETSC_FALSE;
85426fbe8dcSKarl Rupp 
85591c97fd4SSatish Balay     (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
856a2d1c673SSatish Balay     while (1) {
8578798bf22SSatish Balay       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
858a2d1c673SSatish Balay       if (!flg) break;
859a2d1c673SSatish Balay 
860a2d1c673SSatish Balay       for (i=0; i<n;) {
861a2d1c673SSatish Balay         /* Now identify the consecutive vals belonging to the same row */
86226fbe8dcSKarl Rupp         for (j=i,rstart=row[j]; j<n; j++) {
86326fbe8dcSKarl Rupp           if (row[j] != rstart) break;
86426fbe8dcSKarl Rupp         }
865a2d1c673SSatish Balay         if (j < n) ncols = j-i;
866a2d1c673SSatish Balay         else       ncols = n-i;
86797e5c40aSBarry Smith         ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
868a2d1c673SSatish Balay         i    = j;
869a2d1c673SSatish Balay       }
870a2d1c673SSatish Balay     }
8718798bf22SSatish Balay     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
87226fbe8dcSKarl Rupp 
873a2d1c673SSatish Balay     baij->roworiented = r1;
874a2d1c673SSatish Balay     a->roworiented    = r2;
87526fbe8dcSKarl Rupp 
87691c97fd4SSatish Balay     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
877bbb85fb3SSatish Balay   }
878bbb85fb3SSatish Balay 
879bbb85fb3SSatish Balay   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
880bbb85fb3SSatish Balay   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
881bbb85fb3SSatish Balay 
882bbb85fb3SSatish Balay   /* determine if any processor has disassembled, if so we must
883bbb85fb3SSatish Balay      also disassemble ourselfs, in order that we may reassemble. */
884bbb85fb3SSatish Balay   /*
885bbb85fb3SSatish Balay      if nonzero structure of submatrix B cannot change then we know that
886bbb85fb3SSatish Balay      no processor disassembled thus we can skip this stuff
887bbb85fb3SSatish Balay   */
888bbb85fb3SSatish Balay   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
889ce94432eSBarry Smith     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
890bbb85fb3SSatish Balay     if (mat->was_assembled && !other_disassembled) {
891ab9863d7SBarry Smith       ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
892bbb85fb3SSatish Balay     }
893bbb85fb3SSatish Balay   }
894bbb85fb3SSatish Balay 
895bbb85fb3SSatish Balay   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
896bbb85fb3SSatish Balay     ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
897bbb85fb3SSatish Balay   }
898bbb85fb3SSatish Balay   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
899bbb85fb3SSatish Balay   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
900bbb85fb3SSatish Balay 
9016cf91177SBarry Smith #if defined(PETSC_USE_INFO)
902bbb85fb3SSatish Balay   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
9031e2582c4SBarry Smith     ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
90426fbe8dcSKarl Rupp 
905bbb85fb3SSatish Balay     baij->ht_total_ct  = 0;
906bbb85fb3SSatish Balay     baij->ht_insert_ct = 0;
907bbb85fb3SSatish Balay   }
908bbb85fb3SSatish Balay #endif
909bbb85fb3SSatish Balay   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
910bbb85fb3SSatish Balay     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
91126fbe8dcSKarl Rupp 
912bbb85fb3SSatish Balay     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
913bbb85fb3SSatish Balay     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
914bbb85fb3SSatish Balay   }
915bbb85fb3SSatish Balay 
916fca92195SBarry Smith   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
91726fbe8dcSKarl Rupp 
918606d414cSSatish Balay   baij->rowvalues = 0;
9194f9cfa9eSBarry Smith 
9204f9cfa9eSBarry Smith   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
9214f9cfa9eSBarry Smith   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
922e56f5c9eSBarry Smith     PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
92309e82e2bSBarry Smith     ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
924e56f5c9eSBarry Smith   }
925bbb85fb3SSatish Balay   PetscFunctionReturn(0);
926bbb85fb3SSatish Balay }
92757b952d6SSatish Balay 
9287da1fb6eSBarry Smith extern PetscErrorCode MatView_SeqBAIJ(Mat,PetscViewer);
9299804daf3SBarry Smith #include <petscdraw.h>
9304a2ae208SSatish Balay #undef __FUNCT__
9314a2ae208SSatish Balay #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
9326849ba73SBarry Smith static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
93357b952d6SSatish Balay {
93457b952d6SSatish Balay   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
935dfbe8321SBarry Smith   PetscErrorCode    ierr;
9367da1fb6eSBarry Smith   PetscMPIInt       rank = baij->rank;
937d0f46423SBarry Smith   PetscInt          bs   = mat->rmap->bs;
938ace3abfcSBarry Smith   PetscBool         iascii,isdraw;
939b0a32e0cSBarry Smith   PetscViewer       sviewer;
940f3ef73ceSBarry Smith   PetscViewerFormat format;
94157b952d6SSatish Balay 
942d64ed03dSBarry Smith   PetscFunctionBegin;
943251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
944251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
94532077d6dSBarry Smith   if (iascii) {
946b0a32e0cSBarry Smith     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
947456192e2SBarry Smith     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
9484e220ebcSLois Curfman McInnes       MatInfo info;
949ce94432eSBarry Smith       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
950d41123aaSBarry Smith       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
9517b23a99aSBarry Smith       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
95277431f27SBarry Smith       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
95316608c43SJed Brown                                                 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
954d132466eSBarry Smith       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
955e6dd01d4SJed Brown       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
956d132466eSBarry Smith       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
957e6dd01d4SJed Brown       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
958b0a32e0cSBarry Smith       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
9597b23a99aSBarry Smith       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
96007d81ca4SBarry Smith       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
96157b952d6SSatish Balay       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
9623a40ed3dSBarry Smith       PetscFunctionReturn(0);
963fb9695e5SSatish Balay     } else if (format == PETSC_VIEWER_ASCII_INFO) {
96477431f27SBarry Smith       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
9653a40ed3dSBarry Smith       PetscFunctionReturn(0);
96604929863SHong Zhang     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
96704929863SHong Zhang       PetscFunctionReturn(0);
96857b952d6SSatish Balay     }
96957b952d6SSatish Balay   }
97057b952d6SSatish Balay 
9710f5bd95cSBarry Smith   if (isdraw) {
972b0a32e0cSBarry Smith     PetscDraw draw;
973ace3abfcSBarry Smith     PetscBool isnull;
974b0a32e0cSBarry Smith     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
975b0a32e0cSBarry Smith     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
97657b952d6SSatish Balay   }
97757b952d6SSatish Balay 
9787da1fb6eSBarry Smith   {
97957b952d6SSatish Balay     /* assemble the entire matrix onto first processor. */
98057b952d6SSatish Balay     Mat         A;
98157b952d6SSatish Balay     Mat_SeqBAIJ *Aloc;
982d0f46423SBarry Smith     PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
9833eda8832SBarry Smith     MatScalar   *a;
9843e219373SBarry Smith     const char  *matname;
98557b952d6SSatish Balay 
986f204ca49SKris Buschelman     /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
987f204ca49SKris Buschelman     /* Perhaps this should be the type of mat? */
988ce94432eSBarry Smith     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
98957b952d6SSatish Balay     if (!rank) {
990f69a0ea3SMatthew Knepley       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
991d64ed03dSBarry Smith     } else {
992f69a0ea3SMatthew Knepley       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
99357b952d6SSatish Balay     }
994f204ca49SKris Buschelman     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
9950298fd71SBarry Smith     ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
9962b82e772SSatish Balay     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
9973bb1ff40SBarry Smith     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
99857b952d6SSatish Balay 
99957b952d6SSatish Balay     /* copy over the A part */
100057b952d6SSatish Balay     Aloc = (Mat_SeqBAIJ*)baij->A->data;
100157b952d6SSatish Balay     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1002785e854fSJed Brown     ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
100357b952d6SSatish Balay 
100457b952d6SSatish Balay     for (i=0; i<mbs; i++) {
1005899cda47SBarry Smith       rvals[0] = bs*(baij->rstartbs + i);
100626fbe8dcSKarl Rupp       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
100757b952d6SSatish Balay       for (j=ai[i]; j<ai[i+1]; j++) {
1008899cda47SBarry Smith         col = (baij->cstartbs+aj[j])*bs;
100957b952d6SSatish Balay         for (k=0; k<bs; k++) {
101097e5c40aSBarry Smith           ierr      = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1011cee3aa6bSSatish Balay           col++; a += bs;
101257b952d6SSatish Balay         }
101357b952d6SSatish Balay       }
101457b952d6SSatish Balay     }
101557b952d6SSatish Balay     /* copy over the B part */
101657b952d6SSatish Balay     Aloc = (Mat_SeqBAIJ*)baij->B->data;
101757b952d6SSatish Balay     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
101857b952d6SSatish Balay     for (i=0; i<mbs; i++) {
1019899cda47SBarry Smith       rvals[0] = bs*(baij->rstartbs + i);
102026fbe8dcSKarl Rupp       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
102157b952d6SSatish Balay       for (j=ai[i]; j<ai[i+1]; j++) {
102257b952d6SSatish Balay         col = baij->garray[aj[j]]*bs;
102357b952d6SSatish Balay         for (k=0; k<bs; k++) {
102497e5c40aSBarry Smith           ierr      = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1025cee3aa6bSSatish Balay           col++; a += bs;
102657b952d6SSatish Balay         }
102757b952d6SSatish Balay       }
102857b952d6SSatish Balay     }
1029606d414cSSatish Balay     ierr = PetscFree(rvals);CHKERRQ(ierr);
10306d4a8577SBarry Smith     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
10316d4a8577SBarry Smith     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
103255843e3eSBarry Smith     /*
103355843e3eSBarry Smith        Everyone has to call to draw the matrix since the graphics waits are
1034b0a32e0cSBarry Smith        synchronized across all processors that share the PetscDraw object
103555843e3eSBarry Smith     */
1036b0a32e0cSBarry Smith     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1037ade3a672SBarry Smith     ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
10383e219373SBarry Smith     if (!rank) {
1039ade3a672SBarry Smith       ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
10407da1fb6eSBarry Smith       ierr = MatView_SeqBAIJ(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
104157b952d6SSatish Balay     }
1042b0a32e0cSBarry Smith     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
10436bf464f9SBarry Smith     ierr = MatDestroy(&A);CHKERRQ(ierr);
104457b952d6SSatish Balay   }
10453a40ed3dSBarry Smith   PetscFunctionReturn(0);
104657b952d6SSatish Balay }
104757b952d6SSatish Balay 
10484a2ae208SSatish Balay #undef __FUNCT__
1049660746e0SBarry Smith #define __FUNCT__ "MatView_MPIBAIJ_Binary"
1050660746e0SBarry Smith static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
1051660746e0SBarry Smith {
1052660746e0SBarry Smith   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)mat->data;
1053660746e0SBarry Smith   Mat_SeqBAIJ    *A = (Mat_SeqBAIJ*)a->A->data;
1054660746e0SBarry Smith   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)a->B->data;
1055660746e0SBarry Smith   PetscErrorCode ierr;
10565f48b12bSBarry Smith   PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen;
1057e96a6426SSatish Balay   PetscInt       *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
1058660746e0SBarry Smith   int            fd;
1059660746e0SBarry Smith   PetscScalar    *column_values;
1060660746e0SBarry Smith   FILE           *file;
1061660746e0SBarry Smith   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1062638eb2ebSBarry Smith   PetscInt       message_count,flowcontrolcount;
1063660746e0SBarry Smith 
1064660746e0SBarry Smith   PetscFunctionBegin;
1065ce94432eSBarry Smith   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1066ce94432eSBarry Smith   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1067660746e0SBarry Smith   nz   = bs2*(A->nz + B->nz);
1068660746e0SBarry Smith   rlen = mat->rmap->n;
10695872f025SBarry Smith   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1070660746e0SBarry Smith   if (!rank) {
1071660746e0SBarry Smith     header[0] = MAT_FILE_CLASSID;
1072660746e0SBarry Smith     header[1] = mat->rmap->N;
1073660746e0SBarry Smith     header[2] = mat->cmap->N;
107426fbe8dcSKarl Rupp 
1075ce94432eSBarry Smith     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1076660746e0SBarry Smith     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1077660746e0SBarry Smith     /* get largest number of rows any processor has */
1078660746e0SBarry Smith     range = mat->rmap->range;
1079660746e0SBarry Smith     for (i=1; i<size; i++) {
1080660746e0SBarry Smith       rlen = PetscMax(rlen,range[i+1] - range[i]);
1081660746e0SBarry Smith     }
1082660746e0SBarry Smith   } else {
1083ce94432eSBarry Smith     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1084660746e0SBarry Smith   }
1085660746e0SBarry Smith 
1086854ce69bSBarry Smith   ierr = PetscMalloc1(rlen/bs,&crow_lens);CHKERRQ(ierr);
1087660746e0SBarry Smith   /* compute lengths of each row  */
1088660746e0SBarry Smith   for (i=0; i<a->mbs; i++) {
1089660746e0SBarry Smith     crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1090660746e0SBarry Smith   }
1091660746e0SBarry Smith   /* store the row lengths to the file */
1092638eb2ebSBarry Smith   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1093660746e0SBarry Smith   if (!rank) {
1094660746e0SBarry Smith     MPI_Status status;
1095785e854fSJed Brown     ierr = PetscMalloc1(rlen,&row_lens);CHKERRQ(ierr);
1096660746e0SBarry Smith     rlen = (range[1] - range[0])/bs;
1097660746e0SBarry Smith     for (i=0; i<rlen; i++) {
1098660746e0SBarry Smith       for (j=0; j<bs; j++) {
1099660746e0SBarry Smith         row_lens[i*bs+j] = bs*crow_lens[i];
1100660746e0SBarry Smith       }
1101660746e0SBarry Smith     }
1102660746e0SBarry Smith     ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1103660746e0SBarry Smith     for (i=1; i<size; i++) {
1104660746e0SBarry Smith       rlen = (range[i+1] - range[i])/bs;
1105639ff905SBarry Smith       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1106ce94432eSBarry Smith       ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1107660746e0SBarry Smith       for (k=0; k<rlen; k++) {
1108660746e0SBarry Smith         for (j=0; j<bs; j++) {
1109660746e0SBarry Smith           row_lens[k*bs+j] = bs*crow_lens[k];
1110660746e0SBarry Smith         }
1111660746e0SBarry Smith       }
1112660746e0SBarry Smith       ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1113660746e0SBarry Smith     }
1114639ff905SBarry Smith     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1115660746e0SBarry Smith     ierr = PetscFree(row_lens);CHKERRQ(ierr);
1116660746e0SBarry Smith   } else {
1117639ff905SBarry Smith     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1118ce94432eSBarry Smith     ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1119639ff905SBarry Smith     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1120660746e0SBarry Smith   }
1121660746e0SBarry Smith   ierr = PetscFree(crow_lens);CHKERRQ(ierr);
1122660746e0SBarry Smith 
1123660746e0SBarry Smith   /* load up the local column indices. Include for all rows not just one for each block row since process 0 does not have the
1124660746e0SBarry Smith      information needed to make it for each row from a block row. This does require more communication but still not more than
1125660746e0SBarry Smith      the communication needed for the nonzero values  */
1126660746e0SBarry Smith   nzmax = nz; /*  space a largest processor needs */
1127ce94432eSBarry Smith   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1128785e854fSJed Brown   ierr  = PetscMalloc1(nzmax,&column_indices);CHKERRQ(ierr);
1129660746e0SBarry Smith   cnt   = 0;
1130660746e0SBarry Smith   for (i=0; i<a->mbs; i++) {
1131660746e0SBarry Smith     pcnt = cnt;
1132660746e0SBarry Smith     for (j=B->i[i]; j<B->i[i+1]; j++) {
1133660746e0SBarry Smith       if ((col = garray[B->j[j]]) > cstart) break;
1134660746e0SBarry Smith       for (l=0; l<bs; l++) {
1135660746e0SBarry Smith         column_indices[cnt++] = bs*col+l;
1136660746e0SBarry Smith       }
1137660746e0SBarry Smith     }
1138660746e0SBarry Smith     for (k=A->i[i]; k<A->i[i+1]; k++) {
1139660746e0SBarry Smith       for (l=0; l<bs; l++) {
1140660746e0SBarry Smith         column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
1141660746e0SBarry Smith       }
1142660746e0SBarry Smith     }
1143660746e0SBarry Smith     for (; j<B->i[i+1]; j++) {
1144660746e0SBarry Smith       for (l=0; l<bs; l++) {
1145660746e0SBarry Smith         column_indices[cnt++] = bs*garray[B->j[j]]+l;
1146660746e0SBarry Smith       }
1147660746e0SBarry Smith     }
1148660746e0SBarry Smith     len = cnt - pcnt;
1149660746e0SBarry Smith     for (k=1; k<bs; k++) {
1150660746e0SBarry Smith       ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr);
1151660746e0SBarry Smith       cnt += len;
1152660746e0SBarry Smith     }
1153660746e0SBarry Smith   }
1154660746e0SBarry Smith   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1155660746e0SBarry Smith 
1156660746e0SBarry Smith   /* store the columns to the file */
1157638eb2ebSBarry Smith   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1158660746e0SBarry Smith   if (!rank) {
1159660746e0SBarry Smith     MPI_Status status;
1160660746e0SBarry Smith     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1161660746e0SBarry Smith     for (i=1; i<size; i++) {
1162639ff905SBarry Smith       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1163ce94432eSBarry Smith       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1164ce94432eSBarry Smith       ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1165660746e0SBarry Smith       ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1166660746e0SBarry Smith     }
1167639ff905SBarry Smith     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1168660746e0SBarry Smith   } else {
1169639ff905SBarry Smith     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1170ce94432eSBarry Smith     ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1171ce94432eSBarry Smith     ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1172639ff905SBarry Smith     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1173660746e0SBarry Smith   }
1174660746e0SBarry Smith   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1175660746e0SBarry Smith 
1176660746e0SBarry Smith   /* load up the numerical values */
1177785e854fSJed Brown   ierr = PetscMalloc1(nzmax,&column_values);CHKERRQ(ierr);
1178660746e0SBarry Smith   cnt  = 0;
1179660746e0SBarry Smith   for (i=0; i<a->mbs; i++) {
1180660746e0SBarry Smith     rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
1181660746e0SBarry Smith     for (j=B->i[i]; j<B->i[i+1]; j++) {
1182660746e0SBarry Smith       if (garray[B->j[j]] > cstart) break;
1183660746e0SBarry Smith       for (l=0; l<bs; l++) {
1184660746e0SBarry Smith         for (ll=0; ll<bs; ll++) {
1185660746e0SBarry Smith           column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1186660746e0SBarry Smith         }
1187660746e0SBarry Smith       }
1188660746e0SBarry Smith       cnt += bs;
1189660746e0SBarry Smith     }
1190660746e0SBarry Smith     for (k=A->i[i]; k<A->i[i+1]; k++) {
1191660746e0SBarry Smith       for (l=0; l<bs; l++) {
1192660746e0SBarry Smith         for (ll=0; ll<bs; ll++) {
1193660746e0SBarry Smith           column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
1194660746e0SBarry Smith         }
1195660746e0SBarry Smith       }
1196660746e0SBarry Smith       cnt += bs;
1197660746e0SBarry Smith     }
1198660746e0SBarry Smith     for (; j<B->i[i+1]; j++) {
1199660746e0SBarry Smith       for (l=0; l<bs; l++) {
1200660746e0SBarry Smith         for (ll=0; ll<bs; ll++) {
1201660746e0SBarry Smith           column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1202660746e0SBarry Smith         }
1203660746e0SBarry Smith       }
1204660746e0SBarry Smith       cnt += bs;
1205660746e0SBarry Smith     }
1206660746e0SBarry Smith     cnt += (bs-1)*rlen;
1207660746e0SBarry Smith   }
1208660746e0SBarry Smith   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1209660746e0SBarry Smith 
1210660746e0SBarry Smith   /* store the column values to the file */
1211638eb2ebSBarry Smith   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1212660746e0SBarry Smith   if (!rank) {
1213660746e0SBarry Smith     MPI_Status status;
1214660746e0SBarry Smith     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1215660746e0SBarry Smith     for (i=1; i<size; i++) {
1216639ff905SBarry Smith       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1217ce94432eSBarry Smith       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1218ce94432eSBarry Smith       ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1219660746e0SBarry Smith       ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1220660746e0SBarry Smith     }
1221639ff905SBarry Smith     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1222660746e0SBarry Smith   } else {
1223639ff905SBarry Smith     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1224ce94432eSBarry Smith     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1225ce94432eSBarry Smith     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1226639ff905SBarry Smith     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1227660746e0SBarry Smith   }
1228660746e0SBarry Smith   ierr = PetscFree(column_values);CHKERRQ(ierr);
1229660746e0SBarry Smith 
1230660746e0SBarry Smith   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1231660746e0SBarry Smith   if (file) {
1232660746e0SBarry Smith     fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1233660746e0SBarry Smith   }
1234660746e0SBarry Smith   PetscFunctionReturn(0);
1235660746e0SBarry Smith }
1236660746e0SBarry Smith 
1237660746e0SBarry Smith #undef __FUNCT__
12384a2ae208SSatish Balay #define __FUNCT__ "MatView_MPIBAIJ"
1239dfbe8321SBarry Smith PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
124057b952d6SSatish Balay {
1241dfbe8321SBarry Smith   PetscErrorCode ierr;
1242ace3abfcSBarry Smith   PetscBool      iascii,isdraw,issocket,isbinary;
124357b952d6SSatish Balay 
1244d64ed03dSBarry Smith   PetscFunctionBegin;
1245251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1246251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1247251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1248251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1249660746e0SBarry Smith   if (iascii || isdraw || issocket) {
12507b2a1423SBarry Smith     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1251660746e0SBarry Smith   } else if (isbinary) {
1252660746e0SBarry Smith     ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
125357b952d6SSatish Balay   }
12543a40ed3dSBarry Smith   PetscFunctionReturn(0);
125557b952d6SSatish Balay }
125657b952d6SSatish Balay 
12574a2ae208SSatish Balay #undef __FUNCT__
12584a2ae208SSatish Balay #define __FUNCT__ "MatDestroy_MPIBAIJ"
1259dfbe8321SBarry Smith PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
126079bdfe76SSatish Balay {
126179bdfe76SSatish Balay   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1262dfbe8321SBarry Smith   PetscErrorCode ierr;
126379bdfe76SSatish Balay 
1264d64ed03dSBarry Smith   PetscFunctionBegin;
1265aa482453SBarry Smith #if defined(PETSC_USE_LOG)
1266d0f46423SBarry Smith   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
126779bdfe76SSatish Balay #endif
12688798bf22SSatish Balay   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
12698798bf22SSatish Balay   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
12706bf464f9SBarry Smith   ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
12716bf464f9SBarry Smith   ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
1272aa482453SBarry Smith #if defined(PETSC_USE_CTABLE)
12736bc0bbbfSBarry Smith   ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
127448e59246SSatish Balay #else
127505b42c5fSBarry Smith   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
127648e59246SSatish Balay #endif
127705b42c5fSBarry Smith   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
12786bf464f9SBarry Smith   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
12796bf464f9SBarry Smith   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
1280fca92195SBarry Smith   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
128105b42c5fSBarry Smith   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
1282fca92195SBarry Smith   ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
1283899cda47SBarry Smith   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
1284bf0cc555SLisandro Dalcin   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1285901853e0SKris Buschelman 
1286dbd8c25aSHong Zhang   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1287bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1288bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1289bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
1290bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1291bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1292bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1293bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C",NULL);CHKERRQ(ierr);
1294bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1295bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C",NULL);CHKERRQ(ierr);
12963a40ed3dSBarry Smith   PetscFunctionReturn(0);
129779bdfe76SSatish Balay }
129879bdfe76SSatish Balay 
12994a2ae208SSatish Balay #undef __FUNCT__
13004a2ae208SSatish Balay #define __FUNCT__ "MatMult_MPIBAIJ"
1301dfbe8321SBarry Smith PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1302cee3aa6bSSatish Balay {
1303cee3aa6bSSatish Balay   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1304dfbe8321SBarry Smith   PetscErrorCode ierr;
1305b24ad042SBarry Smith   PetscInt       nt;
1306cee3aa6bSSatish Balay 
1307d64ed03dSBarry Smith   PetscFunctionBegin;
1308e1311b90SBarry Smith   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1309e7e72b3dSBarry Smith   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1310e1311b90SBarry Smith   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1311e7e72b3dSBarry Smith   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
1312ca9f406cSSatish Balay   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1313f830108cSBarry Smith   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1314ca9f406cSSatish Balay   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1315f830108cSBarry Smith   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
13163a40ed3dSBarry Smith   PetscFunctionReturn(0);
1317cee3aa6bSSatish Balay }
1318cee3aa6bSSatish Balay 
13194a2ae208SSatish Balay #undef __FUNCT__
13204a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_MPIBAIJ"
1321dfbe8321SBarry Smith PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1322cee3aa6bSSatish Balay {
1323cee3aa6bSSatish Balay   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1324dfbe8321SBarry Smith   PetscErrorCode ierr;
1325d64ed03dSBarry Smith 
1326d64ed03dSBarry Smith   PetscFunctionBegin;
1327ca9f406cSSatish Balay   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1328f830108cSBarry Smith   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1329ca9f406cSSatish Balay   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1330f830108cSBarry Smith   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
13313a40ed3dSBarry Smith   PetscFunctionReturn(0);
1332cee3aa6bSSatish Balay }
1333cee3aa6bSSatish Balay 
13344a2ae208SSatish Balay #undef __FUNCT__
13354a2ae208SSatish Balay #define __FUNCT__ "MatMultTranspose_MPIBAIJ"
1336dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1337cee3aa6bSSatish Balay {
1338cee3aa6bSSatish Balay   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1339dfbe8321SBarry Smith   PetscErrorCode ierr;
1340ace3abfcSBarry Smith   PetscBool      merged;
1341cee3aa6bSSatish Balay 
1342d64ed03dSBarry Smith   PetscFunctionBegin;
1343a5ff213dSBarry Smith   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1344cee3aa6bSSatish Balay   /* do nondiagonal part */
13457c922b88SBarry Smith   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1346a5ff213dSBarry Smith   if (!merged) {
1347cee3aa6bSSatish Balay     /* send it on its way */
1348ca9f406cSSatish Balay     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1349cee3aa6bSSatish Balay     /* do local part */
13507c922b88SBarry Smith     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1351cee3aa6bSSatish Balay     /* receive remote parts: note this assumes the values are not actually */
1352a5ff213dSBarry Smith     /* inserted in yy until the next line */
1353ca9f406cSSatish Balay     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1354a5ff213dSBarry Smith   } else {
1355a5ff213dSBarry Smith     /* do local part */
1356a5ff213dSBarry Smith     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1357a5ff213dSBarry Smith     /* send it on its way */
1358ca9f406cSSatish Balay     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1359a5ff213dSBarry Smith     /* values actually were received in the Begin() but we need to call this nop */
1360ca9f406cSSatish Balay     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1361a5ff213dSBarry Smith   }
13623a40ed3dSBarry Smith   PetscFunctionReturn(0);
1363cee3aa6bSSatish Balay }
1364cee3aa6bSSatish Balay 
13654a2ae208SSatish Balay #undef __FUNCT__
13664a2ae208SSatish Balay #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
1367dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1368cee3aa6bSSatish Balay {
1369cee3aa6bSSatish Balay   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1370dfbe8321SBarry Smith   PetscErrorCode ierr;
1371cee3aa6bSSatish Balay 
1372d64ed03dSBarry Smith   PetscFunctionBegin;
1373cee3aa6bSSatish Balay   /* do nondiagonal part */
13747c922b88SBarry Smith   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1375cee3aa6bSSatish Balay   /* send it on its way */
1376ca9f406cSSatish Balay   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1377cee3aa6bSSatish Balay   /* do local part */
13787c922b88SBarry Smith   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1379cee3aa6bSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
1380cee3aa6bSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
1381cee3aa6bSSatish Balay   /* but is not perhaps always true. */
1382ca9f406cSSatish Balay   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
13833a40ed3dSBarry Smith   PetscFunctionReturn(0);
1384cee3aa6bSSatish Balay }
1385cee3aa6bSSatish Balay 
1386cee3aa6bSSatish Balay /*
1387cee3aa6bSSatish Balay   This only works correctly for square matrices where the subblock A->A is the
1388cee3aa6bSSatish Balay    diagonal block
1389cee3aa6bSSatish Balay */
13904a2ae208SSatish Balay #undef __FUNCT__
13914a2ae208SSatish Balay #define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
1392dfbe8321SBarry Smith PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1393cee3aa6bSSatish Balay {
1394cee3aa6bSSatish Balay   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1395dfbe8321SBarry Smith   PetscErrorCode ierr;
1396d64ed03dSBarry Smith 
1397d64ed03dSBarry Smith   PetscFunctionBegin;
1398e32f2f54SBarry Smith   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
13993a40ed3dSBarry Smith   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
14003a40ed3dSBarry Smith   PetscFunctionReturn(0);
1401cee3aa6bSSatish Balay }
1402cee3aa6bSSatish Balay 
14034a2ae208SSatish Balay #undef __FUNCT__
14044a2ae208SSatish Balay #define __FUNCT__ "MatScale_MPIBAIJ"
1405f4df32b1SMatthew Knepley PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1406cee3aa6bSSatish Balay {
1407cee3aa6bSSatish Balay   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1408dfbe8321SBarry Smith   PetscErrorCode ierr;
1409d64ed03dSBarry Smith 
1410d64ed03dSBarry Smith   PetscFunctionBegin;
1411f4df32b1SMatthew Knepley   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1412f4df32b1SMatthew Knepley   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
14133a40ed3dSBarry Smith   PetscFunctionReturn(0);
1414cee3aa6bSSatish Balay }
1415026e39d0SSatish Balay 
14164a2ae208SSatish Balay #undef __FUNCT__
14174a2ae208SSatish Balay #define __FUNCT__ "MatGetRow_MPIBAIJ"
1418b24ad042SBarry Smith PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1419acdf5bf4SSatish Balay {
1420acdf5bf4SSatish Balay   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
142187828ca2SBarry Smith   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
14226849ba73SBarry Smith   PetscErrorCode ierr;
1423d0f46423SBarry Smith   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1424d0f46423SBarry Smith   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1425899cda47SBarry Smith   PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;
1426acdf5bf4SSatish Balay 
1427d64ed03dSBarry Smith   PetscFunctionBegin;
1428e7e72b3dSBarry Smith   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1429e32f2f54SBarry Smith   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1430acdf5bf4SSatish Balay   mat->getrowactive = PETSC_TRUE;
1431acdf5bf4SSatish Balay 
1432acdf5bf4SSatish Balay   if (!mat->rowvalues && (idx || v)) {
1433acdf5bf4SSatish Balay     /*
1434acdf5bf4SSatish Balay         allocate enough space to hold information from the longest row.
1435acdf5bf4SSatish Balay     */
1436acdf5bf4SSatish Balay     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1437b24ad042SBarry Smith     PetscInt    max = 1,mbs = mat->mbs,tmp;
1438bd16c2feSSatish Balay     for (i=0; i<mbs; i++) {
1439acdf5bf4SSatish Balay       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
144026fbe8dcSKarl Rupp       if (max < tmp) max = tmp;
1441acdf5bf4SSatish Balay     }
1442dcca6d9dSJed Brown     ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
1443acdf5bf4SSatish Balay   }
1444d9d09a02SSatish Balay   lrow = row - brstart;
1445acdf5bf4SSatish Balay 
1446acdf5bf4SSatish Balay   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1447acdf5bf4SSatish Balay   if (!v)   {pvA = 0; pvB = 0;}
1448acdf5bf4SSatish Balay   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1449f830108cSBarry Smith   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1450f830108cSBarry Smith   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1451acdf5bf4SSatish Balay   nztot = nzA + nzB;
1452acdf5bf4SSatish Balay 
1453acdf5bf4SSatish Balay   cmap = mat->garray;
1454acdf5bf4SSatish Balay   if (v  || idx) {
1455acdf5bf4SSatish Balay     if (nztot) {
1456acdf5bf4SSatish Balay       /* Sort by increasing column numbers, assuming A and B already sorted */
1457b24ad042SBarry Smith       PetscInt imark = -1;
1458acdf5bf4SSatish Balay       if (v) {
1459acdf5bf4SSatish Balay         *v = v_p = mat->rowvalues;
1460acdf5bf4SSatish Balay         for (i=0; i<nzB; i++) {
1461d9d09a02SSatish Balay           if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1462acdf5bf4SSatish Balay           else break;
1463acdf5bf4SSatish Balay         }
1464acdf5bf4SSatish Balay         imark = i;
1465acdf5bf4SSatish Balay         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1466acdf5bf4SSatish Balay         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1467acdf5bf4SSatish Balay       }
1468acdf5bf4SSatish Balay       if (idx) {
1469acdf5bf4SSatish Balay         *idx = idx_p = mat->rowindices;
1470acdf5bf4SSatish Balay         if (imark > -1) {
1471acdf5bf4SSatish Balay           for (i=0; i<imark; i++) {
1472bd16c2feSSatish Balay             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1473acdf5bf4SSatish Balay           }
1474acdf5bf4SSatish Balay         } else {
1475acdf5bf4SSatish Balay           for (i=0; i<nzB; i++) {
147626fbe8dcSKarl Rupp             if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1477acdf5bf4SSatish Balay             else break;
1478acdf5bf4SSatish Balay           }
1479acdf5bf4SSatish Balay           imark = i;
1480acdf5bf4SSatish Balay         }
1481d9d09a02SSatish Balay         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1482d9d09a02SSatish Balay         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1483acdf5bf4SSatish Balay       }
1484d64ed03dSBarry Smith     } else {
1485d212a18eSSatish Balay       if (idx) *idx = 0;
1486d212a18eSSatish Balay       if (v)   *v   = 0;
1487d212a18eSSatish Balay     }
1488acdf5bf4SSatish Balay   }
1489acdf5bf4SSatish Balay   *nz  = nztot;
1490f830108cSBarry Smith   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1491f830108cSBarry Smith   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
14923a40ed3dSBarry Smith   PetscFunctionReturn(0);
1493acdf5bf4SSatish Balay }
1494acdf5bf4SSatish Balay 
14954a2ae208SSatish Balay #undef __FUNCT__
14964a2ae208SSatish Balay #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1497b24ad042SBarry Smith PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1498acdf5bf4SSatish Balay {
1499acdf5bf4SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1500d64ed03dSBarry Smith 
1501d64ed03dSBarry Smith   PetscFunctionBegin;
1502e7e72b3dSBarry Smith   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1503acdf5bf4SSatish Balay   baij->getrowactive = PETSC_FALSE;
15043a40ed3dSBarry Smith   PetscFunctionReturn(0);
1505acdf5bf4SSatish Balay }
1506acdf5bf4SSatish Balay 
15074a2ae208SSatish Balay #undef __FUNCT__
15084a2ae208SSatish Balay #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1509dfbe8321SBarry Smith PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
151058667388SSatish Balay {
151158667388SSatish Balay   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1512dfbe8321SBarry Smith   PetscErrorCode ierr;
1513d64ed03dSBarry Smith 
1514d64ed03dSBarry Smith   PetscFunctionBegin;
151558667388SSatish Balay   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
151658667388SSatish Balay   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
15173a40ed3dSBarry Smith   PetscFunctionReturn(0);
151858667388SSatish Balay }
15190ac07820SSatish Balay 
15204a2ae208SSatish Balay #undef __FUNCT__
15214a2ae208SSatish Balay #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1522dfbe8321SBarry Smith PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
15230ac07820SSatish Balay {
15244e220ebcSLois Curfman McInnes   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
15254e220ebcSLois Curfman McInnes   Mat            A  = a->A,B = a->B;
1526dfbe8321SBarry Smith   PetscErrorCode ierr;
1527329f5518SBarry Smith   PetscReal      isend[5],irecv[5];
15280ac07820SSatish Balay 
1529d64ed03dSBarry Smith   PetscFunctionBegin;
1530d0f46423SBarry Smith   info->block_size = (PetscReal)matin->rmap->bs;
153126fbe8dcSKarl Rupp 
15324e220ebcSLois Curfman McInnes   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
153326fbe8dcSKarl Rupp 
15340e4b21beSBarry Smith   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1535de87f314SBarry Smith   isend[3] = info->memory;  isend[4] = info->mallocs;
153626fbe8dcSKarl Rupp 
15374e220ebcSLois Curfman McInnes   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
153826fbe8dcSKarl Rupp 
15390e4b21beSBarry Smith   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1540de87f314SBarry Smith   isend[3] += info->memory;  isend[4] += info->mallocs;
154126fbe8dcSKarl Rupp 
15420ac07820SSatish Balay   if (flag == MAT_LOCAL) {
15434e220ebcSLois Curfman McInnes     info->nz_used      = isend[0];
15444e220ebcSLois Curfman McInnes     info->nz_allocated = isend[1];
15454e220ebcSLois Curfman McInnes     info->nz_unneeded  = isend[2];
15464e220ebcSLois Curfman McInnes     info->memory       = isend[3];
15474e220ebcSLois Curfman McInnes     info->mallocs      = isend[4];
15480ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_MAX) {
1549ce94432eSBarry Smith     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
155026fbe8dcSKarl Rupp 
15514e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
15524e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
15534e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
15544e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
15554e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
15560ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_SUM) {
1557ce94432eSBarry Smith     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
155826fbe8dcSKarl Rupp 
15594e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
15604e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
15614e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
15624e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
15634e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
1564ce94432eSBarry Smith   } else SETERRQ1(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
15654e220ebcSLois Curfman McInnes   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
15664e220ebcSLois Curfman McInnes   info->fill_ratio_needed = 0;
15674e220ebcSLois Curfman McInnes   info->factor_mallocs    = 0;
15683a40ed3dSBarry Smith   PetscFunctionReturn(0);
15690ac07820SSatish Balay }
15700ac07820SSatish Balay 
15714a2ae208SSatish Balay #undef __FUNCT__
15724a2ae208SSatish Balay #define __FUNCT__ "MatSetOption_MPIBAIJ"
1573ace3abfcSBarry Smith PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg)
157458667388SSatish Balay {
157558667388SSatish Balay   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1576dfbe8321SBarry Smith   PetscErrorCode ierr;
157758667388SSatish Balay 
1578d64ed03dSBarry Smith   PetscFunctionBegin;
157912c028f9SKris Buschelman   switch (op) {
1580512a5fc5SBarry Smith   case MAT_NEW_NONZERO_LOCATIONS:
158112c028f9SKris Buschelman   case MAT_NEW_NONZERO_ALLOCATION_ERR:
158228b2fa4aSMatthew Knepley   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1583a9817697SBarry Smith   case MAT_KEEP_NONZERO_PATTERN:
158412c028f9SKris Buschelman   case MAT_NEW_NONZERO_LOCATION_ERR:
15854e0d8c25SBarry Smith     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
15864e0d8c25SBarry Smith     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
158712c028f9SKris Buschelman     break;
158812c028f9SKris Buschelman   case MAT_ROW_ORIENTED:
15894e0d8c25SBarry Smith     a->roworiented = flg;
159026fbe8dcSKarl Rupp 
15914e0d8c25SBarry Smith     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
15924e0d8c25SBarry Smith     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
159312c028f9SKris Buschelman     break;
15944e0d8c25SBarry Smith   case MAT_NEW_DIAGONALS:
1595290bbb0aSBarry Smith     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
159612c028f9SKris Buschelman     break;
159712c028f9SKris Buschelman   case MAT_IGNORE_OFF_PROC_ENTRIES:
15984e0d8c25SBarry Smith     a->donotstash = flg;
159912c028f9SKris Buschelman     break;
160012c028f9SKris Buschelman   case MAT_USE_HASH_TABLE:
16014e0d8c25SBarry Smith     a->ht_flag = flg;
160212c028f9SKris Buschelman     break;
160377e54ba9SKris Buschelman   case MAT_SYMMETRIC:
160477e54ba9SKris Buschelman   case MAT_STRUCTURALLY_SYMMETRIC:
16052188ac68SBarry Smith   case MAT_HERMITIAN:
16062188ac68SBarry Smith   case MAT_SYMMETRY_ETERNAL:
16074e0d8c25SBarry Smith     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
160877e54ba9SKris Buschelman     break;
160912c028f9SKris Buschelman   default:
1610ce94432eSBarry Smith     SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"unknown option %d",op);
1611d64ed03dSBarry Smith   }
16123a40ed3dSBarry Smith   PetscFunctionReturn(0);
161358667388SSatish Balay }
161458667388SSatish Balay 
16154a2ae208SSatish Balay #undef __FUNCT__
16166a719282SBarry Smith #define __FUNCT__ "MatTranspose_MPIBAIJ"
1617fc4dec0aSBarry Smith PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
16180ac07820SSatish Balay {
16190ac07820SSatish Balay   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
16200ac07820SSatish Balay   Mat_SeqBAIJ    *Aloc;
16210ac07820SSatish Balay   Mat            B;
1622dfbe8321SBarry Smith   PetscErrorCode ierr;
1623d0f46423SBarry Smith   PetscInt       M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
1624d0f46423SBarry Smith   PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
16253eda8832SBarry Smith   MatScalar      *a;
16260ac07820SSatish Balay 
1627d64ed03dSBarry Smith   PetscFunctionBegin;
1628ce94432eSBarry Smith   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1629fc4dec0aSBarry Smith   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1630ce94432eSBarry Smith     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1631d0f46423SBarry Smith     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
16327adad957SLisandro Dalcin     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
16332e72b8d9SBarry Smith     /* Do not know preallocation information, but must set block size */
16340298fd71SBarry Smith     ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL);CHKERRQ(ierr);
1635fc4dec0aSBarry Smith   } else {
1636fc4dec0aSBarry Smith     B = *matout;
1637fc4dec0aSBarry Smith   }
16380ac07820SSatish Balay 
16390ac07820SSatish Balay   /* copy over the A part */
16400ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*)baij->A->data;
16410ac07820SSatish Balay   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1642785e854fSJed Brown   ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
16430ac07820SSatish Balay 
16440ac07820SSatish Balay   for (i=0; i<mbs; i++) {
1645899cda47SBarry Smith     rvals[0] = bs*(baij->rstartbs + i);
164626fbe8dcSKarl Rupp     for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
16470ac07820SSatish Balay     for (j=ai[i]; j<ai[i+1]; j++) {
1648899cda47SBarry Smith       col = (baij->cstartbs+aj[j])*bs;
16490ac07820SSatish Balay       for (k=0; k<bs; k++) {
165097e5c40aSBarry Smith         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
165126fbe8dcSKarl Rupp 
16520ac07820SSatish Balay         col++; a += bs;
16530ac07820SSatish Balay       }
16540ac07820SSatish Balay     }
16550ac07820SSatish Balay   }
16560ac07820SSatish Balay   /* copy over the B part */
16570ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*)baij->B->data;
16580ac07820SSatish Balay   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
16590ac07820SSatish Balay   for (i=0; i<mbs; i++) {
1660899cda47SBarry Smith     rvals[0] = bs*(baij->rstartbs + i);
166126fbe8dcSKarl Rupp     for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
16620ac07820SSatish Balay     for (j=ai[i]; j<ai[i+1]; j++) {
16630ac07820SSatish Balay       col = baij->garray[aj[j]]*bs;
16640ac07820SSatish Balay       for (k=0; k<bs; k++) {
166597e5c40aSBarry Smith         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
166626fbe8dcSKarl Rupp         col++;
166726fbe8dcSKarl Rupp         a += bs;
16680ac07820SSatish Balay       }
16690ac07820SSatish Balay     }
16700ac07820SSatish Balay   }
1671606d414cSSatish Balay   ierr = PetscFree(rvals);CHKERRQ(ierr);
16720ac07820SSatish Balay   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
16730ac07820SSatish Balay   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
16740ac07820SSatish Balay 
167526fbe8dcSKarl Rupp   if (reuse == MAT_INITIAL_MATRIX || *matout != A) *matout = B;
167626fbe8dcSKarl Rupp   else {
1677eb6b5d47SBarry Smith     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
16780ac07820SSatish Balay   }
16793a40ed3dSBarry Smith   PetscFunctionReturn(0);
16800ac07820SSatish Balay }
16810e95ebc0SSatish Balay 
16824a2ae208SSatish Balay #undef __FUNCT__
16834a2ae208SSatish Balay #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1684dfbe8321SBarry Smith PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
16850e95ebc0SSatish Balay {
168636c4a09eSSatish Balay   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
168736c4a09eSSatish Balay   Mat            a     = baij->A,b = baij->B;
1688dfbe8321SBarry Smith   PetscErrorCode ierr;
1689b24ad042SBarry Smith   PetscInt       s1,s2,s3;
16900e95ebc0SSatish Balay 
1691d64ed03dSBarry Smith   PetscFunctionBegin;
169236c4a09eSSatish Balay   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
169336c4a09eSSatish Balay   if (rr) {
169436c4a09eSSatish Balay     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1695e32f2f54SBarry Smith     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
169636c4a09eSSatish Balay     /* Overlap communication with computation. */
1697ca9f406cSSatish Balay     ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
169836c4a09eSSatish Balay   }
16990e95ebc0SSatish Balay   if (ll) {
17000e95ebc0SSatish Balay     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1701e32f2f54SBarry Smith     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
17020298fd71SBarry Smith     ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
17030e95ebc0SSatish Balay   }
170436c4a09eSSatish Balay   /* scale  the diagonal block */
170536c4a09eSSatish Balay   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
170636c4a09eSSatish Balay 
170736c4a09eSSatish Balay   if (rr) {
170836c4a09eSSatish Balay     /* Do a scatter end and then right scale the off-diagonal block */
1709ca9f406cSSatish Balay     ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
17100298fd71SBarry Smith     ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
171136c4a09eSSatish Balay   }
17123a40ed3dSBarry Smith   PetscFunctionReturn(0);
17130e95ebc0SSatish Balay }
17140e95ebc0SSatish Balay 
17154a2ae208SSatish Balay #undef __FUNCT__
17164a2ae208SSatish Balay #define __FUNCT__ "MatZeroRows_MPIBAIJ"
17172b40b63fSBarry Smith PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
17180ac07820SSatish Balay {
17190ac07820SSatish Balay   Mat_MPIBAIJ   *l      = (Mat_MPIBAIJ *) A->data;
172065a92638SMatthew G. Knepley   PetscInt      *owners = A->rmap->range;
172165a92638SMatthew G. Knepley   PetscInt       n      = A->rmap->n;
172265a92638SMatthew G. Knepley   PetscSF        sf;
172365a92638SMatthew G. Knepley   PetscInt      *lrows;
172465a92638SMatthew G. Knepley   PetscSFNode   *rrows;
172569ea2d38SJed Brown   PetscInt       r, p = 0, len = 0;
17266849ba73SBarry Smith   PetscErrorCode ierr;
17270ac07820SSatish Balay 
1728d64ed03dSBarry Smith   PetscFunctionBegin;
172965a92638SMatthew G. Knepley   /* Create SF where leaves are input rows and roots are owned rows */
1730785e854fSJed Brown   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
173165a92638SMatthew G. Knepley   for (r = 0; r < n; ++r) lrows[r] = -1;
1732a34163a4SJed Brown   if (!A->nooffproczerorows) {ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);}
173365a92638SMatthew G. Knepley   for (r = 0; r < N; ++r) {
173465a92638SMatthew G. Knepley     const PetscInt idx   = rows[r];
173569ea2d38SJed Brown     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
173669ea2d38SJed Brown     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
173769ea2d38SJed Brown       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
173869ea2d38SJed Brown     }
1739a34163a4SJed Brown     if (A->nooffproczerorows) {
1740a34163a4SJed Brown       if (p != l->rank) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"MAT_NO_OFF_PROC_ZERO_ROWS set, but row %D is not owned by rank %d",idx,l->rank);
1741a34163a4SJed Brown       lrows[len++] = idx - owners[p];
1742a34163a4SJed Brown     } else {
174365a92638SMatthew G. Knepley       rrows[r].rank = p;
174465a92638SMatthew G. Knepley       rrows[r].index = rows[r] - owners[p];
17450ac07820SSatish Balay     }
1746a34163a4SJed Brown   }
1747a34163a4SJed Brown   if (!A->nooffproczerorows) {
174865a92638SMatthew G. Knepley     ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
174965a92638SMatthew G. Knepley     ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
175065a92638SMatthew G. Knepley     /* Collect flags for rows to be zeroed */
175165a92638SMatthew G. Knepley     ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
175265a92638SMatthew G. Knepley     ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
175365a92638SMatthew G. Knepley     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
175465a92638SMatthew G. Knepley     /* Compress and put in row numbers */
175565a92638SMatthew G. Knepley     for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
1756a34163a4SJed Brown   }
175797b48c8fSBarry Smith   /* fix right hand side if needed */
175897b48c8fSBarry Smith   if (x && b) {
175965a92638SMatthew G. Knepley     const PetscScalar *xx;
176065a92638SMatthew G. Knepley     PetscScalar       *bb;
176165a92638SMatthew G. Knepley 
176297b48c8fSBarry Smith     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
176397b48c8fSBarry Smith     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
176465a92638SMatthew G. Knepley     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
176597b48c8fSBarry Smith     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
176697b48c8fSBarry Smith     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
176797b48c8fSBarry Smith   }
176897b48c8fSBarry Smith 
17690ac07820SSatish Balay   /* actually zap the local rows */
177072dacd9aSBarry Smith   /*
177172dacd9aSBarry Smith         Zero the required rows. If the "diagonal block" of the matrix
1772a8c7a070SBarry Smith      is square and the user wishes to set the diagonal we use separate
177372dacd9aSBarry Smith      code so that MatSetValues() is not called for each diagonal allocating
177472dacd9aSBarry Smith      new memory, thus calling lots of mallocs and slowing things down.
177572dacd9aSBarry Smith 
177672dacd9aSBarry Smith   */
17779c957beeSSatish Balay   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1778a34163a4SJed Brown   ierr = MatZeroRows_SeqBAIJ(l->B,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
1779d0f46423SBarry Smith   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
1780a34163a4SJed Brown     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,diag,NULL,NULL);CHKERRQ(ierr);
1781f4df32b1SMatthew Knepley   } else if (diag != 0.0) {
178265a92638SMatthew G. Knepley     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,0,0);CHKERRQ(ierr);
1783e7e72b3dSBarry Smith     if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1784512a5fc5SBarry Smith        MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
178565a92638SMatthew G. Knepley     for (r = 0; r < len; ++r) {
178665a92638SMatthew G. Knepley       const PetscInt row = lrows[r] + A->rmap->rstart;
1787f4df32b1SMatthew Knepley       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
1788a07cd24cSSatish Balay     }
1789a07cd24cSSatish Balay     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1790a07cd24cSSatish Balay     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
17919c957beeSSatish Balay   } else {
1792a34163a4SJed Brown     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
1793a07cd24cSSatish Balay   }
1794606d414cSSatish Balay   ierr = PetscFree(lrows);CHKERRQ(ierr);
17954f9cfa9eSBarry Smith 
17964f9cfa9eSBarry Smith   /* only change matrix nonzero state if pattern was allowed to be changed */
17974f9cfa9eSBarry Smith   if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) {
1798e56f5c9eSBarry Smith     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
179909e82e2bSBarry Smith     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1800e56f5c9eSBarry Smith   }
18013a40ed3dSBarry Smith   PetscFunctionReturn(0);
18020ac07820SSatish Balay }
180372dacd9aSBarry Smith 
18044a2ae208SSatish Balay #undef __FUNCT__
18056f0a72daSMatthew G. Knepley #define __FUNCT__ "MatZeroRowsColumns_MPIBAIJ"
18066f0a72daSMatthew G. Knepley PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
18076f0a72daSMatthew G. Knepley {
18086f0a72daSMatthew G. Knepley   Mat_MPIBAIJ       *l = (Mat_MPIBAIJ*)A->data;
18096f0a72daSMatthew G. Knepley   PetscErrorCode    ierr;
18105ba17502SJed Brown   PetscMPIInt       n = A->rmap->n;
1811fbb64d0eSMatthew G. Knepley   PetscInt          i,j,k,r,p = 0,len = 0,row,col,count;
18126f0a72daSMatthew G. Knepley   PetscInt          *lrows,*owners = A->rmap->range;
18136f0a72daSMatthew G. Knepley   PetscSFNode       *rrows;
18146f0a72daSMatthew G. Knepley   PetscSF           sf;
18156f0a72daSMatthew G. Knepley   const PetscScalar *xx;
18166f0a72daSMatthew G. Knepley   PetscScalar       *bb,*mask;
18176f0a72daSMatthew G. Knepley   Vec               xmask,lmask;
18186f0a72daSMatthew G. Knepley   Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ*)l->B->data;
18196f0a72daSMatthew G. Knepley   PetscInt           bs = A->rmap->bs, bs2 = baij->bs2;
18206f0a72daSMatthew G. Knepley   PetscScalar       *aa;
18216f0a72daSMatthew G. Knepley 
18226f0a72daSMatthew G. Knepley   PetscFunctionBegin;
18236f0a72daSMatthew G. Knepley   /* Create SF where leaves are input rows and roots are owned rows */
18246f0a72daSMatthew G. Knepley   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
18256f0a72daSMatthew G. Knepley   for (r = 0; r < n; ++r) lrows[r] = -1;
18266f0a72daSMatthew G. Knepley   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
18276f0a72daSMatthew G. Knepley   for (r = 0; r < N; ++r) {
18286f0a72daSMatthew G. Knepley     const PetscInt idx   = rows[r];
18295ba17502SJed Brown     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
18305ba17502SJed Brown     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
18315ba17502SJed Brown       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
18325ba17502SJed Brown     }
18336f0a72daSMatthew G. Knepley     rrows[r].rank  = p;
18346f0a72daSMatthew G. Knepley     rrows[r].index = rows[r] - owners[p];
18356f0a72daSMatthew G. Knepley   }
18366f0a72daSMatthew G. Knepley   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
18376f0a72daSMatthew G. Knepley   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
18386f0a72daSMatthew G. Knepley   /* Collect flags for rows to be zeroed */
18396f0a72daSMatthew G. Knepley   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
18406f0a72daSMatthew G. Knepley   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
18416f0a72daSMatthew G. Knepley   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
18426f0a72daSMatthew G. Knepley   /* Compress and put in row numbers */
18436f0a72daSMatthew G. Knepley   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
18446f0a72daSMatthew G. Knepley   /* zero diagonal part of matrix */
18456f0a72daSMatthew G. Knepley   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
18466f0a72daSMatthew G. Knepley   /* handle off diagonal part of matrix */
18472a7a6963SBarry Smith   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
18486f0a72daSMatthew G. Knepley   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
18496f0a72daSMatthew G. Knepley   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
18506f0a72daSMatthew G. Knepley   for (i=0; i<len; i++) bb[lrows[i]] = 1;
18516f0a72daSMatthew G. Knepley   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
18526f0a72daSMatthew G. Knepley   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
18536f0a72daSMatthew G. Knepley   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
18546f0a72daSMatthew G. Knepley   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
18556f0a72daSMatthew G. Knepley   if (x) {
18566f0a72daSMatthew G. Knepley     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
18576f0a72daSMatthew G. Knepley     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
18586f0a72daSMatthew G. Knepley     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
18596f0a72daSMatthew G. Knepley     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
18606f0a72daSMatthew G. Knepley   }
18616f0a72daSMatthew G. Knepley   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
18626f0a72daSMatthew G. Knepley   /* remove zeroed rows of off diagonal matrix */
18636f0a72daSMatthew G. Knepley   for (i = 0; i < len; ++i) {
18646f0a72daSMatthew G. Knepley     row   = lrows[i];
18656f0a72daSMatthew G. Knepley     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
18666f0a72daSMatthew G. Knepley     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
18676f0a72daSMatthew G. Knepley     for (k = 0; k < count; ++k) {
18686f0a72daSMatthew G. Knepley       aa[0] = 0.0;
18696f0a72daSMatthew G. Knepley       aa   += bs;
18706f0a72daSMatthew G. Knepley     }
18716f0a72daSMatthew G. Knepley   }
18726f0a72daSMatthew G. Knepley   /* loop over all elements of off process part of matrix zeroing removed columns*/
18736f0a72daSMatthew G. Knepley   for (i = 0; i < l->B->rmap->N; ++i) {
18746f0a72daSMatthew G. Knepley     row = i/bs;
18756f0a72daSMatthew G. Knepley     for (j = baij->i[row]; j < baij->i[row+1]; ++j) {
18766f0a72daSMatthew G. Knepley       for (k = 0; k < bs; ++k) {
18776f0a72daSMatthew G. Knepley         col = bs*baij->j[j] + k;
18786f0a72daSMatthew G. Knepley         if (PetscAbsScalar(mask[col])) {
18796f0a72daSMatthew G. Knepley           aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
18806f0a72daSMatthew G. Knepley           if (b) bb[i] -= aa[0]*xx[col];
18816f0a72daSMatthew G. Knepley           aa[0] = 0.0;
18826f0a72daSMatthew G. Knepley         }
18836f0a72daSMatthew G. Knepley       }
18846f0a72daSMatthew G. Knepley     }
18856f0a72daSMatthew G. Knepley   }
18866f0a72daSMatthew G. Knepley   if (x) {
18876f0a72daSMatthew G. Knepley     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
18886f0a72daSMatthew G. Knepley     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
18896f0a72daSMatthew G. Knepley   }
18906f0a72daSMatthew G. Knepley   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
18916f0a72daSMatthew G. Knepley   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
18926f0a72daSMatthew G. Knepley   ierr = PetscFree(lrows);CHKERRQ(ierr);
18934f9cfa9eSBarry Smith 
18944f9cfa9eSBarry Smith   /* only change matrix nonzero state if pattern was allowed to be changed */
18954f9cfa9eSBarry Smith   if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) {
18964f9cfa9eSBarry Smith     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
18974f9cfa9eSBarry Smith     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
18984f9cfa9eSBarry Smith   }
18996f0a72daSMatthew G. Knepley   PetscFunctionReturn(0);
19006f0a72daSMatthew G. Knepley }
19016f0a72daSMatthew G. Knepley 
19026f0a72daSMatthew G. Knepley #undef __FUNCT__
19034a2ae208SSatish Balay #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1904dfbe8321SBarry Smith PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1905bb5a7306SBarry Smith {
1906bb5a7306SBarry Smith   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1907dfbe8321SBarry Smith   PetscErrorCode ierr;
1908d64ed03dSBarry Smith 
1909d64ed03dSBarry Smith   PetscFunctionBegin;
1910bb5a7306SBarry Smith   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
19113a40ed3dSBarry Smith   PetscFunctionReturn(0);
1912bb5a7306SBarry Smith }
1913bb5a7306SBarry Smith 
19146849ba73SBarry Smith static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*);
19150ac07820SSatish Balay 
19164a2ae208SSatish Balay #undef __FUNCT__
19174a2ae208SSatish Balay #define __FUNCT__ "MatEqual_MPIBAIJ"
1918ace3abfcSBarry Smith PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool  *flag)
19197fc3c18eSBarry Smith {
19207fc3c18eSBarry Smith   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
19217fc3c18eSBarry Smith   Mat            a,b,c,d;
1922ace3abfcSBarry Smith   PetscBool      flg;
1923dfbe8321SBarry Smith   PetscErrorCode ierr;
19247fc3c18eSBarry Smith 
19257fc3c18eSBarry Smith   PetscFunctionBegin;
19267fc3c18eSBarry Smith   a = matA->A; b = matA->B;
19277fc3c18eSBarry Smith   c = matB->A; d = matB->B;
19287fc3c18eSBarry Smith 
19297fc3c18eSBarry Smith   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1930abc0a331SBarry Smith   if (flg) {
19317fc3c18eSBarry Smith     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
19327fc3c18eSBarry Smith   }
1933ce94432eSBarry Smith   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
19347fc3c18eSBarry Smith   PetscFunctionReturn(0);
19357fc3c18eSBarry Smith }
19367fc3c18eSBarry Smith 
19373c896bc6SHong Zhang #undef __FUNCT__
19383c896bc6SHong Zhang #define __FUNCT__ "MatCopy_MPIBAIJ"
19393c896bc6SHong Zhang PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
19403c896bc6SHong Zhang {
19413c896bc6SHong Zhang   PetscErrorCode ierr;
19423c896bc6SHong Zhang   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
19433c896bc6SHong Zhang   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
19443c896bc6SHong Zhang 
19453c896bc6SHong Zhang   PetscFunctionBegin;
19463c896bc6SHong Zhang   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
19473c896bc6SHong Zhang   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
19483c896bc6SHong Zhang     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
19493c896bc6SHong Zhang   } else {
19503c896bc6SHong Zhang     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
19513c896bc6SHong Zhang     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
19523c896bc6SHong Zhang   }
19533c896bc6SHong Zhang   PetscFunctionReturn(0);
19543c896bc6SHong Zhang }
1955273d9f13SBarry Smith 
19564a2ae208SSatish Balay #undef __FUNCT__
19574994cf47SJed Brown #define __FUNCT__ "MatSetUp_MPIBAIJ"
19584994cf47SJed Brown PetscErrorCode MatSetUp_MPIBAIJ(Mat A)
1959273d9f13SBarry Smith {
1960dfbe8321SBarry Smith   PetscErrorCode ierr;
1961273d9f13SBarry Smith 
1962273d9f13SBarry Smith   PetscFunctionBegin;
1963535b19f3SBarry Smith   ierr = MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1964273d9f13SBarry Smith   PetscFunctionReturn(0);
1965273d9f13SBarry Smith }
1966273d9f13SBarry Smith 
19674fe895cdSHong Zhang #undef __FUNCT__
19684de5dceeSHong Zhang #define __FUNCT__ "MatAXPYGetPreallocation_MPIBAIJ"
19694de5dceeSHong Zhang PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
19704de5dceeSHong Zhang {
1971001ddc4fSHong Zhang   PetscErrorCode ierr;
1972001ddc4fSHong Zhang   PetscInt       bs = Y->rmap->bs,m = Y->rmap->N/bs;
19734de5dceeSHong Zhang   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
19744de5dceeSHong Zhang   Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;
19754de5dceeSHong Zhang 
19764de5dceeSHong Zhang   PetscFunctionBegin;
1977001ddc4fSHong Zhang   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
19784de5dceeSHong Zhang   PetscFunctionReturn(0);
19794de5dceeSHong Zhang }
19804de5dceeSHong Zhang 
19814de5dceeSHong Zhang #undef __FUNCT__
19824fe895cdSHong Zhang #define __FUNCT__ "MatAXPY_MPIBAIJ"
19834fe895cdSHong Zhang PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
19844fe895cdSHong Zhang {
19854fe895cdSHong Zhang   PetscErrorCode ierr;
19864fe895cdSHong Zhang   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data;
19874fe895cdSHong Zhang   PetscBLASInt   bnz,one=1;
19884fe895cdSHong Zhang   Mat_SeqBAIJ    *x,*y;
19894fe895cdSHong Zhang 
19904fe895cdSHong Zhang   PetscFunctionBegin;
19914fe895cdSHong Zhang   if (str == SAME_NONZERO_PATTERN) {
19924fe895cdSHong Zhang     PetscScalar alpha = a;
19934fe895cdSHong Zhang     x    = (Mat_SeqBAIJ*)xx->A->data;
19944fe895cdSHong Zhang     y    = (Mat_SeqBAIJ*)yy->A->data;
1995c5df96a5SBarry Smith     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
19968b83055fSJed Brown     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
19974fe895cdSHong Zhang     x    = (Mat_SeqBAIJ*)xx->B->data;
19984fe895cdSHong Zhang     y    = (Mat_SeqBAIJ*)yy->B->data;
1999c5df96a5SBarry Smith     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
20008b83055fSJed Brown     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2001a3fa217bSJose E. Roman     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2002ab784542SHong Zhang   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2003ab784542SHong Zhang     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
20044fe895cdSHong Zhang   } else {
20054de5dceeSHong Zhang     Mat      B;
20064de5dceeSHong Zhang     PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
20074de5dceeSHong Zhang     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
20084de5dceeSHong Zhang     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
20094de5dceeSHong Zhang     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
20104de5dceeSHong Zhang     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
20114de5dceeSHong Zhang     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
20124de5dceeSHong Zhang     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
20134de5dceeSHong Zhang     ierr = MatSetType(B,MATMPIBAIJ);CHKERRQ(ierr);
20144de5dceeSHong Zhang     ierr = MatAXPYGetPreallocation_SeqBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
20154de5dceeSHong Zhang     ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
20164de5dceeSHong Zhang     ierr = MatMPIBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
20174de5dceeSHong Zhang     /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! */
20184de5dceeSHong Zhang     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
20194de5dceeSHong Zhang     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
20204de5dceeSHong Zhang     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
20214de5dceeSHong Zhang     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
20224fe895cdSHong Zhang   }
20234fe895cdSHong Zhang   PetscFunctionReturn(0);
20244fe895cdSHong Zhang }
20254fe895cdSHong Zhang 
202699cafbc1SBarry Smith #undef __FUNCT__
202799cafbc1SBarry Smith #define __FUNCT__ "MatRealPart_MPIBAIJ"
202899cafbc1SBarry Smith PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
202999cafbc1SBarry Smith {
203099cafbc1SBarry Smith   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
203199cafbc1SBarry Smith   PetscErrorCode ierr;
203299cafbc1SBarry Smith 
203399cafbc1SBarry Smith   PetscFunctionBegin;
203499cafbc1SBarry Smith   ierr = MatRealPart(a->A);CHKERRQ(ierr);
203599cafbc1SBarry Smith   ierr = MatRealPart(a->B);CHKERRQ(ierr);
203699cafbc1SBarry Smith   PetscFunctionReturn(0);
203799cafbc1SBarry Smith }
203899cafbc1SBarry Smith 
203999cafbc1SBarry Smith #undef __FUNCT__
204099cafbc1SBarry Smith #define __FUNCT__ "MatImaginaryPart_MPIBAIJ"
204199cafbc1SBarry Smith PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
204299cafbc1SBarry Smith {
204399cafbc1SBarry Smith   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
204499cafbc1SBarry Smith   PetscErrorCode ierr;
204599cafbc1SBarry Smith 
204699cafbc1SBarry Smith   PetscFunctionBegin;
204799cafbc1SBarry Smith   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
204899cafbc1SBarry Smith   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
204999cafbc1SBarry Smith   PetscFunctionReturn(0);
205099cafbc1SBarry Smith }
205199cafbc1SBarry Smith 
205282094794SBarry Smith #undef __FUNCT__
205382094794SBarry Smith #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ"
20544aa3045dSJed Brown PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
20554aa3045dSJed Brown {
20564aa3045dSJed Brown   PetscErrorCode ierr;
20574aa3045dSJed Brown   IS             iscol_local;
20584aa3045dSJed Brown   PetscInt       csize;
20594aa3045dSJed Brown 
20604aa3045dSJed Brown   PetscFunctionBegin;
20614aa3045dSJed Brown   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
2062b79d0421SJed Brown   if (call == MAT_REUSE_MATRIX) {
2063b79d0421SJed Brown     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
2064e32f2f54SBarry Smith     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2065b79d0421SJed Brown   } else {
20664aa3045dSJed Brown     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
2067b79d0421SJed Brown   }
20684aa3045dSJed Brown   ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
2069b79d0421SJed Brown   if (call == MAT_INITIAL_MATRIX) {
2070b79d0421SJed Brown     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
20716bf464f9SBarry Smith     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
2072b79d0421SJed Brown   }
20734aa3045dSJed Brown   PetscFunctionReturn(0);
20744aa3045dSJed Brown }
207529dcf524SDmitry Karpeev extern PetscErrorCode MatGetSubMatrices_MPIBAIJ_local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,PetscBool*,Mat*);
20764aa3045dSJed Brown #undef __FUNCT__
2077dd183c9eSJed Brown #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private"
207882094794SBarry Smith /*
207982094794SBarry Smith   Not great since it makes two copies of the submatrix, first an SeqBAIJ
208082094794SBarry Smith   in local and then by concatenating the local matrices the end result.
20818f46ffcaSHong Zhang   Writing it directly would be much like MatGetSubMatrices_MPIBAIJ().
20828f46ffcaSHong Zhang   This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency).
208382094794SBarry Smith */
20844aa3045dSJed Brown PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
208582094794SBarry Smith {
208682094794SBarry Smith   PetscErrorCode ierr;
208782094794SBarry Smith   PetscMPIInt    rank,size;
208882094794SBarry Smith   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
208929dcf524SDmitry Karpeev   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol,nrow;
209029dcf524SDmitry Karpeev   Mat            M,Mreuse;
209182094794SBarry Smith   MatScalar      *vwork,*aa;
2092ce94432eSBarry Smith   MPI_Comm       comm;
209329dcf524SDmitry Karpeev   IS             isrow_new, iscol_new;
209429dcf524SDmitry Karpeev   PetscBool      idflag,allrows, allcols;
209582094794SBarry Smith   Mat_SeqBAIJ    *aij;
209682094794SBarry Smith 
209782094794SBarry Smith   PetscFunctionBegin;
2098ce94432eSBarry Smith   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
209982094794SBarry Smith   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
210082094794SBarry Smith   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
210129dcf524SDmitry Karpeev   /* The compression and expansion should be avoided. Doesn't point
210229dcf524SDmitry Karpeev      out errors, might change the indices, hence buggey */
210329dcf524SDmitry Karpeev   ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr);
210429dcf524SDmitry Karpeev   ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr);
210582094794SBarry Smith 
210629dcf524SDmitry Karpeev   /* Check for special case: each processor gets entire matrix columns */
210729dcf524SDmitry Karpeev   ierr = ISIdentity(iscol,&idflag);CHKERRQ(ierr);
210829dcf524SDmitry Karpeev   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
210926fbe8dcSKarl Rupp   if (idflag && ncol == mat->cmap->N) allcols = PETSC_TRUE;
211026fbe8dcSKarl Rupp   else allcols = PETSC_FALSE;
211129dcf524SDmitry Karpeev 
211229dcf524SDmitry Karpeev   ierr = ISIdentity(isrow,&idflag);CHKERRQ(ierr);
211329dcf524SDmitry Karpeev   ierr = ISGetLocalSize(isrow,&nrow);CHKERRQ(ierr);
211426fbe8dcSKarl Rupp   if (idflag && nrow == mat->rmap->N) allrows = PETSC_TRUE;
211526fbe8dcSKarl Rupp   else allrows = PETSC_FALSE;
211626fbe8dcSKarl Rupp 
211782094794SBarry Smith   if (call ==  MAT_REUSE_MATRIX) {
211882094794SBarry Smith     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
2119e32f2f54SBarry Smith     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
212075f6568bSJed Brown     ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr);
212182094794SBarry Smith   } else {
212275f6568bSJed Brown     ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr);
212382094794SBarry Smith   }
212429dcf524SDmitry Karpeev   ierr = ISDestroy(&isrow_new);CHKERRQ(ierr);
212529dcf524SDmitry Karpeev   ierr = ISDestroy(&iscol_new);CHKERRQ(ierr);
212682094794SBarry Smith   /*
212782094794SBarry Smith       m - number of local rows
212882094794SBarry Smith       n - number of columns (same on all processors)
212982094794SBarry Smith       rstart - first row in new global matrix generated
213082094794SBarry Smith   */
213182094794SBarry Smith   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
213282094794SBarry Smith   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
213382094794SBarry Smith   m    = m/bs;
213482094794SBarry Smith   n    = n/bs;
213582094794SBarry Smith 
213682094794SBarry Smith   if (call == MAT_INITIAL_MATRIX) {
213782094794SBarry Smith     aij = (Mat_SeqBAIJ*)(Mreuse)->data;
213882094794SBarry Smith     ii  = aij->i;
213982094794SBarry Smith     jj  = aij->j;
214082094794SBarry Smith 
214182094794SBarry Smith     /*
214282094794SBarry Smith         Determine the number of non-zeros in the diagonal and off-diagonal
214382094794SBarry Smith         portions of the matrix in order to do correct preallocation
214482094794SBarry Smith     */
214582094794SBarry Smith 
214682094794SBarry Smith     /* first get start and end of "diagonal" columns */
214782094794SBarry Smith     if (csize == PETSC_DECIDE) {
214882094794SBarry Smith       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
214982094794SBarry Smith       if (mglobal == n*bs) { /* square matrix */
215082094794SBarry Smith         nlocal = m;
215182094794SBarry Smith       } else {
215282094794SBarry Smith         nlocal = n/size + ((n % size) > rank);
215382094794SBarry Smith       }
215482094794SBarry Smith     } else {
215582094794SBarry Smith       nlocal = csize/bs;
215682094794SBarry Smith     }
215782094794SBarry Smith     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
215882094794SBarry Smith     rstart = rend - nlocal;
215965e19b50SBarry Smith     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
216082094794SBarry Smith 
216182094794SBarry Smith     /* next, compute all the lengths */
2162dcca6d9dSJed Brown     ierr  = PetscMalloc2(m+1,&dlens,m+1,&olens);CHKERRQ(ierr);
216382094794SBarry Smith     for (i=0; i<m; i++) {
216482094794SBarry Smith       jend = ii[i+1] - ii[i];
216582094794SBarry Smith       olen = 0;
216682094794SBarry Smith       dlen = 0;
216782094794SBarry Smith       for (j=0; j<jend; j++) {
216882094794SBarry Smith         if (*jj < rstart || *jj >= rend) olen++;
216982094794SBarry Smith         else dlen++;
217082094794SBarry Smith         jj++;
217182094794SBarry Smith       }
217282094794SBarry Smith       olens[i] = olen;
217382094794SBarry Smith       dlens[i] = dlen;
217482094794SBarry Smith     }
217582094794SBarry Smith     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
217682094794SBarry Smith     ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
217782094794SBarry Smith     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
217882094794SBarry Smith     ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
21798f46ffcaSHong Zhang     ierr = MatMPISBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
2180eb9baa12SBarry Smith     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
218182094794SBarry Smith   } else {
218282094794SBarry Smith     PetscInt ml,nl;
218382094794SBarry Smith 
218482094794SBarry Smith     M    = *newmat;
218582094794SBarry Smith     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2186e32f2f54SBarry Smith     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
218782094794SBarry Smith     ierr = MatZeroEntries(M);CHKERRQ(ierr);
218882094794SBarry Smith     /*
218982094794SBarry Smith          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
219082094794SBarry Smith        rather than the slower MatSetValues().
219182094794SBarry Smith     */
219282094794SBarry Smith     M->was_assembled = PETSC_TRUE;
219382094794SBarry Smith     M->assembled     = PETSC_FALSE;
219482094794SBarry Smith   }
219582094794SBarry Smith   ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
219682094794SBarry Smith   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
219782094794SBarry Smith   aij  = (Mat_SeqBAIJ*)(Mreuse)->data;
219882094794SBarry Smith   ii   = aij->i;
219982094794SBarry Smith   jj   = aij->j;
220082094794SBarry Smith   aa   = aij->a;
220182094794SBarry Smith   for (i=0; i<m; i++) {
220282094794SBarry Smith     row   = rstart/bs + i;
220382094794SBarry Smith     nz    = ii[i+1] - ii[i];
220482094794SBarry Smith     cwork = jj;     jj += nz;
220575f6568bSJed Brown     vwork = aa;     aa += nz*bs*bs;
220682094794SBarry Smith     ierr  = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
220782094794SBarry Smith   }
220882094794SBarry Smith 
220982094794SBarry Smith   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
221082094794SBarry Smith   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
221182094794SBarry Smith   *newmat = M;
221282094794SBarry Smith 
221382094794SBarry Smith   /* save submatrix used in processor for next request */
221482094794SBarry Smith   if (call ==  MAT_INITIAL_MATRIX) {
221582094794SBarry Smith     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
221682094794SBarry Smith     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
221782094794SBarry Smith   }
221882094794SBarry Smith   PetscFunctionReturn(0);
221982094794SBarry Smith }
222082094794SBarry Smith 
222182094794SBarry Smith #undef __FUNCT__
222282094794SBarry Smith #define __FUNCT__ "MatPermute_MPIBAIJ"
222382094794SBarry Smith PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
222482094794SBarry Smith {
222582094794SBarry Smith   MPI_Comm       comm,pcomm;
2226a0a83eb5SRémi Lacroix   PetscInt       clocal_size,nrows;
222782094794SBarry Smith   const PetscInt *rows;
2228dbf0e21dSBarry Smith   PetscMPIInt    size;
2229a0a83eb5SRémi Lacroix   IS             crowp,lcolp;
223082094794SBarry Smith   PetscErrorCode ierr;
223182094794SBarry Smith 
223282094794SBarry Smith   PetscFunctionBegin;
223382094794SBarry Smith   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
223482094794SBarry Smith   /* make a collective version of 'rowp' */
223582094794SBarry Smith   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
223682094794SBarry Smith   if (pcomm==comm) {
223782094794SBarry Smith     crowp = rowp;
223882094794SBarry Smith   } else {
223982094794SBarry Smith     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
224082094794SBarry Smith     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
224170b3c8c7SBarry Smith     ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
224282094794SBarry Smith     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
224382094794SBarry Smith   }
2244a0a83eb5SRémi Lacroix   ierr = ISSetPermutation(crowp);CHKERRQ(ierr);
2245a0a83eb5SRémi Lacroix   /* make a local version of 'colp' */
224682094794SBarry Smith   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
2247dbf0e21dSBarry Smith   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
2248dbf0e21dSBarry Smith   if (size==1) {
224982094794SBarry Smith     lcolp = colp;
225082094794SBarry Smith   } else {
225175f6568bSJed Brown     ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr);
225282094794SBarry Smith   }
2253dbf0e21dSBarry Smith   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
225475f6568bSJed Brown   /* now we just get the submatrix */
22557afc1a8bSJed Brown   ierr = MatGetLocalSize(A,NULL,&clocal_size);CHKERRQ(ierr);
2256a0a83eb5SRémi Lacroix   ierr = MatGetSubMatrix_MPIBAIJ_Private(A,crowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
2257a0a83eb5SRémi Lacroix   /* clean up */
2258a0a83eb5SRémi Lacroix   if (pcomm!=comm) {
2259a0a83eb5SRémi Lacroix     ierr = ISDestroy(&crowp);CHKERRQ(ierr);
2260a0a83eb5SRémi Lacroix   }
2261dbf0e21dSBarry Smith   if (size>1) {
22626bf464f9SBarry Smith     ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
226382094794SBarry Smith   }
226482094794SBarry Smith   PetscFunctionReturn(0);
226582094794SBarry Smith }
226682094794SBarry Smith 
22678c7482ecSBarry Smith #undef __FUNCT__
22688c7482ecSBarry Smith #define __FUNCT__ "MatGetGhosts_MPIBAIJ"
22697087cfbeSBarry Smith PetscErrorCode  MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
22708c7482ecSBarry Smith {
22718c7482ecSBarry Smith   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data;
22728c7482ecSBarry Smith   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ*)baij->B->data;
22738c7482ecSBarry Smith 
22748c7482ecSBarry Smith   PetscFunctionBegin;
227526fbe8dcSKarl Rupp   if (nghosts) *nghosts = B->nbs;
227626fbe8dcSKarl Rupp   if (ghosts) *ghosts = baij->garray;
22778c7482ecSBarry Smith   PetscFunctionReturn(0);
22788c7482ecSBarry Smith }
22798c7482ecSBarry Smith 
2280f6d58c54SBarry Smith #undef __FUNCT__
2281d1adec66SJed Brown #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIBAIJ"
/*
   MatGetSeqNonzeroStructure_MPIBAIJ - Builds, on every process, a sequential
   MATSEQAIJ matrix (on PETSC_COMM_SELF) with one row and one column per block
   of A, holding the block column indices of all block rows of the parallel
   matrix. Only the structure (row lengths and column indices) is filled in;
   numerical values are not set.

   Input Parameter:
.  A      - the parallel block matrix

   Output Parameter:
.  newmat - the sequential structure matrix
*/
2282d1adec66SJed Brown PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat)
2283f6d58c54SBarry Smith {
2284f6d58c54SBarry Smith   Mat            B;
2285f6d58c54SBarry Smith   Mat_MPIBAIJ    *a  = (Mat_MPIBAIJ*)A->data;
2286f6d58c54SBarry Smith   Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
2287f6d58c54SBarry Smith   Mat_SeqAIJ     *b;
2288f6d58c54SBarry Smith   PetscErrorCode ierr;
2289f6d58c54SBarry Smith   PetscMPIInt    size,rank,*recvcounts = 0,*displs = 0;
2290f6d58c54SBarry Smith   PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
2291f6d58c54SBarry Smith   PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;
2292f6d58c54SBarry Smith 
2293f6d58c54SBarry Smith   PetscFunctionBegin;
2294ce94432eSBarry Smith   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
2295ce94432eSBarry Smith   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
2296f6d58c54SBarry Smith 
2297f6d58c54SBarry Smith   /* ----------------------------------------------------------------
2298f6d58c54SBarry Smith      Tell every processor the number of nonzeros per row
2299f6d58c54SBarry Smith   */
  /* lens has one entry per global block row; each process fills only its own
     range here (A-part length plus B-part length) before the gather below */
2300854ce69bSBarry Smith   ierr = PetscMalloc1(A->rmap->N/bs,&lens);CHKERRQ(ierr);
2301f6d58c54SBarry Smith   for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
2302f6d58c54SBarry Smith     lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
2303f6d58c54SBarry Smith   }
2304f6d58c54SBarry Smith   sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
  /* a single allocation of 2*size entries: recvcounts is the first half,
     displs the second; freeing recvcounts releases both */
2305785e854fSJed Brown   ierr      = PetscMalloc1(2*size,&recvcounts);CHKERRQ(ierr);
2306f6d58c54SBarry Smith   displs    = recvcounts + size;
2307f6d58c54SBarry Smith   for (i=0; i<size; i++) {
2308f6d58c54SBarry Smith     recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
2309f6d58c54SBarry Smith     displs[i]     = A->rmap->range[i]/bs;
2310f6d58c54SBarry Smith   }
2311f6d58c54SBarry Smith #if defined(PETSC_HAVE_MPI_IN_PLACE)
2312ce94432eSBarry Smith   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2313f6d58c54SBarry Smith #else
2314ce94432eSBarry Smith   ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2315f6d58c54SBarry Smith #endif
2316f6d58c54SBarry Smith   /* ---------------------------------------------------------------
2317f6d58c54SBarry Smith      Create the sequential matrix of the same type as the local block diagonal
2318f6d58c54SBarry Smith   */
  /* NOTE(review): the type actually set below is MATSEQAIJ, sized in blocks
     (N/bs by N/bs), not the block type of the local diagonal part */
2319f6d58c54SBarry Smith   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
2320f6d58c54SBarry Smith   ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2321f6d58c54SBarry Smith   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2322f6d58c54SBarry Smith   ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr);
2323f6d58c54SBarry Smith   b    = (Mat_SeqAIJ*)B->data;
2324f6d58c54SBarry Smith 
2325f6d58c54SBarry Smith   /*--------------------------------------------------------------------
2326f6d58c54SBarry Smith     Copy my part of matrix column indices over
2327f6d58c54SBarry Smith   */
  /* jsendbuf points into b->j at the offset of this process's first block row,
     so the local indices are written directly at their final position */
2328f6d58c54SBarry Smith   sendcount  = ad->nz + bd->nz;
2329f6d58c54SBarry Smith   jsendbuf   = b->j + b->i[rstarts[rank]/bs];
2330f6d58c54SBarry Smith   a_jsendbuf = ad->j;
2331f6d58c54SBarry Smith   b_jsendbuf = bd->j;
2332f6d58c54SBarry Smith   n          = A->rmap->rend/bs - A->rmap->rstart/bs;
2333f6d58c54SBarry Smith   cnt        = 0;
  /* per block row: B-part columns up to the break test, then all A-part columns
     shifted by rstart/bs, then the remaining B-part columns; garray[] maps
     B-part local column numbers to global block columns */
2334f6d58c54SBarry Smith   for (i=0; i<n; i++) {
2335f6d58c54SBarry Smith 
2336f6d58c54SBarry Smith     /* put in lower diagonal portion */
2337f6d58c54SBarry Smith     m = bd->i[i+1] - bd->i[i];
2338f6d58c54SBarry Smith     while (m > 0) {
2339f6d58c54SBarry Smith       /* is it above diagonal (in bd (compressed) numbering) */
2340f6d58c54SBarry Smith       if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
2341f6d58c54SBarry Smith       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2342f6d58c54SBarry Smith       m--;
2343f6d58c54SBarry Smith     }
2344f6d58c54SBarry Smith 
2345f6d58c54SBarry Smith     /* put in diagonal portion */
2346f6d58c54SBarry Smith     for (j=ad->i[i]; j<ad->i[i+1]; j++) {
2347f6d58c54SBarry Smith       jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
2348f6d58c54SBarry Smith     }
2349f6d58c54SBarry Smith 
2350f6d58c54SBarry Smith     /* put in upper diagonal portion */
2351f6d58c54SBarry Smith     while (m-- > 0) {
2352f6d58c54SBarry Smith       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2353f6d58c54SBarry Smith     }
2354f6d58c54SBarry Smith   }
  /* sanity check: the number of indices written must equal ad->nz + bd->nz */
2355e32f2f54SBarry Smith   if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);
2356f6d58c54SBarry Smith 
2357f6d58c54SBarry Smith   /*--------------------------------------------------------------------
2358f6d58c54SBarry Smith     Gather all column indices to all processors
2359f6d58c54SBarry Smith   */
  /* recvcounts/displs are reused: now counted in column indices per process
     (sum of the gathered row lengths) instead of block rows */
2360f6d58c54SBarry Smith   for (i=0; i<size; i++) {
2361f6d58c54SBarry Smith     recvcounts[i] = 0;
2362f6d58c54SBarry Smith     for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
2363f6d58c54SBarry Smith       recvcounts[i] += lens[j];
2364f6d58c54SBarry Smith     }
2365f6d58c54SBarry Smith   }
2366f6d58c54SBarry Smith   displs[0] = 0;
2367f6d58c54SBarry Smith   for (i=1; i<size; i++) {
2368f6d58c54SBarry Smith     displs[i] = displs[i-1] + recvcounts[i-1];
2369f6d58c54SBarry Smith   }
2370f6d58c54SBarry Smith #if defined(PETSC_HAVE_MPI_IN_PLACE)
2371ce94432eSBarry Smith   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2372f6d58c54SBarry Smith #else
2373ce94432eSBarry Smith   ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2374f6d58c54SBarry Smith #endif
2375f6d58c54SBarry Smith   /*--------------------------------------------------------------------
2376f6d58c54SBarry Smith     Assemble the matrix into useable form (note numerical values not yet set)
2377f6d58c54SBarry Smith   */
2378f6d58c54SBarry Smith   /* set the b->ilen (length of each row) values */
2379f6d58c54SBarry Smith   ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr);
2380f6d58c54SBarry Smith   /* set the b->i indices */
2381f6d58c54SBarry Smith   b->i[0] = 0;
2382f6d58c54SBarry Smith   for (i=1; i<=A->rmap->N/bs; i++) {
2383f6d58c54SBarry Smith     b->i[i] = b->i[i-1] + lens[i-1];
2384f6d58c54SBarry Smith   }
2385f6d58c54SBarry Smith   ierr = PetscFree(lens);CHKERRQ(ierr);
2386f6d58c54SBarry Smith   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2387f6d58c54SBarry Smith   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2388f6d58c54SBarry Smith   ierr = PetscFree(recvcounts);CHKERRQ(ierr);
2389f6d58c54SBarry Smith 
  /* copy at most one symmetry flag from A to B: the first match in the
     symmetric / hermitian / structurally symmetric chain wins */
2390f6d58c54SBarry Smith   if (A->symmetric) {
2391f6d58c54SBarry Smith     ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2392f6d58c54SBarry Smith   } else if (A->hermitian) {
2393f6d58c54SBarry Smith     ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
2394f6d58c54SBarry Smith   } else if (A->structurally_symmetric) {
2395f6d58c54SBarry Smith     ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2396f6d58c54SBarry Smith   }
2397f6d58c54SBarry Smith   *newmat = B;
2398f6d58c54SBarry Smith   PetscFunctionReturn(0);
2399f6d58c54SBarry Smith }
2400f6d58c54SBarry Smith 
2401b1a666ecSBarry Smith #undef __FUNCT__
2402b1a666ecSBarry Smith #define __FUNCT__ "MatSOR_MPIBAIJ"
2403b1a666ecSBarry Smith PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2404b1a666ecSBarry Smith {
2405b1a666ecSBarry Smith   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
2406b1a666ecSBarry Smith   PetscErrorCode ierr;
2407b1a666ecSBarry Smith   Vec            bb1 = 0;
2408b1a666ecSBarry Smith 
2409b1a666ecSBarry Smith   PetscFunctionBegin;
2410b1a666ecSBarry Smith   if (flag == SOR_APPLY_UPPER) {
2411b1a666ecSBarry Smith     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2412b1a666ecSBarry Smith     PetscFunctionReturn(0);
2413b1a666ecSBarry Smith   }
2414b1a666ecSBarry Smith 
24154e980039SJed Brown   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) {
24164e980039SJed Brown     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
24174e980039SJed Brown   }
24184e980039SJed Brown 
2419b1a666ecSBarry Smith   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2420b1a666ecSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
2421b1a666ecSBarry Smith       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2422b1a666ecSBarry Smith       its--;
2423b1a666ecSBarry Smith     }
2424b1a666ecSBarry Smith 
2425b1a666ecSBarry Smith     while (its--) {
2426b1a666ecSBarry Smith       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2427b1a666ecSBarry Smith       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2428b1a666ecSBarry Smith 
2429b1a666ecSBarry Smith       /* update rhs: bb1 = bb - B*x */
2430b1a666ecSBarry Smith       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2431b1a666ecSBarry Smith       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2432b1a666ecSBarry Smith 
2433b1a666ecSBarry Smith       /* local sweep */
2434b1a666ecSBarry Smith       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2435b1a666ecSBarry Smith     }
2436b1a666ecSBarry Smith   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
2437b1a666ecSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
2438b1a666ecSBarry Smith       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2439b1a666ecSBarry Smith       its--;
2440b1a666ecSBarry Smith     }
2441b1a666ecSBarry Smith     while (its--) {
2442b1a666ecSBarry Smith       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2443b1a666ecSBarry Smith       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2444b1a666ecSBarry Smith 
2445b1a666ecSBarry Smith       /* update rhs: bb1 = bb - B*x */
2446b1a666ecSBarry Smith       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2447b1a666ecSBarry Smith       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2448b1a666ecSBarry Smith 
2449b1a666ecSBarry Smith       /* local sweep */
2450b1a666ecSBarry Smith       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2451b1a666ecSBarry Smith     }
2452b1a666ecSBarry Smith   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
2453b1a666ecSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
2454b1a666ecSBarry Smith       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2455b1a666ecSBarry Smith       its--;
2456b1a666ecSBarry Smith     }
2457b1a666ecSBarry Smith     while (its--) {
2458b1a666ecSBarry Smith       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2459b1a666ecSBarry Smith       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2460b1a666ecSBarry Smith 
2461b1a666ecSBarry Smith       /* update rhs: bb1 = bb - B*x */
2462b1a666ecSBarry Smith       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2463b1a666ecSBarry Smith       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2464b1a666ecSBarry Smith 
2465b1a666ecSBarry Smith       /* local sweep */
2466b1a666ecSBarry Smith       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2467b1a666ecSBarry Smith     }
2468ce94432eSBarry Smith   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel version of SOR requested not supported");
2469b1a666ecSBarry Smith 
24706bf464f9SBarry Smith   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2471b1a666ecSBarry Smith   PetscFunctionReturn(0);
2472b1a666ecSBarry Smith }
2473b1a666ecSBarry Smith 
2474bbead8a2SBarry Smith #undef __FUNCT__
247547f7623dSRémi Lacroix #define __FUNCT__ "MatGetColumnNorms_MPIBAIJ"
247647f7623dSRémi Lacroix PetscErrorCode MatGetColumnNorms_MPIBAIJ(Mat A,NormType type,PetscReal *norms)
247747f7623dSRémi Lacroix {
247847f7623dSRémi Lacroix   PetscErrorCode ierr;
247947f7623dSRémi Lacroix   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)A->data;
248047f7623dSRémi Lacroix   PetscInt       N,i,*garray = aij->garray;
248147f7623dSRémi Lacroix   PetscInt       ib,jb,bs = A->rmap->bs;
248247f7623dSRémi Lacroix   Mat_SeqBAIJ    *a_aij = (Mat_SeqBAIJ*) aij->A->data;
248347f7623dSRémi Lacroix   MatScalar      *a_val = a_aij->a;
248447f7623dSRémi Lacroix   Mat_SeqBAIJ    *b_aij = (Mat_SeqBAIJ*) aij->B->data;
248547f7623dSRémi Lacroix   MatScalar      *b_val = b_aij->a;
248647f7623dSRémi Lacroix   PetscReal      *work;
248747f7623dSRémi Lacroix 
248847f7623dSRémi Lacroix   PetscFunctionBegin;
248947f7623dSRémi Lacroix   ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
24901795a4d1SJed Brown   ierr = PetscCalloc1(N,&work);CHKERRQ(ierr);
249147f7623dSRémi Lacroix   if (type == NORM_2) {
249247f7623dSRémi Lacroix     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
249347f7623dSRémi Lacroix       for (jb=0; jb<bs; jb++) {
249447f7623dSRémi Lacroix         for (ib=0; ib<bs; ib++) {
249547f7623dSRémi Lacroix           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
249647f7623dSRémi Lacroix           a_val++;
249747f7623dSRémi Lacroix         }
249847f7623dSRémi Lacroix       }
249947f7623dSRémi Lacroix     }
250047f7623dSRémi Lacroix     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
250147f7623dSRémi Lacroix       for (jb=0; jb<bs; jb++) {
250247f7623dSRémi Lacroix         for (ib=0; ib<bs; ib++) {
250347f7623dSRémi Lacroix           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val);
250447f7623dSRémi Lacroix           b_val++;
250547f7623dSRémi Lacroix         }
250647f7623dSRémi Lacroix       }
250747f7623dSRémi Lacroix     }
250847f7623dSRémi Lacroix   } else if (type == NORM_1) {
250947f7623dSRémi Lacroix     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
251047f7623dSRémi Lacroix       for (jb=0; jb<bs; jb++) {
251147f7623dSRémi Lacroix         for (ib=0; ib<bs; ib++) {
251247f7623dSRémi Lacroix           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
251347f7623dSRémi Lacroix           a_val++;
251447f7623dSRémi Lacroix         }
251547f7623dSRémi Lacroix       }
251647f7623dSRémi Lacroix     }
251747f7623dSRémi Lacroix     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
251847f7623dSRémi Lacroix       for (jb=0; jb<bs; jb++) {
251947f7623dSRémi Lacroix        for (ib=0; ib<bs; ib++) {
252047f7623dSRémi Lacroix           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val);
252147f7623dSRémi Lacroix           b_val++;
252247f7623dSRémi Lacroix         }
252347f7623dSRémi Lacroix       }
252447f7623dSRémi Lacroix     }
252547f7623dSRémi Lacroix   } else if (type == NORM_INFINITY) {
252647f7623dSRémi Lacroix     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
252747f7623dSRémi Lacroix       for (jb=0; jb<bs; jb++) {
252847f7623dSRémi Lacroix         for (ib=0; ib<bs; ib++) {
252947f7623dSRémi Lacroix           int col = A->cmap->rstart + a_aij->j[i] * bs + jb;
253047f7623dSRémi Lacroix           work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]);
253147f7623dSRémi Lacroix           a_val++;
253247f7623dSRémi Lacroix         }
253347f7623dSRémi Lacroix       }
253447f7623dSRémi Lacroix     }
253547f7623dSRémi Lacroix     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
253647f7623dSRémi Lacroix       for (jb=0; jb<bs; jb++) {
253747f7623dSRémi Lacroix         for (ib=0; ib<bs; ib++) {
253847f7623dSRémi Lacroix           int col = garray[b_aij->j[i]] * bs + jb;
253947f7623dSRémi Lacroix           work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]);
254047f7623dSRémi Lacroix           b_val++;
254147f7623dSRémi Lacroix         }
254247f7623dSRémi Lacroix       }
254347f7623dSRémi Lacroix     }
254447f7623dSRémi Lacroix   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
254547f7623dSRémi Lacroix   if (type == NORM_INFINITY) {
254647f7623dSRémi Lacroix     ierr = MPI_Allreduce(work,norms,N,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
254747f7623dSRémi Lacroix   } else {
254847f7623dSRémi Lacroix     ierr = MPI_Allreduce(work,norms,N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
254947f7623dSRémi Lacroix   }
255047f7623dSRémi Lacroix   ierr = PetscFree(work);CHKERRQ(ierr);
255147f7623dSRémi Lacroix   if (type == NORM_2) {
255247f7623dSRémi Lacroix     for (i=0; i<N; i++) norms[i] = PetscSqrtReal(norms[i]);
255347f7623dSRémi Lacroix   }
255447f7623dSRémi Lacroix   PetscFunctionReturn(0);
255547f7623dSRémi Lacroix }
255647f7623dSRémi Lacroix 
255747f7623dSRémi Lacroix #undef __FUNCT__
2558bbead8a2SBarry Smith #define __FUNCT__ "MatInvertBlockDiagonal_MPIBAIJ"
2559713ccfa9SJed Brown PetscErrorCode  MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values)
2560bbead8a2SBarry Smith {
2561bbead8a2SBarry Smith   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*) A->data;
2562bbead8a2SBarry Smith   PetscErrorCode ierr;
2563bbead8a2SBarry Smith 
2564bbead8a2SBarry Smith   PetscFunctionBegin;
2565bbead8a2SBarry Smith   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2566bbead8a2SBarry Smith   PetscFunctionReturn(0);
2567bbead8a2SBarry Smith }
2568bbead8a2SBarry Smith 
25698c7482ecSBarry Smith 
257079bdfe76SSatish Balay /* -------------------------------------------------------------------*/
25713964eb88SJed Brown static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ,
2572cc2dc46cSBarry Smith                                        MatGetRow_MPIBAIJ,
2573cc2dc46cSBarry Smith                                        MatRestoreRow_MPIBAIJ,
2574cc2dc46cSBarry Smith                                        MatMult_MPIBAIJ,
257597304618SKris Buschelman                                 /* 4*/ MatMultAdd_MPIBAIJ,
25767c922b88SBarry Smith                                        MatMultTranspose_MPIBAIJ,
25777c922b88SBarry Smith                                        MatMultTransposeAdd_MPIBAIJ,
2578cc2dc46cSBarry Smith                                        0,
2579cc2dc46cSBarry Smith                                        0,
2580cc2dc46cSBarry Smith                                        0,
258197304618SKris Buschelman                                 /*10*/ 0,
2582cc2dc46cSBarry Smith                                        0,
2583cc2dc46cSBarry Smith                                        0,
2584b1a666ecSBarry Smith                                        MatSOR_MPIBAIJ,
2585cc2dc46cSBarry Smith                                        MatTranspose_MPIBAIJ,
258697304618SKris Buschelman                                 /*15*/ MatGetInfo_MPIBAIJ,
25877fc3c18eSBarry Smith                                        MatEqual_MPIBAIJ,
2588cc2dc46cSBarry Smith                                        MatGetDiagonal_MPIBAIJ,
2589cc2dc46cSBarry Smith                                        MatDiagonalScale_MPIBAIJ,
2590cc2dc46cSBarry Smith                                        MatNorm_MPIBAIJ,
259197304618SKris Buschelman                                 /*20*/ MatAssemblyBegin_MPIBAIJ,
2592cc2dc46cSBarry Smith                                        MatAssemblyEnd_MPIBAIJ,
2593cc2dc46cSBarry Smith                                        MatSetOption_MPIBAIJ,
2594cc2dc46cSBarry Smith                                        MatZeroEntries_MPIBAIJ,
2595d519adbfSMatthew Knepley                                 /*24*/ MatZeroRows_MPIBAIJ,
2596cc2dc46cSBarry Smith                                        0,
2597cc2dc46cSBarry Smith                                        0,
2598cc2dc46cSBarry Smith                                        0,
2599cc2dc46cSBarry Smith                                        0,
26004994cf47SJed Brown                                 /*29*/ MatSetUp_MPIBAIJ,
2601273d9f13SBarry Smith                                        0,
2602cc2dc46cSBarry Smith                                        0,
2603cc2dc46cSBarry Smith                                        0,
2604cc2dc46cSBarry Smith                                        0,
2605d519adbfSMatthew Knepley                                 /*34*/ MatDuplicate_MPIBAIJ,
2606cc2dc46cSBarry Smith                                        0,
2607cc2dc46cSBarry Smith                                        0,
2608cc2dc46cSBarry Smith                                        0,
2609cc2dc46cSBarry Smith                                        0,
2610d519adbfSMatthew Knepley                                 /*39*/ MatAXPY_MPIBAIJ,
2611cc2dc46cSBarry Smith                                        MatGetSubMatrices_MPIBAIJ,
2612cc2dc46cSBarry Smith                                        MatIncreaseOverlap_MPIBAIJ,
2613cc2dc46cSBarry Smith                                        MatGetValues_MPIBAIJ,
26143c896bc6SHong Zhang                                        MatCopy_MPIBAIJ,
2615d519adbfSMatthew Knepley                                 /*44*/ 0,
2616cc2dc46cSBarry Smith                                        MatScale_MPIBAIJ,
2617cc2dc46cSBarry Smith                                        0,
2618cc2dc46cSBarry Smith                                        0,
26196f0a72daSMatthew G. Knepley                                        MatZeroRowsColumns_MPIBAIJ,
2620f73d5cc4SBarry Smith                                 /*49*/ 0,
2621cc2dc46cSBarry Smith                                        0,
2622cc2dc46cSBarry Smith                                        0,
2623cc2dc46cSBarry Smith                                        0,
2624cc2dc46cSBarry Smith                                        0,
262593dfae19SHong Zhang                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2626cc2dc46cSBarry Smith                                        0,
2627cc2dc46cSBarry Smith                                        MatSetUnfactored_MPIBAIJ,
262882094794SBarry Smith                                        MatPermute_MPIBAIJ,
2629cc2dc46cSBarry Smith                                        MatSetValuesBlocked_MPIBAIJ,
2630d519adbfSMatthew Knepley                                 /*59*/ MatGetSubMatrix_MPIBAIJ,
2631f14a1c24SBarry Smith                                        MatDestroy_MPIBAIJ,
2632f14a1c24SBarry Smith                                        MatView_MPIBAIJ,
2633357abbc8SBarry Smith                                        0,
26347843d17aSBarry Smith                                        0,
2635d519adbfSMatthew Knepley                                 /*64*/ 0,
26367843d17aSBarry Smith                                        0,
26377843d17aSBarry Smith                                        0,
26387843d17aSBarry Smith                                        0,
26397843d17aSBarry Smith                                        0,
2640d519adbfSMatthew Knepley                                 /*69*/ MatGetRowMaxAbs_MPIBAIJ,
26417843d17aSBarry Smith                                        0,
264297304618SKris Buschelman                                        0,
264397304618SKris Buschelman                                        0,
264497304618SKris Buschelman                                        0,
2645d519adbfSMatthew Knepley                                 /*74*/ 0,
2646f6d58c54SBarry Smith                                        MatFDColoringApply_BAIJ,
264797304618SKris Buschelman                                        0,
264897304618SKris Buschelman                                        0,
264997304618SKris Buschelman                                        0,
2650d519adbfSMatthew Knepley                                 /*79*/ 0,
265197304618SKris Buschelman                                        0,
265297304618SKris Buschelman                                        0,
265397304618SKris Buschelman                                        0,
26545bba2384SShri Abhyankar                                        MatLoad_MPIBAIJ,
2655d519adbfSMatthew Knepley                                 /*84*/ 0,
2656865e5f61SKris Buschelman                                        0,
2657865e5f61SKris Buschelman                                        0,
2658865e5f61SKris Buschelman                                        0,
2659865e5f61SKris Buschelman                                        0,
2660d519adbfSMatthew Knepley                                 /*89*/ 0,
2661865e5f61SKris Buschelman                                        0,
2662865e5f61SKris Buschelman                                        0,
2663865e5f61SKris Buschelman                                        0,
2664865e5f61SKris Buschelman                                        0,
2665d519adbfSMatthew Knepley                                 /*94*/ 0,
2666865e5f61SKris Buschelman                                        0,
2667865e5f61SKris Buschelman                                        0,
266899cafbc1SBarry Smith                                        0,
266999cafbc1SBarry Smith                                        0,
2670d519adbfSMatthew Knepley                                 /*99*/ 0,
267199cafbc1SBarry Smith                                        0,
267299cafbc1SBarry Smith                                        0,
267399cafbc1SBarry Smith                                        0,
267499cafbc1SBarry Smith                                        0,
2675d519adbfSMatthew Knepley                                 /*104*/0,
267699cafbc1SBarry Smith                                        MatRealPart_MPIBAIJ,
26778c7482ecSBarry Smith                                        MatImaginaryPart_MPIBAIJ,
26788c7482ecSBarry Smith                                        0,
26798c7482ecSBarry Smith                                        0,
2680d519adbfSMatthew Knepley                                 /*109*/0,
26818c7482ecSBarry Smith                                        0,
26828c7482ecSBarry Smith                                        0,
26838c7482ecSBarry Smith                                        0,
26848c7482ecSBarry Smith                                        0,
2685d1adec66SJed Brown                                 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ,
26868c7482ecSBarry Smith                                        0,
26874683f7a4SShri Abhyankar                                        MatGetGhosts_MPIBAIJ,
26884683f7a4SShri Abhyankar                                        0,
26894683f7a4SShri Abhyankar                                        0,
26904683f7a4SShri Abhyankar                                 /*119*/0,
26914683f7a4SShri Abhyankar                                        0,
26924683f7a4SShri Abhyankar                                        0,
2693bbead8a2SBarry Smith                                        0,
2694e8271787SHong Zhang                                        MatGetMultiProcBlock_MPIBAIJ,
2695bbead8a2SBarry Smith                                 /*124*/0,
269647f7623dSRémi Lacroix                                        MatGetColumnNorms_MPIBAIJ,
26973964eb88SJed Brown                                        MatInvertBlockDiagonal_MPIBAIJ,
26983964eb88SJed Brown                                        0,
26993964eb88SJed Brown                                        0,
27003964eb88SJed Brown                                /*129*/ 0,
27013964eb88SJed Brown                                        0,
27023964eb88SJed Brown                                        0,
27033964eb88SJed Brown                                        0,
27043964eb88SJed Brown                                        0,
27053964eb88SJed Brown                                /*134*/ 0,
27063964eb88SJed Brown                                        0,
27073964eb88SJed Brown                                        0,
27083964eb88SJed Brown                                        0,
27093964eb88SJed Brown                                        0,
27103964eb88SJed Brown                                /*139*/ 0,
2711f9426fe0SMark Adams                                        0,
27121919a2e2SJed Brown                                        0,
2713bdf6f3fcSHong Zhang                                        MatFDColoringSetUp_MPIXAIJ,
2714bdf6f3fcSHong Zhang                                        0,
2715bdf6f3fcSHong Zhang                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIBAIJ
27168c7482ecSBarry Smith };
271779bdfe76SSatish Balay 
27184a2ae208SSatish Balay #undef __FUNCT__
27194a2ae208SSatish Balay #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
272011bd1e4dSLisandro Dalcin PetscErrorCode  MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a)
27215ef9f2a5SBarry Smith {
27225ef9f2a5SBarry Smith   PetscFunctionBegin;
27235ef9f2a5SBarry Smith   *a = ((Mat_MPIBAIJ*)A->data)->A;
27245ef9f2a5SBarry Smith   PetscFunctionReturn(0);
27255ef9f2a5SBarry Smith }
272679bdfe76SSatish Balay 
27278cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*);
2728d94109b8SHong Zhang 
2729aac34f13SBarry Smith #undef __FUNCT__
2730aac34f13SBarry Smith #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
2731cf12db73SBarry Smith PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2732aac34f13SBarry Smith {
2733b8d659d7SLisandro Dalcin   PetscInt       m,rstart,cstart,cend;
2734b8d659d7SLisandro Dalcin   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
2735b8d659d7SLisandro Dalcin   const PetscInt *JJ    =0;
2736b8d659d7SLisandro Dalcin   PetscScalar    *values=0;
2737d47bf9aaSJed Brown   PetscBool      roworiented = ((Mat_MPIBAIJ*)B->data)->roworiented;
2738aac34f13SBarry Smith   PetscErrorCode ierr;
2739aac34f13SBarry Smith 
2740aac34f13SBarry Smith   PetscFunctionBegin;
274126283091SBarry Smith   ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
274226283091SBarry Smith   ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
274326283091SBarry Smith   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
274426283091SBarry Smith   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2745e02043d6SBarry Smith   ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2746d0f46423SBarry Smith   m      = B->rmap->n/bs;
2747d0f46423SBarry Smith   rstart = B->rmap->rstart/bs;
2748d0f46423SBarry Smith   cstart = B->cmap->rstart/bs;
2749d0f46423SBarry Smith   cend   = B->cmap->rend/bs;
2750b8d659d7SLisandro Dalcin 
2751e32f2f54SBarry Smith   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2752dcca6d9dSJed Brown   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
2753aac34f13SBarry Smith   for (i=0; i<m; i++) {
2754cf12db73SBarry Smith     nz = ii[i+1] - ii[i];
2755e32f2f54SBarry Smith     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2756b8d659d7SLisandro Dalcin     nz_max = PetscMax(nz_max,nz);
2757cf12db73SBarry Smith     JJ     = jj + ii[i];
2758b8d659d7SLisandro Dalcin     for (j=0; j<nz; j++) {
2759aac34f13SBarry Smith       if (*JJ >= cstart) break;
2760aac34f13SBarry Smith       JJ++;
2761aac34f13SBarry Smith     }
2762aac34f13SBarry Smith     d = 0;
2763b8d659d7SLisandro Dalcin     for (; j<nz; j++) {
2764aac34f13SBarry Smith       if (*JJ++ >= cend) break;
2765aac34f13SBarry Smith       d++;
2766aac34f13SBarry Smith     }
2767aac34f13SBarry Smith     d_nnz[i] = d;
2768b8d659d7SLisandro Dalcin     o_nnz[i] = nz - d;
2769aac34f13SBarry Smith   }
2770aac34f13SBarry Smith   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2771fca92195SBarry Smith   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2772aac34f13SBarry Smith 
2773b8d659d7SLisandro Dalcin   values = (PetscScalar*)V;
2774b8d659d7SLisandro Dalcin   if (!values) {
2775785e854fSJed Brown     ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
2776b8d659d7SLisandro Dalcin     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2777b8d659d7SLisandro Dalcin   }
2778b8d659d7SLisandro Dalcin   for (i=0; i<m; i++) {
2779b8d659d7SLisandro Dalcin     PetscInt          row    = i + rstart;
2780cf12db73SBarry Smith     PetscInt          ncols  = ii[i+1] - ii[i];
2781cf12db73SBarry Smith     const PetscInt    *icols = jj + ii[i];
27823adadaf3SJed Brown     if (!roworiented) {         /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
2783cf12db73SBarry Smith       const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2784b8d659d7SLisandro Dalcin       ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
27853adadaf3SJed Brown     } else {                    /* block ordering does not match so we can only insert one block at a time. */
27863adadaf3SJed Brown       PetscInt j;
27873adadaf3SJed Brown       for (j=0; j<ncols; j++) {
27883adadaf3SJed Brown         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
27893adadaf3SJed Brown         ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
27903adadaf3SJed Brown       }
27913adadaf3SJed Brown     }
2792aac34f13SBarry Smith   }
2793aac34f13SBarry Smith 
2794b8d659d7SLisandro Dalcin   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2795aac34f13SBarry Smith   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2796aac34f13SBarry Smith   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
27977827cd58SJed Brown   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2798aac34f13SBarry Smith   PetscFunctionReturn(0);
2799aac34f13SBarry Smith }
2800aac34f13SBarry Smith 
2801aac34f13SBarry Smith #undef __FUNCT__
2802aac34f13SBarry Smith #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
2803aac34f13SBarry Smith /*@C
2804dfb205c3SBarry Smith    MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
2805aac34f13SBarry Smith    (the default parallel PETSc format).
2806aac34f13SBarry Smith 
2807aac34f13SBarry Smith    Collective on MPI_Comm
2808aac34f13SBarry Smith 
2809aac34f13SBarry Smith    Input Parameters:
28101c4f3114SJed Brown +  B - the matrix
2811dfb205c3SBarry Smith .  bs - the block size
2812aac34f13SBarry Smith .  i - the indices into j for the start of each local row (starts with zero)
2813aac34f13SBarry Smith .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2814aac34f13SBarry Smith -  v - optional values in the matrix
2815aac34f13SBarry Smith 
2816aac34f13SBarry Smith    Level: developer
2817aac34f13SBarry Smith 
28183adadaf3SJed Brown    Notes: The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
28193adadaf3SJed Brown    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
28203adadaf3SJed Brown    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
28213adadaf3SJed Brown    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
28223adadaf3SJed Brown    block column and the second index is over columns within a block.
28233adadaf3SJed Brown 
2824aac34f13SBarry Smith .keywords: matrix, aij, compressed row, sparse, parallel
2825aac34f13SBarry Smith 
28263adadaf3SJed Brown .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, MatCreateMPIBAIJWithArrays(), MPIBAIJ
2827aac34f13SBarry Smith @*/
28287087cfbeSBarry Smith PetscErrorCode  MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2829aac34f13SBarry Smith {
28304ac538c5SBarry Smith   PetscErrorCode ierr;
2831aac34f13SBarry Smith 
2832aac34f13SBarry Smith   PetscFunctionBegin;
28336ba663aaSJed Brown   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
28346ba663aaSJed Brown   PetscValidType(B,1);
28356ba663aaSJed Brown   PetscValidLogicalCollectiveInt(B,bs,2);
28364ac538c5SBarry Smith   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
2837aac34f13SBarry Smith   PetscFunctionReturn(0);
2838aac34f13SBarry Smith }
2839aac34f13SBarry Smith 
28404a2ae208SSatish Balay #undef __FUNCT__
2841a23d5eceSKris Buschelman #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
2842b2573a8aSBarry Smith PetscErrorCode  MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
2843a23d5eceSKris Buschelman {
2844a23d5eceSKris Buschelman   Mat_MPIBAIJ    *b;
2845dfbe8321SBarry Smith   PetscErrorCode ierr;
2846535b19f3SBarry Smith   PetscInt       i;
2847a23d5eceSKris Buschelman 
2848a23d5eceSKris Buschelman   PetscFunctionBegin;
284933d57670SJed Brown   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
285026283091SBarry Smith   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
285126283091SBarry Smith   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2852e02043d6SBarry Smith   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2853899cda47SBarry Smith 
2854a23d5eceSKris Buschelman   if (d_nnz) {
2855d0f46423SBarry Smith     for (i=0; i<B->rmap->n/bs; i++) {
2856e32f2f54SBarry Smith       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
2857a23d5eceSKris Buschelman     }
2858a23d5eceSKris Buschelman   }
2859a23d5eceSKris Buschelman   if (o_nnz) {
2860d0f46423SBarry Smith     for (i=0; i<B->rmap->n/bs; i++) {
2861e32f2f54SBarry Smith       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
2862a23d5eceSKris Buschelman     }
2863a23d5eceSKris Buschelman   }
2864a23d5eceSKris Buschelman 
2865a23d5eceSKris Buschelman   b      = (Mat_MPIBAIJ*)B->data;
2866a23d5eceSKris Buschelman   b->bs2 = bs*bs;
2867d0f46423SBarry Smith   b->mbs = B->rmap->n/bs;
2868d0f46423SBarry Smith   b->nbs = B->cmap->n/bs;
2869d0f46423SBarry Smith   b->Mbs = B->rmap->N/bs;
2870d0f46423SBarry Smith   b->Nbs = B->cmap->N/bs;
2871a23d5eceSKris Buschelman 
2872a23d5eceSKris Buschelman   for (i=0; i<=b->size; i++) {
2873d0f46423SBarry Smith     b->rangebs[i] = B->rmap->range[i]/bs;
2874a23d5eceSKris Buschelman   }
2875d0f46423SBarry Smith   b->rstartbs = B->rmap->rstart/bs;
2876d0f46423SBarry Smith   b->rendbs   = B->rmap->rend/bs;
2877d0f46423SBarry Smith   b->cstartbs = B->cmap->rstart/bs;
2878d0f46423SBarry Smith   b->cendbs   = B->cmap->rend/bs;
2879a23d5eceSKris Buschelman 
2880526dfc15SBarry Smith   if (!B->preallocated) {
2881f69a0ea3SMatthew Knepley     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2882d0f46423SBarry Smith     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
28839c097c71SKris Buschelman     ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
28843bb1ff40SBarry Smith     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2885f69a0ea3SMatthew Knepley     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2886d0f46423SBarry Smith     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
28879c097c71SKris Buschelman     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
28883bb1ff40SBarry Smith     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2889ce94432eSBarry Smith     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
2890526dfc15SBarry Smith   }
2891a23d5eceSKris Buschelman 
2892526dfc15SBarry Smith   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2893526dfc15SBarry Smith   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2894526dfc15SBarry Smith   B->preallocated = PETSC_TRUE;
2895a23d5eceSKris Buschelman   PetscFunctionReturn(0);
2896a23d5eceSKris Buschelman }
2897a23d5eceSKris Buschelman 
28987087cfbeSBarry Smith extern PetscErrorCode  MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
28997087cfbeSBarry Smith extern PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
29005bf65638SKris Buschelman 
290182094794SBarry Smith #undef __FUNCT__
290282094794SBarry Smith #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj"
29038cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj)
290482094794SBarry Smith {
290582094794SBarry Smith   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
290682094794SBarry Smith   PetscErrorCode ierr;
290782094794SBarry Smith   Mat_SeqBAIJ    *d  = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
290882094794SBarry Smith   PetscInt       M   = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
290982094794SBarry Smith   const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;
291082094794SBarry Smith 
291182094794SBarry Smith   PetscFunctionBegin;
2912854ce69bSBarry Smith   ierr  = PetscMalloc1(M+1,&ii);CHKERRQ(ierr);
291382094794SBarry Smith   ii[0] = 0;
291482094794SBarry Smith   for (i=0; i<M; i++) {
2915e32f2f54SBarry Smith     if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
2916e32f2f54SBarry Smith     if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
291782094794SBarry Smith     ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
29185ee9ba1cSJed Brown     /* remove one from count of matrix has diagonal */
29195ee9ba1cSJed Brown     for (j=id[i]; j<id[i+1]; j++) {
29205ee9ba1cSJed Brown       if (jd[j] == i) {ii[i+1]--;break;}
29215ee9ba1cSJed Brown     }
292282094794SBarry Smith   }
2923785e854fSJed Brown   ierr = PetscMalloc1(ii[M],&jj);CHKERRQ(ierr);
292482094794SBarry Smith   cnt  = 0;
292582094794SBarry Smith   for (i=0; i<M; i++) {
292682094794SBarry Smith     for (j=io[i]; j<io[i+1]; j++) {
292782094794SBarry Smith       if (garray[jo[j]] > rstart) break;
292882094794SBarry Smith       jj[cnt++] = garray[jo[j]];
292982094794SBarry Smith     }
293082094794SBarry Smith     for (k=id[i]; k<id[i+1]; k++) {
29315ee9ba1cSJed Brown       if (jd[k] != i) {
293282094794SBarry Smith         jj[cnt++] = rstart + jd[k];
293382094794SBarry Smith       }
29345ee9ba1cSJed Brown     }
293582094794SBarry Smith     for (; j<io[i+1]; j++) {
293682094794SBarry Smith       jj[cnt++] = garray[jo[j]];
293782094794SBarry Smith     }
293882094794SBarry Smith   }
2939ce94432eSBarry Smith   ierr = MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj);CHKERRQ(ierr);
294082094794SBarry Smith   PetscFunctionReturn(0);
294182094794SBarry Smith }
294282094794SBarry Smith 
2943c6db04a5SJed Brown #include <../src/mat/impls/aij/mpi/mpiaij.h>
294462471d69SBarry Smith 
29458cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*);
2946b2573a8aSBarry Smith 
294762471d69SBarry Smith #undef __FUNCT__
294862471d69SBarry Smith #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAIJ"
29498cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
295062471d69SBarry Smith {
295162471d69SBarry Smith   PetscErrorCode ierr;
295262471d69SBarry Smith   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
295362471d69SBarry Smith   Mat            B;
295485a69837SSatish Balay   Mat_MPIAIJ     *b;
295562471d69SBarry Smith 
295662471d69SBarry Smith   PetscFunctionBegin;
2957ce94432eSBarry Smith   if (!A->assembled) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled");
295862471d69SBarry Smith 
2959ce94432eSBarry Smith   ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
29606d0a4a0eSHong Zhang   ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2961f090d951SRémi Lacroix   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
2962f090d951SRémi Lacroix   ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
29630298fd71SBarry Smith   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
29640298fd71SBarry Smith   ierr = MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);CHKERRQ(ierr);
296562471d69SBarry Smith   b    = (Mat_MPIAIJ*) B->data;
296662471d69SBarry Smith 
29676bf464f9SBarry Smith   ierr = MatDestroy(&b->A);CHKERRQ(ierr);
29686bf464f9SBarry Smith   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2969ab9863d7SBarry Smith   ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr);
297062471d69SBarry Smith   ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr);
297162471d69SBarry Smith   ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr);
297262471d69SBarry Smith   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
297362471d69SBarry Smith   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
29746a719282SBarry Smith   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
29756a719282SBarry Smith   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
297662471d69SBarry Smith   if (reuse == MAT_REUSE_MATRIX) {
297762471d69SBarry Smith     ierr = MatHeaderReplace(A,B);CHKERRQ(ierr);
297862471d69SBarry Smith   } else {
297962471d69SBarry Smith    *newmat = B;
298062471d69SBarry Smith   }
298162471d69SBarry Smith   PetscFunctionReturn(0);
298262471d69SBarry Smith }
298362471d69SBarry Smith 
29840bad9183SKris Buschelman /*MC
2985fafad747SKris Buschelman    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
29860bad9183SKris Buschelman 
29870bad9183SKris Buschelman    Options Database Keys:
29888c07d4e3SBarry Smith + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
29898c07d4e3SBarry Smith . -mat_block_size <bs> - set the blocksize used to store the matrix
29908c07d4e3SBarry Smith - -mat_use_hash_table <fact>
29910bad9183SKris Buschelman 
29920bad9183SKris Buschelman   Level: beginner
29930bad9183SKris Buschelman 
29940bad9183SKris Buschelman .seealso: MatCreateMPIBAIJ
29950bad9183SKris Buschelman M*/
29960bad9183SKris Buschelman 
29978cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*);
2998c0cdd4a1SDahai Guo 
2999a23d5eceSKris Buschelman #undef __FUNCT__
30004a2ae208SSatish Balay #define __FUNCT__ "MatCreate_MPIBAIJ"
30018cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
3002273d9f13SBarry Smith {
3003273d9f13SBarry Smith   Mat_MPIBAIJ    *b;
3004dfbe8321SBarry Smith   PetscErrorCode ierr;
300594ae4db5SBarry Smith   PetscBool      flg = PETSC_FALSE;
3006273d9f13SBarry Smith 
3007273d9f13SBarry Smith   PetscFunctionBegin;
3008b00a9115SJed Brown   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
300982502324SSatish Balay   B->data = (void*)b;
301082502324SSatish Balay 
3011273d9f13SBarry Smith   ierr         = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3012273d9f13SBarry Smith   B->assembled = PETSC_FALSE;
3013273d9f13SBarry Smith 
3014273d9f13SBarry Smith   B->insertmode = NOT_SET_VALUES;
3015ce94432eSBarry Smith   ierr          = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
3016ce94432eSBarry Smith   ierr          = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);
3017273d9f13SBarry Smith 
3018273d9f13SBarry Smith   /* build local table of row and column ownerships */
3019854ce69bSBarry Smith   ierr = PetscMalloc1(b->size+1,&b->rangebs);CHKERRQ(ierr);
3020273d9f13SBarry Smith 
3021273d9f13SBarry Smith   /* build cache for off array entries formed */
3022ce94432eSBarry Smith   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
302326fbe8dcSKarl Rupp 
3024273d9f13SBarry Smith   b->donotstash  = PETSC_FALSE;
30250298fd71SBarry Smith   b->colmap      = NULL;
30260298fd71SBarry Smith   b->garray      = NULL;
3027273d9f13SBarry Smith   b->roworiented = PETSC_TRUE;
3028273d9f13SBarry Smith 
3029273d9f13SBarry Smith   /* stuff used in block assembly */
3030273d9f13SBarry Smith   b->barray = 0;
3031273d9f13SBarry Smith 
3032273d9f13SBarry Smith   /* stuff used for matrix vector multiply */
3033273d9f13SBarry Smith   b->lvec  = 0;
3034273d9f13SBarry Smith   b->Mvctx = 0;
3035273d9f13SBarry Smith 
3036273d9f13SBarry Smith   /* stuff for MatGetRow() */
3037273d9f13SBarry Smith   b->rowindices   = 0;
3038273d9f13SBarry Smith   b->rowvalues    = 0;
3039273d9f13SBarry Smith   b->getrowactive = PETSC_FALSE;
3040273d9f13SBarry Smith 
3041273d9f13SBarry Smith   /* hash table stuff */
3042273d9f13SBarry Smith   b->ht           = 0;
3043273d9f13SBarry Smith   b->hd           = 0;
3044273d9f13SBarry Smith   b->ht_size      = 0;
3045273d9f13SBarry Smith   b->ht_flag      = PETSC_FALSE;
3046273d9f13SBarry Smith   b->ht_fact      = 0;
3047273d9f13SBarry Smith   b->ht_total_ct  = 0;
3048273d9f13SBarry Smith   b->ht_insert_ct = 0;
3049273d9f13SBarry Smith 
30507a868f3eSHong Zhang   /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
30517a868f3eSHong Zhang   b->ijonly = PETSC_FALSE;
30527a868f3eSHong Zhang 
30538c07d4e3SBarry Smith 
3054bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
3055bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr);
3056bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr);
3057bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
3058bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
3059bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
3060bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
3061bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
3062bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
3063bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSetHashTableFactor_C",MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
3064bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpibstrm_C",MatConvert_MPIBAIJ_MPIBSTRM);CHKERRQ(ierr);
306517667f90SBarry Smith   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
306694ae4db5SBarry Smith 
306794ae4db5SBarry Smith   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr);
306894ae4db5SBarry Smith   ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",flg,&flg,NULL);CHKERRQ(ierr);
306994ae4db5SBarry Smith   if (flg) {
307094ae4db5SBarry Smith     PetscReal fact = 1.39;
307194ae4db5SBarry Smith     ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
307294ae4db5SBarry Smith     ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
307394ae4db5SBarry Smith     if (fact <= 1.0) fact = 1.39;
307494ae4db5SBarry Smith     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
307594ae4db5SBarry Smith     ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
307694ae4db5SBarry Smith   }
307794ae4db5SBarry Smith   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3078273d9f13SBarry Smith   PetscFunctionReturn(0);
3079273d9f13SBarry Smith }
3080273d9f13SBarry Smith 
3081209238afSKris Buschelman /*MC
3082002d173eSKris Buschelman    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
3083209238afSKris Buschelman 
3084209238afSKris Buschelman    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
3085209238afSKris Buschelman    and MATMPIBAIJ otherwise.
3086209238afSKris Buschelman 
3087209238afSKris Buschelman    Options Database Keys:
3088209238afSKris Buschelman . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
3089209238afSKris Buschelman 
3090209238afSKris Buschelman   Level: beginner
3091209238afSKris Buschelman 
309269b1f4b7SBarry Smith .seealso: MatCreateBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3093209238afSKris Buschelman M*/
3094209238afSKris Buschelman 
30954a2ae208SSatish Balay #undef __FUNCT__
30964a2ae208SSatish Balay #define __FUNCT__ "MatMPIBAIJSetPreallocation"
3097273d9f13SBarry Smith /*@C
3098aac34f13SBarry Smith    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
3099273d9f13SBarry Smith    (block compressed row).  For good matrix assembly performance
3100273d9f13SBarry Smith    the user should preallocate the matrix storage by setting the parameters
3101273d9f13SBarry Smith    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3102273d9f13SBarry Smith    performance can be increased by more than a factor of 50.
3103273d9f13SBarry Smith 
3104273d9f13SBarry Smith    Collective on Mat
3105273d9f13SBarry Smith 
3106273d9f13SBarry Smith    Input Parameters:
31071c4f3114SJed Brown +  B - the matrix
3108bb7ae925SBarry Smith .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3109bb7ae925SBarry Smith           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3110273d9f13SBarry Smith .  d_nz  - number of block nonzeros per block row in diagonal portion of local
3111273d9f13SBarry Smith            submatrix  (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           of the diagonal portion of the local submatrix (possibly different for each block
           row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry and
           set it even if it is zero.
3116273d9f13SBarry Smith .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
3117273d9f13SBarry Smith            submatrix (same for all local rows).
-  o_nnz - array containing the number of block nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or NULL.
3121273d9f13SBarry Smith 
312249a6f317SBarry Smith    If the *_nnz parameter is given then the *_nz parameter is ignored
3123273d9f13SBarry Smith 
3124273d9f13SBarry Smith    Options Database Keys:
31258c07d4e3SBarry Smith +   -mat_block_size - size of the blocks to use
-   -mat_use_hash_table <fact> - use a hash table when constructing the matrix, with <fact> as the hash table factor
3127273d9f13SBarry Smith 
3128273d9f13SBarry Smith    Notes:
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
3131273d9f13SBarry Smith 
3132273d9f13SBarry Smith    Storage Information:
3133273d9f13SBarry Smith    For a square global matrix we define each processor's diagonal portion
3134273d9f13SBarry Smith    to be its local rows and the corresponding columns (a square submatrix);
3135273d9f13SBarry Smith    each processor's off-diagonal portion encompasses the remainder of the
3136273d9f13SBarry Smith    local matrix (a rectangular submatrix).
3137273d9f13SBarry Smith 
3138273d9f13SBarry Smith    The user can specify preallocated storage for the diagonal part of
3139273d9f13SBarry Smith    the local submatrix with either d_nz or d_nnz (not both).  Set
31400298fd71SBarry Smith    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3141273d9f13SBarry Smith    memory allocation.  Likewise, specify preallocated storage for the
3142273d9f13SBarry Smith    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3143273d9f13SBarry Smith 
3144273d9f13SBarry Smith    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3145273d9f13SBarry Smith    the figure below we depict these three local rows and all columns (0-11).
3146273d9f13SBarry Smith 
3147273d9f13SBarry Smith .vb
3148273d9f13SBarry Smith            0 1 2 3 4 5 6 7 8 9 10 11
3149a4b1a0f6SJed Brown           --------------------------
3150273d9f13SBarry Smith    row 3  |o o o d d d o o o o  o  o
3151273d9f13SBarry Smith    row 4  |o o o d d d o o o o  o  o
3152273d9f13SBarry Smith    row 5  |o o o d d d o o o o  o  o
3153a4b1a0f6SJed Brown           --------------------------
3154273d9f13SBarry Smith .ve
3155273d9f13SBarry Smith 
3156273d9f13SBarry Smith    Thus, any entries in the d locations are stored in the d (diagonal)
3157273d9f13SBarry Smith    submatrix, and any entries in the o locations are stored in the
3158273d9f13SBarry Smith    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3159273d9f13SBarry Smith    stored simply in the MATSEQBAIJ format for compressed row storage.
3160273d9f13SBarry Smith 
3161273d9f13SBarry Smith    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3162273d9f13SBarry Smith    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3163273d9f13SBarry Smith    In general, for PDE problems in which most nonzeros are near the diagonal,
3164273d9f13SBarry Smith    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3165273d9f13SBarry Smith    or you will get TERRIBLE performance; see the users' manual chapter on
3166273d9f13SBarry Smith    matrices.
3167273d9f13SBarry Smith 
3168aa95bbe8SBarry Smith    You can call MatGetInfo() to get information on how effective the preallocation was;
3169aa95bbe8SBarry Smith    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3170aa95bbe8SBarry Smith    You can also run with the option -info and look for messages with the string
3171aa95bbe8SBarry Smith    malloc in them to see if additional memory allocation was needed.
3172aa95bbe8SBarry Smith 
3173273d9f13SBarry Smith    Level: intermediate
3174273d9f13SBarry Smith 
3175273d9f13SBarry Smith .keywords: matrix, block, aij, compressed row, sparse, parallel
3176273d9f13SBarry Smith 
3177ab978733SBarry Smith .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership()
3178273d9f13SBarry Smith @*/
31797087cfbeSBarry Smith PetscErrorCode  MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3180273d9f13SBarry Smith {
31814ac538c5SBarry Smith   PetscErrorCode ierr;
3182273d9f13SBarry Smith 
3183273d9f13SBarry Smith   PetscFunctionBegin;
31846ba663aaSJed Brown   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
31856ba663aaSJed Brown   PetscValidType(B,1);
31866ba663aaSJed Brown   PetscValidLogicalCollectiveInt(B,bs,2);
31874ac538c5SBarry Smith   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3188273d9f13SBarry Smith   PetscFunctionReturn(0);
3189273d9f13SBarry Smith }
3190273d9f13SBarry Smith 
31914a2ae208SSatish Balay #undef __FUNCT__
319269b1f4b7SBarry Smith #define __FUNCT__ "MatCreateBAIJ"
319379bdfe76SSatish Balay /*@C
319469b1f4b7SBarry Smith    MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format
319579bdfe76SSatish Balay    (block compressed row).  For good matrix assembly performance
319679bdfe76SSatish Balay    the user should preallocate the matrix storage by setting the parameters
319779bdfe76SSatish Balay    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
319879bdfe76SSatish Balay    performance can be increased by more than a factor of 50.
319979bdfe76SSatish Balay 
3200db81eaa0SLois Curfman McInnes    Collective on MPI_Comm
3201db81eaa0SLois Curfman McInnes 
320279bdfe76SSatish Balay    Input Parameters:
3203db81eaa0SLois Curfman McInnes +  comm - MPI communicator
3204bb7ae925SBarry Smith .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3205bb7ae925SBarry Smith           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
320679bdfe76SSatish Balay .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
320792e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
320892e8d321SLois Curfman McInnes            y vector for the matrix-vector product y = Ax.
320992e8d321SLois Curfman McInnes .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
321092e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
321192e8d321SLois Curfman McInnes            x vector for the matrix-vector product y = Ax.
3212be79a94dSBarry Smith .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3213be79a94dSBarry Smith .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
321447a75d0bSBarry Smith .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
321579bdfe76SSatish Balay            submatrix  (same for all local rows)
.  d_nnz - array containing the number of nonzero blocks in the various block rows
           of the diagonal portion of the local submatrix (possibly different for each block
           row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
           and set it even if it is zero.
322047a75d0bSBarry Smith .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
322179bdfe76SSatish Balay            submatrix (same for all local rows).
322247a75d0bSBarry Smith -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
322392e8d321SLois Curfman McInnes            off-diagonal portion of the local submatrix (possibly different for
32240298fd71SBarry Smith            each block row) or NULL.
322579bdfe76SSatish Balay 
322679bdfe76SSatish Balay    Output Parameter:
322779bdfe76SSatish Balay .  A - the matrix
322879bdfe76SSatish Balay 
3229db81eaa0SLois Curfman McInnes    Options Database Keys:
32308c07d4e3SBarry Smith +   -mat_block_size - size of the blocks to use
-   -mat_use_hash_table <fact> - use a hash table when constructing the matrix, with <fact> as the hash table factor
32323ffaccefSLois Curfman McInnes 
   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
3236175b88e8SBarry Smith 
3237b259b22eSLois Curfman McInnes    Notes:
323849a6f317SBarry Smith    If the *_nnz parameter is given then the *_nz parameter is ignored
323949a6f317SBarry Smith 
   A nonzero block is any block that has 1 or more nonzeros in it
324147a75d0bSBarry Smith 
324279bdfe76SSatish Balay    The user MUST specify either the local or global matrix dimensions
324379bdfe76SSatish Balay    (possibly both).
324479bdfe76SSatish Balay 
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
3247be79a94dSBarry Smith 
324879bdfe76SSatish Balay    Storage Information:
324979bdfe76SSatish Balay    For a square global matrix we define each processor's diagonal portion
325079bdfe76SSatish Balay    to be its local rows and the corresponding columns (a square submatrix);
325179bdfe76SSatish Balay    each processor's off-diagonal portion encompasses the remainder of the
325279bdfe76SSatish Balay    local matrix (a rectangular submatrix).
325379bdfe76SSatish Balay 
325479bdfe76SSatish Balay    The user can specify preallocated storage for the diagonal part of
325579bdfe76SSatish Balay    the local submatrix with either d_nz or d_nnz (not both).  Set
32560298fd71SBarry Smith    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
325779bdfe76SSatish Balay    memory allocation.  Likewise, specify preallocated storage for the
325879bdfe76SSatish Balay    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
325979bdfe76SSatish Balay 
326079bdfe76SSatish Balay    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
326179bdfe76SSatish Balay    the figure below we depict these three local rows and all columns (0-11).
326279bdfe76SSatish Balay 
3263db81eaa0SLois Curfman McInnes .vb
3264db81eaa0SLois Curfman McInnes            0 1 2 3 4 5 6 7 8 9 10 11
3265a4b1a0f6SJed Brown           --------------------------
3266db81eaa0SLois Curfman McInnes    row 3  |o o o d d d o o o o  o  o
3267db81eaa0SLois Curfman McInnes    row 4  |o o o d d d o o o o  o  o
3268db81eaa0SLois Curfman McInnes    row 5  |o o o d d d o o o o  o  o
3269a4b1a0f6SJed Brown           --------------------------
3270db81eaa0SLois Curfman McInnes .ve
327179bdfe76SSatish Balay 
327279bdfe76SSatish Balay    Thus, any entries in the d locations are stored in the d (diagonal)
327379bdfe76SSatish Balay    submatrix, and any entries in the o locations are stored in the
327479bdfe76SSatish Balay    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
327557b952d6SSatish Balay    stored simply in the MATSEQBAIJ format for compressed row storage.
327679bdfe76SSatish Balay 
3277d64ed03dSBarry Smith    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3278d64ed03dSBarry Smith    and o_nz should indicate the number of block nonzeros per row in the o matrix.
327979bdfe76SSatish Balay    In general, for PDE problems in which most nonzeros are near the diagonal,
328092e8d321SLois Curfman McInnes    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
328192e8d321SLois Curfman McInnes    or you will get TERRIBLE performance; see the users' manual chapter on
32826da5968aSLois Curfman McInnes    matrices.
328379bdfe76SSatish Balay 
3284027ccd11SLois Curfman McInnes    Level: intermediate
3285027ccd11SLois Curfman McInnes 
328692e8d321SLois Curfman McInnes .keywords: matrix, block, aij, compressed row, sparse, parallel
328779bdfe76SSatish Balay 
328869b1f4b7SBarry Smith .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
328979bdfe76SSatish Balay @*/
329069b1f4b7SBarry Smith PetscErrorCode  MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
329179bdfe76SSatish Balay {
32926849ba73SBarry Smith   PetscErrorCode ierr;
3293b24ad042SBarry Smith   PetscMPIInt    size;
329479bdfe76SSatish Balay 
3295d64ed03dSBarry Smith   PetscFunctionBegin;
3296f69a0ea3SMatthew Knepley   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3297f69a0ea3SMatthew Knepley   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3298d132466eSBarry Smith   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3299273d9f13SBarry Smith   if (size > 1) {
3300273d9f13SBarry Smith     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
3301273d9f13SBarry Smith     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3302273d9f13SBarry Smith   } else {
3303273d9f13SBarry Smith     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3304273d9f13SBarry Smith     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
33053914022bSBarry Smith   }
33063a40ed3dSBarry Smith   PetscFunctionReturn(0);
330779bdfe76SSatish Balay }
3308026e39d0SSatish Balay 
33094a2ae208SSatish Balay #undef __FUNCT__
33104a2ae208SSatish Balay #define __FUNCT__ "MatDuplicate_MPIBAIJ"
33116849ba73SBarry Smith static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
33120ac07820SSatish Balay {
33130ac07820SSatish Balay   Mat            mat;
33140ac07820SSatish Balay   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
3315dfbe8321SBarry Smith   PetscErrorCode ierr;
3316b24ad042SBarry Smith   PetscInt       len=0;
33170ac07820SSatish Balay 
3318d64ed03dSBarry Smith   PetscFunctionBegin;
33190ac07820SSatish Balay   *newmat = 0;
3320ce94432eSBarry Smith   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
3321d0f46423SBarry Smith   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
33227adad957SLisandro Dalcin   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
33231d5dac46SHong Zhang   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
33247fff6886SHong Zhang 
3325d5f3da31SBarry Smith   mat->factortype   = matin->factortype;
3326273d9f13SBarry Smith   mat->preallocated = PETSC_TRUE;
33270ac07820SSatish Balay   mat->assembled    = PETSC_TRUE;
33287fff6886SHong Zhang   mat->insertmode   = NOT_SET_VALUES;
33297fff6886SHong Zhang 
3330273d9f13SBarry Smith   a             = (Mat_MPIBAIJ*)mat->data;
3331d0f46423SBarry Smith   mat->rmap->bs = matin->rmap->bs;
33320ac07820SSatish Balay   a->bs2        = oldmat->bs2;
33330ac07820SSatish Balay   a->mbs        = oldmat->mbs;
33340ac07820SSatish Balay   a->nbs        = oldmat->nbs;
33350ac07820SSatish Balay   a->Mbs        = oldmat->Mbs;
33360ac07820SSatish Balay   a->Nbs        = oldmat->Nbs;
33370ac07820SSatish Balay 
33381e1e43feSBarry Smith   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
33391e1e43feSBarry Smith   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3340899cda47SBarry Smith 
33410ac07820SSatish Balay   a->size         = oldmat->size;
33420ac07820SSatish Balay   a->rank         = oldmat->rank;
3343aef5e8e0SSatish Balay   a->donotstash   = oldmat->donotstash;
3344aef5e8e0SSatish Balay   a->roworiented  = oldmat->roworiented;
3345aef5e8e0SSatish Balay   a->rowindices   = 0;
33460ac07820SSatish Balay   a->rowvalues    = 0;
33470ac07820SSatish Balay   a->getrowactive = PETSC_FALSE;
334830793edcSSatish Balay   a->barray       = 0;
3349899cda47SBarry Smith   a->rstartbs     = oldmat->rstartbs;
3350899cda47SBarry Smith   a->rendbs       = oldmat->rendbs;
3351899cda47SBarry Smith   a->cstartbs     = oldmat->cstartbs;
3352899cda47SBarry Smith   a->cendbs       = oldmat->cendbs;
33530ac07820SSatish Balay 
3354133cdb44SSatish Balay   /* hash table stuff */
3355133cdb44SSatish Balay   a->ht           = 0;
3356133cdb44SSatish Balay   a->hd           = 0;
3357133cdb44SSatish Balay   a->ht_size      = 0;
3358133cdb44SSatish Balay   a->ht_flag      = oldmat->ht_flag;
335925fdafccSSatish Balay   a->ht_fact      = oldmat->ht_fact;
3360133cdb44SSatish Balay   a->ht_total_ct  = 0;
3361133cdb44SSatish Balay   a->ht_insert_ct = 0;
3362133cdb44SSatish Balay 
3363899cda47SBarry Smith   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
33640ac07820SSatish Balay   if (oldmat->colmap) {
3365aa482453SBarry Smith #if defined(PETSC_USE_CTABLE)
33660f5bd95cSBarry Smith     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
336748e59246SSatish Balay #else
3368854ce69bSBarry Smith     ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr);
33693bb1ff40SBarry Smith     ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3370b24ad042SBarry Smith     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
337148e59246SSatish Balay #endif
33720ac07820SSatish Balay   } else a->colmap = 0;
33734beb1cfeSHong Zhang 
33740ac07820SSatish Balay   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
3375785e854fSJed Brown     ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
33763bb1ff40SBarry Smith     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3377b24ad042SBarry Smith     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
33780ac07820SSatish Balay   } else a->garray = 0;
33790ac07820SSatish Balay 
3380ce94432eSBarry Smith   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
33810ac07820SSatish Balay   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
33823bb1ff40SBarry Smith   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
33830ac07820SSatish Balay   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
33843bb1ff40SBarry Smith   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
33857fff6886SHong Zhang 
33862e8a6d31SBarry Smith   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
33873bb1ff40SBarry Smith   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
33882e8a6d31SBarry Smith   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
33893bb1ff40SBarry Smith   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
3390140e18c1SBarry Smith   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
33910ac07820SSatish Balay   *newmat = mat;
33923a40ed3dSBarry Smith   PetscFunctionReturn(0);
33930ac07820SSatish Balay }
339457b952d6SSatish Balay 
33954a2ae208SSatish Balay #undef __FUNCT__
33965bba2384SShri Abhyankar #define __FUNCT__ "MatLoad_MPIBAIJ"
3397112444f4SShri Abhyankar PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer)
33984683f7a4SShri Abhyankar {
33994683f7a4SShri Abhyankar   PetscErrorCode ierr;
34004683f7a4SShri Abhyankar   int            fd;
34014683f7a4SShri Abhyankar   PetscInt       i,nz,j,rstart,rend;
34024683f7a4SShri Abhyankar   PetscScalar    *vals,*buf;
3403ce94432eSBarry Smith   MPI_Comm       comm;
34044683f7a4SShri Abhyankar   MPI_Status     status;
34054683f7a4SShri Abhyankar   PetscMPIInt    rank,size,maxnz;
34064683f7a4SShri Abhyankar   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
34070298fd71SBarry Smith   PetscInt       *locrowlens = NULL,*procsnz = NULL,*browners = NULL;
34083059b6faSBarry Smith   PetscInt       jj,*mycols,*ibuf,bs = newmat->rmap->bs,Mbs,mbs,extra_rows,mmax;
34094683f7a4SShri Abhyankar   PetscMPIInt    tag    = ((PetscObject)viewer)->tag;
34100298fd71SBarry Smith   PetscInt       *dlens = NULL,*odlens = NULL,*mask = NULL,*masked1 = NULL,*masked2 = NULL,rowcount,odcount;
34114683f7a4SShri Abhyankar   PetscInt       dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols;
34124683f7a4SShri Abhyankar 
34134683f7a4SShri Abhyankar   PetscFunctionBegin;
3414*c98fd787SBarry Smith   /* force binary viewer to load .info file if it has not yet done so */
3415*c98fd787SBarry Smith   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3416ce94432eSBarry Smith   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
34170298fd71SBarry Smith   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr);
34180298fd71SBarry Smith   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
34194683f7a4SShri Abhyankar   ierr = PetscOptionsEnd();CHKERRQ(ierr);
34203059b6faSBarry Smith   if (bs < 0) bs = 1;
34214683f7a4SShri Abhyankar 
34224683f7a4SShri Abhyankar   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
34234683f7a4SShri Abhyankar   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
34244683f7a4SShri Abhyankar   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
34255872f025SBarry Smith   if (!rank) {
34264683f7a4SShri Abhyankar     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
34274683f7a4SShri Abhyankar     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
34284683f7a4SShri Abhyankar   }
34294683f7a4SShri Abhyankar 
34304683f7a4SShri Abhyankar   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
34314683f7a4SShri Abhyankar 
34324683f7a4SShri Abhyankar   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
34334683f7a4SShri Abhyankar   M    = header[1]; N = header[2];
34344683f7a4SShri Abhyankar 
34354683f7a4SShri Abhyankar   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
34364683f7a4SShri Abhyankar   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
34374683f7a4SShri Abhyankar   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
34384683f7a4SShri Abhyankar 
34394683f7a4SShri Abhyankar   /* If global sizes are set, check if they are consistent with that given in the file */
34404683f7a4SShri Abhyankar   if (sizesset) {
34414683f7a4SShri Abhyankar     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
34424683f7a4SShri Abhyankar   }
3443abd38a8fSBarry Smith   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3444abd38a8fSBarry Smith   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
34454683f7a4SShri Abhyankar 
3446ce94432eSBarry Smith   if (M != N) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Can only do square matrices");
34474683f7a4SShri Abhyankar 
34484683f7a4SShri Abhyankar   /*
34494683f7a4SShri Abhyankar      This code adds extra rows to make sure the number of rows is
34504683f7a4SShri Abhyankar      divisible by the blocksize
34514683f7a4SShri Abhyankar   */
34524683f7a4SShri Abhyankar   Mbs        = M/bs;
34534683f7a4SShri Abhyankar   extra_rows = bs - M + bs*Mbs;
34544683f7a4SShri Abhyankar   if (extra_rows == bs) extra_rows = 0;
34554683f7a4SShri Abhyankar   else                  Mbs++;
34564683f7a4SShri Abhyankar   if (extra_rows && !rank) {
34574683f7a4SShri Abhyankar     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
34584683f7a4SShri Abhyankar   }
34594683f7a4SShri Abhyankar 
34604683f7a4SShri Abhyankar   /* determine ownership of all rows */
34614683f7a4SShri Abhyankar   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
34624683f7a4SShri Abhyankar     mbs = Mbs/size + ((Mbs % size) > rank);
34634683f7a4SShri Abhyankar     m   = mbs*bs;
34644683f7a4SShri Abhyankar   } else { /* User set */
34654683f7a4SShri Abhyankar     m   = newmat->rmap->n;
34664683f7a4SShri Abhyankar     mbs = m/bs;
34674683f7a4SShri Abhyankar   }
3468dcca6d9dSJed Brown   ierr = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr);
34694683f7a4SShri Abhyankar   ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
34704683f7a4SShri Abhyankar 
34714683f7a4SShri Abhyankar   /* process 0 needs enough room for process with most rows */
34724683f7a4SShri Abhyankar   if (!rank) {
34734683f7a4SShri Abhyankar     mmax = rowners[1];
34741251c579SMatthew G Knepley     for (i=2; i<=size; i++) {
34754683f7a4SShri Abhyankar       mmax = PetscMax(mmax,rowners[i]);
34764683f7a4SShri Abhyankar     }
34774683f7a4SShri Abhyankar     mmax*=bs;
34783964eb88SJed Brown   } else mmax = -1;             /* unused, but compiler warns anyway */
34794683f7a4SShri Abhyankar 
34804683f7a4SShri Abhyankar   rowners[0] = 0;
34814683f7a4SShri Abhyankar   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
34824683f7a4SShri Abhyankar   for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
34834683f7a4SShri Abhyankar   rstart = rowners[rank];
34844683f7a4SShri Abhyankar   rend   = rowners[rank+1];
34854683f7a4SShri Abhyankar 
34864683f7a4SShri Abhyankar   /* distribute row lengths to all processors */
3487785e854fSJed Brown   ierr = PetscMalloc1(m,&locrowlens);CHKERRQ(ierr);
34884683f7a4SShri Abhyankar   if (!rank) {
34894683f7a4SShri Abhyankar     mend = m;
34904683f7a4SShri Abhyankar     if (size == 1) mend = mend - extra_rows;
34914683f7a4SShri Abhyankar     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
34924683f7a4SShri Abhyankar     for (j=mend; j<m; j++) locrowlens[j] = 1;
3493785e854fSJed Brown     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
34941795a4d1SJed Brown     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
34954683f7a4SShri Abhyankar     for (j=0; j<m; j++) {
34964683f7a4SShri Abhyankar       procsnz[0] += locrowlens[j];
34974683f7a4SShri Abhyankar     }
34984683f7a4SShri Abhyankar     for (i=1; i<size; i++) {
34994683f7a4SShri Abhyankar       mend = browners[i+1] - browners[i];
35004683f7a4SShri Abhyankar       if (i == size-1) mend = mend - extra_rows;
35014683f7a4SShri Abhyankar       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
35024683f7a4SShri Abhyankar       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
35034683f7a4SShri Abhyankar       /* calculate the number of nonzeros on each processor */
35044683f7a4SShri Abhyankar       for (j=0; j<browners[i+1]-browners[i]; j++) {
35054683f7a4SShri Abhyankar         procsnz[i] += rowlengths[j];
35064683f7a4SShri Abhyankar       }
35074683f7a4SShri Abhyankar       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
35084683f7a4SShri Abhyankar     }
35094683f7a4SShri Abhyankar     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
35104683f7a4SShri Abhyankar   } else {
35114683f7a4SShri Abhyankar     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
35124683f7a4SShri Abhyankar   }
35134683f7a4SShri Abhyankar 
35144683f7a4SShri Abhyankar   if (!rank) {
35154683f7a4SShri Abhyankar     /* determine max buffer needed and allocate it */
35164683f7a4SShri Abhyankar     maxnz = procsnz[0];
35174683f7a4SShri Abhyankar     for (i=1; i<size; i++) {
35184683f7a4SShri Abhyankar       maxnz = PetscMax(maxnz,procsnz[i]);
35194683f7a4SShri Abhyankar     }
3520785e854fSJed Brown     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
35214683f7a4SShri Abhyankar 
35224683f7a4SShri Abhyankar     /* read in my part of the matrix column indices  */
35234683f7a4SShri Abhyankar     nz     = procsnz[0];
3524854ce69bSBarry Smith     ierr   = PetscMalloc1(nz+1,&ibuf);CHKERRQ(ierr);
35254683f7a4SShri Abhyankar     mycols = ibuf;
35264683f7a4SShri Abhyankar     if (size == 1) nz -= extra_rows;
35274683f7a4SShri Abhyankar     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
352826fbe8dcSKarl Rupp     if (size == 1) {
352926fbe8dcSKarl Rupp       for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i;
353026fbe8dcSKarl Rupp     }
35314683f7a4SShri Abhyankar 
35324683f7a4SShri Abhyankar     /* read in every ones (except the last) and ship off */
35334683f7a4SShri Abhyankar     for (i=1; i<size-1; i++) {
35344683f7a4SShri Abhyankar       nz   = procsnz[i];
35354683f7a4SShri Abhyankar       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
35364683f7a4SShri Abhyankar       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
35374683f7a4SShri Abhyankar     }
35384683f7a4SShri Abhyankar     /* read in the stuff for the last proc */
35394683f7a4SShri Abhyankar     if (size != 1) {
35404683f7a4SShri Abhyankar       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
35414683f7a4SShri Abhyankar       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
35424683f7a4SShri Abhyankar       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
35434683f7a4SShri Abhyankar       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
35444683f7a4SShri Abhyankar     }
35454683f7a4SShri Abhyankar     ierr = PetscFree(cols);CHKERRQ(ierr);
35464683f7a4SShri Abhyankar   } else {
35474683f7a4SShri Abhyankar     /* determine buffer space needed for message */
35484683f7a4SShri Abhyankar     nz = 0;
35494683f7a4SShri Abhyankar     for (i=0; i<m; i++) {
35504683f7a4SShri Abhyankar       nz += locrowlens[i];
35514683f7a4SShri Abhyankar     }
3552854ce69bSBarry Smith     ierr   = PetscMalloc1(nz+1,&ibuf);CHKERRQ(ierr);
35534683f7a4SShri Abhyankar     mycols = ibuf;
35544683f7a4SShri Abhyankar     /* receive message of column indices*/
35554683f7a4SShri Abhyankar     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
35564683f7a4SShri Abhyankar     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
35574683f7a4SShri Abhyankar     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
35584683f7a4SShri Abhyankar   }
35594683f7a4SShri Abhyankar 
35604683f7a4SShri Abhyankar   /* loop over local rows, determining number of off diagonal entries */
3561dcca6d9dSJed Brown   ierr     = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr);
35621795a4d1SJed Brown   ierr     = PetscCalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr);
35634683f7a4SShri Abhyankar   rowcount = 0; nzcount = 0;
35644683f7a4SShri Abhyankar   for (i=0; i<mbs; i++) {
35654683f7a4SShri Abhyankar     dcount  = 0;
35664683f7a4SShri Abhyankar     odcount = 0;
35674683f7a4SShri Abhyankar     for (j=0; j<bs; j++) {
35684683f7a4SShri Abhyankar       kmax = locrowlens[rowcount];
35694683f7a4SShri Abhyankar       for (k=0; k<kmax; k++) {
35704683f7a4SShri Abhyankar         tmp = mycols[nzcount++]/bs;
35714683f7a4SShri Abhyankar         if (!mask[tmp]) {
35724683f7a4SShri Abhyankar           mask[tmp] = 1;
35734683f7a4SShri Abhyankar           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
35744683f7a4SShri Abhyankar           else masked1[dcount++] = tmp;
35754683f7a4SShri Abhyankar         }
35764683f7a4SShri Abhyankar       }
35774683f7a4SShri Abhyankar       rowcount++;
35784683f7a4SShri Abhyankar     }
35794683f7a4SShri Abhyankar 
35804683f7a4SShri Abhyankar     dlens[i]  = dcount;
35814683f7a4SShri Abhyankar     odlens[i] = odcount;
35824683f7a4SShri Abhyankar 
35834683f7a4SShri Abhyankar     /* zero out the mask elements we set */
35844683f7a4SShri Abhyankar     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
35854683f7a4SShri Abhyankar     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
35864683f7a4SShri Abhyankar   }
35874683f7a4SShri Abhyankar 
35884683f7a4SShri Abhyankar 
35894683f7a4SShri Abhyankar   if (!sizesset) {
35904683f7a4SShri Abhyankar     ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
35914683f7a4SShri Abhyankar   }
35924683f7a4SShri Abhyankar   ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
35934683f7a4SShri Abhyankar 
35944683f7a4SShri Abhyankar   if (!rank) {
3595854ce69bSBarry Smith     ierr = PetscMalloc1(maxnz+1,&buf);CHKERRQ(ierr);
35964683f7a4SShri Abhyankar     /* read in my part of the matrix numerical values  */
35974683f7a4SShri Abhyankar     nz     = procsnz[0];
35984683f7a4SShri Abhyankar     vals   = buf;
35994683f7a4SShri Abhyankar     mycols = ibuf;
36004683f7a4SShri Abhyankar     if (size == 1) nz -= extra_rows;
36014683f7a4SShri Abhyankar     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
360226fbe8dcSKarl Rupp     if (size == 1) {
360326fbe8dcSKarl Rupp       for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0;
360426fbe8dcSKarl Rupp     }
36054683f7a4SShri Abhyankar 
36064683f7a4SShri Abhyankar     /* insert into matrix */
36074683f7a4SShri Abhyankar     jj = rstart*bs;
36084683f7a4SShri Abhyankar     for (i=0; i<m; i++) {
36094683f7a4SShri Abhyankar       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
36104683f7a4SShri Abhyankar       mycols += locrowlens[i];
36114683f7a4SShri Abhyankar       vals   += locrowlens[i];
36124683f7a4SShri Abhyankar       jj++;
36134683f7a4SShri Abhyankar     }
36144683f7a4SShri Abhyankar     /* read in other processors (except the last one) and ship out */
36154683f7a4SShri Abhyankar     for (i=1; i<size-1; i++) {
36164683f7a4SShri Abhyankar       nz   = procsnz[i];
36174683f7a4SShri Abhyankar       vals = buf;
36184683f7a4SShri Abhyankar       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3619479e424cSMichael Lange       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
36204683f7a4SShri Abhyankar     }
36214683f7a4SShri Abhyankar     /* the last proc */
36224683f7a4SShri Abhyankar     if (size != 1) {
36234683f7a4SShri Abhyankar       nz   = procsnz[i] - extra_rows;
36244683f7a4SShri Abhyankar       vals = buf;
36254683f7a4SShri Abhyankar       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
36264683f7a4SShri Abhyankar       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
3627479e424cSMichael Lange       ierr = MPIULong_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
36284683f7a4SShri Abhyankar     }
36294683f7a4SShri Abhyankar     ierr = PetscFree(procsnz);CHKERRQ(ierr);
36304683f7a4SShri Abhyankar   } else {
36314683f7a4SShri Abhyankar     /* receive numeric values */
3632854ce69bSBarry Smith     ierr = PetscMalloc1(nz+1,&buf);CHKERRQ(ierr);
36334683f7a4SShri Abhyankar 
36344683f7a4SShri Abhyankar     /* receive message of values*/
36354683f7a4SShri Abhyankar     vals   = buf;
36364683f7a4SShri Abhyankar     mycols = ibuf;
3637479e424cSMichael Lange     ierr   = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
36384683f7a4SShri Abhyankar 
36394683f7a4SShri Abhyankar     /* insert into matrix */
36404683f7a4SShri Abhyankar     jj = rstart*bs;
36414683f7a4SShri Abhyankar     for (i=0; i<m; i++) {
36424683f7a4SShri Abhyankar       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
36434683f7a4SShri Abhyankar       mycols += locrowlens[i];
36444683f7a4SShri Abhyankar       vals   += locrowlens[i];
36454683f7a4SShri Abhyankar       jj++;
36464683f7a4SShri Abhyankar     }
36474683f7a4SShri Abhyankar   }
36484683f7a4SShri Abhyankar   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
36494683f7a4SShri Abhyankar   ierr = PetscFree(buf);CHKERRQ(ierr);
36504683f7a4SShri Abhyankar   ierr = PetscFree(ibuf);CHKERRQ(ierr);
36514683f7a4SShri Abhyankar   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
36524683f7a4SShri Abhyankar   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
36534683f7a4SShri Abhyankar   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
36544683f7a4SShri Abhyankar   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
36554683f7a4SShri Abhyankar   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
36564683f7a4SShri Abhyankar   PetscFunctionReturn(0);
36574683f7a4SShri Abhyankar }
36584683f7a4SShri Abhyankar 
36594683f7a4SShri Abhyankar #undef __FUNCT__
36604a2ae208SSatish Balay #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
3661133cdb44SSatish Balay /*@
3662133cdb44SSatish Balay    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
3663133cdb44SSatish Balay 
3664133cdb44SSatish Balay    Input Parameters:
3665133cdb44SSatish Balay .  mat  - the matrix
3666133cdb44SSatish Balay .  fact - factor
3667133cdb44SSatish Balay 
3668c5eb9154SBarry Smith    Not Collective, each process can use a different factor
3669fee21e36SBarry Smith 
36708c890885SBarry Smith    Level: advanced
36718c890885SBarry Smith 
3672133cdb44SSatish Balay   Notes:
36738c07d4e3SBarry Smith    This can also be set by the command line option: -mat_use_hash_table <fact>
3674133cdb44SSatish Balay 
3675133cdb44SSatish Balay .keywords: matrix, hashtable, factor, HT
3676133cdb44SSatish Balay 
3677133cdb44SSatish Balay .seealso: MatSetOption()
3678133cdb44SSatish Balay @*/
36797087cfbeSBarry Smith PetscErrorCode  MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
3680133cdb44SSatish Balay {
36814ac538c5SBarry Smith   PetscErrorCode ierr;
36825bf65638SKris Buschelman 
36835bf65638SKris Buschelman   PetscFunctionBegin;
36844ac538c5SBarry Smith   ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr);
36855bf65638SKris Buschelman   PetscFunctionReturn(0);
36865bf65638SKris Buschelman }
36875bf65638SKris Buschelman 
36885bf65638SKris Buschelman #undef __FUNCT__
36895bf65638SKris Buschelman #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
36907087cfbeSBarry Smith PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
36915bf65638SKris Buschelman {
369225fdafccSSatish Balay   Mat_MPIBAIJ *baij;
3693133cdb44SSatish Balay 
3694133cdb44SSatish Balay   PetscFunctionBegin;
3695133cdb44SSatish Balay   baij          = (Mat_MPIBAIJ*)mat->data;
3696133cdb44SSatish Balay   baij->ht_fact = fact;
3697133cdb44SSatish Balay   PetscFunctionReturn(0);
3698133cdb44SSatish Balay }
3699f2a5309cSSatish Balay 
37004a2ae208SSatish Balay #undef __FUNCT__
37014a2ae208SSatish Balay #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
37029230625dSJed Brown PetscErrorCode  MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3703f2a5309cSSatish Balay {
3704f2a5309cSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
37055fd66863SKarl Rupp 
3706f2a5309cSSatish Balay   PetscFunctionBegin;
370721e72a00SBarry Smith   if (Ad)     *Ad     = a->A;
370821e72a00SBarry Smith   if (Ao)     *Ao     = a->B;
370921e72a00SBarry Smith   if (colmap) *colmap = a->garray;
3710f2a5309cSSatish Balay   PetscFunctionReturn(0);
3711f2a5309cSSatish Balay }
371285535b8eSBarry Smith 
371385535b8eSBarry Smith /*
371485535b8eSBarry Smith     Special version for direct calls from Fortran (to eliminate two function call overheads
371585535b8eSBarry Smith */
371685535b8eSBarry Smith #if defined(PETSC_HAVE_FORTRAN_CAPS)
371785535b8eSBarry Smith #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
371885535b8eSBarry Smith #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
371985535b8eSBarry Smith #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
372085535b8eSBarry Smith #endif
372185535b8eSBarry Smith 
372285535b8eSBarry Smith #undef __FUNCT__
372385535b8eSBarry Smith #define __FUNCT__ "matmpibiajsetvaluesblocked"
372485535b8eSBarry Smith /*@C
372585535b8eSBarry Smith   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
372685535b8eSBarry Smith 
372785535b8eSBarry Smith   Collective on Mat
372885535b8eSBarry Smith 
372985535b8eSBarry Smith   Input Parameters:
373085535b8eSBarry Smith + mat - the matrix
373185535b8eSBarry Smith . min - number of input rows
373285535b8eSBarry Smith . im - input rows
373385535b8eSBarry Smith . nin - number of input columns
373485535b8eSBarry Smith . in - input columns
373585535b8eSBarry Smith . v - numerical values input
373685535b8eSBarry Smith - addvin - INSERT_VALUES or ADD_VALUES
373785535b8eSBarry Smith 
373885535b8eSBarry Smith   Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.
373985535b8eSBarry Smith 
374085535b8eSBarry Smith   Level: advanced
374185535b8eSBarry Smith 
374285535b8eSBarry Smith .seealso:   MatSetValuesBlocked()
374385535b8eSBarry Smith @*/
374485535b8eSBarry Smith PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
374585535b8eSBarry Smith {
374685535b8eSBarry Smith   /* convert input arguments to C version */
374785535b8eSBarry Smith   Mat        mat  = *matin;
374885535b8eSBarry Smith   PetscInt   m    = *min, n = *nin;
374985535b8eSBarry Smith   InsertMode addv = *addvin;
375085535b8eSBarry Smith 
375185535b8eSBarry Smith   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
375285535b8eSBarry Smith   const MatScalar *value;
375385535b8eSBarry Smith   MatScalar       *barray     = baij->barray;
3754ace3abfcSBarry Smith   PetscBool       roworiented = baij->roworiented;
375585535b8eSBarry Smith   PetscErrorCode  ierr;
375685535b8eSBarry Smith   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
375785535b8eSBarry Smith   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
3758d0f46423SBarry Smith   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
375985535b8eSBarry Smith 
376085535b8eSBarry Smith   PetscFunctionBegin;
376185535b8eSBarry Smith   /* tasks normally handled by MatSetValuesBlocked() */
376226fbe8dcSKarl Rupp   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
376385535b8eSBarry Smith #if defined(PETSC_USE_DEBUG)
3764e7e72b3dSBarry Smith   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
3765e32f2f54SBarry Smith   if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
376685535b8eSBarry Smith #endif
376785535b8eSBarry Smith   if (mat->assembled) {
376885535b8eSBarry Smith     mat->was_assembled = PETSC_TRUE;
376985535b8eSBarry Smith     mat->assembled     = PETSC_FALSE;
377085535b8eSBarry Smith   }
377185535b8eSBarry Smith   ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
377285535b8eSBarry Smith 
377385535b8eSBarry Smith 
377485535b8eSBarry Smith   if (!barray) {
3775785e854fSJed Brown     ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
377685535b8eSBarry Smith     baij->barray = barray;
377785535b8eSBarry Smith   }
377885535b8eSBarry Smith 
377926fbe8dcSKarl Rupp   if (roworiented) stepval = (n-1)*bs;
378026fbe8dcSKarl Rupp   else stepval = (m-1)*bs;
378126fbe8dcSKarl Rupp 
378285535b8eSBarry Smith   for (i=0; i<m; i++) {
378385535b8eSBarry Smith     if (im[i] < 0) continue;
378485535b8eSBarry Smith #if defined(PETSC_USE_DEBUG)
3785e32f2f54SBarry Smith     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
378685535b8eSBarry Smith #endif
378785535b8eSBarry Smith     if (im[i] >= rstart && im[i] < rend) {
378885535b8eSBarry Smith       row = im[i] - rstart;
378985535b8eSBarry Smith       for (j=0; j<n; j++) {
379085535b8eSBarry Smith         /* If NumCol = 1 then a copy is not required */
379185535b8eSBarry Smith         if ((roworiented) && (n == 1)) {
379285535b8eSBarry Smith           barray = (MatScalar*)v + i*bs2;
379385535b8eSBarry Smith         } else if ((!roworiented) && (m == 1)) {
379485535b8eSBarry Smith           barray = (MatScalar*)v + j*bs2;
379585535b8eSBarry Smith         } else { /* Here a copy is required */
379685535b8eSBarry Smith           if (roworiented) {
379785535b8eSBarry Smith             value = v + i*(stepval+bs)*bs + j*bs;
379885535b8eSBarry Smith           } else {
379985535b8eSBarry Smith             value = v + j*(stepval+bs)*bs + i*bs;
380085535b8eSBarry Smith           }
380185535b8eSBarry Smith           for (ii=0; ii<bs; ii++,value+=stepval) {
380285535b8eSBarry Smith             for (jj=0; jj<bs; jj++) {
380385535b8eSBarry Smith               *barray++ = *value++;
380485535b8eSBarry Smith             }
380585535b8eSBarry Smith           }
380685535b8eSBarry Smith           barray -=bs2;
380785535b8eSBarry Smith         }
380885535b8eSBarry Smith 
380985535b8eSBarry Smith         if (in[j] >= cstart && in[j] < cend) {
381085535b8eSBarry Smith           col  = in[j] - cstart;
381197e5c40aSBarry Smith           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
381226fbe8dcSKarl Rupp         } else if (in[j] < 0) continue;
381385535b8eSBarry Smith #if defined(PETSC_USE_DEBUG)
3814cb9801acSJed Brown         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
381585535b8eSBarry Smith #endif
381685535b8eSBarry Smith         else {
381785535b8eSBarry Smith           if (mat->was_assembled) {
381885535b8eSBarry Smith             if (!baij->colmap) {
3819ab9863d7SBarry Smith               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
382085535b8eSBarry Smith             }
382185535b8eSBarry Smith 
382285535b8eSBarry Smith #if defined(PETSC_USE_DEBUG)
382385535b8eSBarry Smith #if defined(PETSC_USE_CTABLE)
382485535b8eSBarry Smith             { PetscInt data;
382585535b8eSBarry Smith               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
3826e32f2f54SBarry Smith               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
382785535b8eSBarry Smith             }
382885535b8eSBarry Smith #else
3829e32f2f54SBarry Smith             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
383085535b8eSBarry Smith #endif
383185535b8eSBarry Smith #endif
383285535b8eSBarry Smith #if defined(PETSC_USE_CTABLE)
383385535b8eSBarry Smith             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
383485535b8eSBarry Smith             col  = (col - 1)/bs;
383585535b8eSBarry Smith #else
383685535b8eSBarry Smith             col = (baij->colmap[in[j]] - 1)/bs;
383785535b8eSBarry Smith #endif
383885535b8eSBarry Smith             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
3839ab9863d7SBarry Smith               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
384085535b8eSBarry Smith               col  =  in[j];
384185535b8eSBarry Smith             }
384226fbe8dcSKarl Rupp           } else col = in[j];
384397e5c40aSBarry Smith           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
384485535b8eSBarry Smith         }
384585535b8eSBarry Smith       }
384685535b8eSBarry Smith     } else {
384785535b8eSBarry Smith       if (!baij->donotstash) {
384885535b8eSBarry Smith         if (roworiented) {
384985535b8eSBarry Smith           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
385085535b8eSBarry Smith         } else {
385185535b8eSBarry Smith           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
385285535b8eSBarry Smith         }
385385535b8eSBarry Smith       }
385485535b8eSBarry Smith     }
385585535b8eSBarry Smith   }
385685535b8eSBarry Smith 
385785535b8eSBarry Smith   /* task normally handled by MatSetValuesBlocked() */
385885535b8eSBarry Smith   ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
385985535b8eSBarry Smith   PetscFunctionReturn(0);
386085535b8eSBarry Smith }
3861dfb205c3SBarry Smith 
3862dfb205c3SBarry Smith #undef __FUNCT__
3863dfb205c3SBarry Smith #define __FUNCT__ "MatCreateMPIBAIJWithArrays"
3864dfb205c3SBarry Smith /*@
3865dfb205c3SBarry Smith      MatCreateMPIBAIJWithArrays - creates a MPI BAIJ matrix using arrays that contain in standard
3866dfb205c3SBarry Smith          CSR format the local rows.
3867dfb205c3SBarry Smith 
3868dfb205c3SBarry Smith    Collective on MPI_Comm
3869dfb205c3SBarry Smith 
3870dfb205c3SBarry Smith    Input Parameters:
3871dfb205c3SBarry Smith +  comm - MPI communicator
3872dfb205c3SBarry Smith .  bs - the block size, only a block size of 1 is supported
3873dfb205c3SBarry Smith .  m - number of local rows (Cannot be PETSC_DECIDE)
3874dfb205c3SBarry Smith .  n - This value should be the same as the local size used in creating the
3875dfb205c3SBarry Smith        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3876dfb205c3SBarry Smith        calculated if N is given) For square matrices n is almost always m.
3877dfb205c3SBarry Smith .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3878dfb205c3SBarry Smith .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3879dfb205c3SBarry Smith .   i - row indices
3880dfb205c3SBarry Smith .   j - column indices
3881dfb205c3SBarry Smith -   a - matrix values
3882dfb205c3SBarry Smith 
3883dfb205c3SBarry Smith    Output Parameter:
3884dfb205c3SBarry Smith .   mat - the matrix
3885dfb205c3SBarry Smith 
3886dfb205c3SBarry Smith    Level: intermediate
3887dfb205c3SBarry Smith 
3888dfb205c3SBarry Smith    Notes:
3889dfb205c3SBarry Smith        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3890dfb205c3SBarry Smith      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3891dfb205c3SBarry Smith      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3892dfb205c3SBarry Smith 
38933adadaf3SJed Brown      The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
38943adadaf3SJed Brown      the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
38953adadaf3SJed Brown      block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
38963adadaf3SJed Brown      with column-major ordering within blocks.
38973adadaf3SJed Brown 
3898dfb205c3SBarry Smith        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3899dfb205c3SBarry Smith 
3900dfb205c3SBarry Smith .keywords: matrix, aij, compressed row, sparse, parallel
3901dfb205c3SBarry Smith 
3902dfb205c3SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
390369b1f4b7SBarry Smith           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3904dfb205c3SBarry Smith @*/
39057087cfbeSBarry Smith PetscErrorCode  MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3906dfb205c3SBarry Smith {
3907dfb205c3SBarry Smith   PetscErrorCode ierr;
3908dfb205c3SBarry Smith 
3909dfb205c3SBarry Smith   PetscFunctionBegin;
3910f23aa3ddSBarry Smith   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3911dfb205c3SBarry Smith   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3912dfb205c3SBarry Smith   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3913dfb205c3SBarry Smith   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3914dfb205c3SBarry Smith   ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
3915d47bf9aaSJed Brown   ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3916dfb205c3SBarry Smith   ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
3917d47bf9aaSJed Brown   ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3918dfb205c3SBarry Smith   PetscFunctionReturn(0);
3919dfb205c3SBarry Smith }
3920e561ad89SHong Zhang 
3921e561ad89SHong Zhang #undef __FUNCT__
3922bd153df0SHong Zhang #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPIBAIJ"
3923bd153df0SHong Zhang PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3924e561ad89SHong Zhang {
3925e561ad89SHong Zhang   PetscErrorCode ierr;
3926bd153df0SHong Zhang   PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
3927bd153df0SHong Zhang   PetscInt       *indx;
3928bd153df0SHong Zhang   PetscScalar    *values;
3929e561ad89SHong Zhang 
3930e561ad89SHong Zhang   PetscFunctionBegin;
3931e561ad89SHong Zhang   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3932bd153df0SHong Zhang   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3933bd153df0SHong Zhang     Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inmat->data;
3934bd153df0SHong Zhang     PetscInt       *dnz,*onz,sum,mbs,Nbs;
3935bd153df0SHong Zhang     PetscInt       *bindx,rmax=a->rmax,j;
3936e561ad89SHong Zhang 
3937bd153df0SHong Zhang     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3938bd153df0SHong Zhang     mbs = m/bs; Nbs = N/cbs;
3939bd153df0SHong Zhang     if (n == PETSC_DECIDE) {
3940bd153df0SHong Zhang       ierr = PetscSplitOwnership(comm,&n,&Nbs);CHKERRQ(ierr);
3941bd153df0SHong Zhang     }
3942bd153df0SHong Zhang     /* Check sum(n) = Nbs */
3943bd153df0SHong Zhang     ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3944bd153df0SHong Zhang     if (sum != Nbs) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",Nbs);
3945bd153df0SHong Zhang 
3946bd153df0SHong Zhang     ierr    = MPI_Scan(&mbs, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3947bd153df0SHong Zhang     rstart -= mbs;
3948e561ad89SHong Zhang 
3949647a6520SHong Zhang     ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
3950bd153df0SHong Zhang     ierr = MatPreallocateInitialize(comm,mbs,n,dnz,onz);CHKERRQ(ierr);
3951bd153df0SHong Zhang     for (i=0; i<mbs; i++) {
3952647a6520SHong Zhang       ierr = MatGetRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
3953647a6520SHong Zhang       nnz = nnz/bs;
3954647a6520SHong Zhang       for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
3955647a6520SHong Zhang       ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
3956647a6520SHong Zhang       ierr = MatRestoreRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
3957e561ad89SHong Zhang     }
3958647a6520SHong Zhang     ierr = PetscFree(bindx);CHKERRQ(ierr);
3959e561ad89SHong Zhang 
3960e561ad89SHong Zhang     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3961bd153df0SHong Zhang     ierr = MatSetSizes(*outmat,m,n*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3962e561ad89SHong Zhang     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
3963e561ad89SHong Zhang     ierr = MatSetType(*outmat,MATMPIBAIJ);CHKERRQ(ierr);
3964e561ad89SHong Zhang     ierr = MatMPIBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
3965e561ad89SHong Zhang     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3966e561ad89SHong Zhang   }
3967e561ad89SHong Zhang 
3968bd153df0SHong Zhang   /* numeric phase */
3969647a6520SHong Zhang   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3970bd153df0SHong Zhang   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
3971e561ad89SHong Zhang 
3972e561ad89SHong Zhang   for (i=0; i<m; i++) {
3973e561ad89SHong Zhang     ierr = MatGetRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3974e561ad89SHong Zhang     Ii   = i + rstart;
3975bd153df0SHong Zhang     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3976e561ad89SHong Zhang     ierr = MatRestoreRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3977e561ad89SHong Zhang   }
3978bd153df0SHong Zhang   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3979bd153df0SHong Zhang   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3980e561ad89SHong Zhang   PetscFunctionReturn(0);
3981e561ad89SHong Zhang }
3982