xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision c5d9258e2ce681285d6bde442a67a024b441b0bc)
179bdfe76SSatish Balay 
2c6db04a5SJed Brown #include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I  "petscmat.h"  I*/
3*c5d9258eSSatish Balay 
4c6db04a5SJed Brown #include <petscblaslapack.h>
565a92638SMatthew G. Knepley #include <petscsf.h>
679bdfe76SSatish Balay 
74a2ae208SSatish Balay #undef __FUNCT__
8985db425SBarry Smith #define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
9985db425SBarry Smith PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
107843d17aSBarry Smith {
117843d17aSBarry Smith   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
12dfbe8321SBarry Smith   PetscErrorCode ierr;
13985db425SBarry Smith   PetscInt       i,*idxb = 0;
1487828ca2SBarry Smith   PetscScalar    *va,*vb;
157843d17aSBarry Smith   Vec            vtmp;
167843d17aSBarry Smith 
177843d17aSBarry Smith   PetscFunctionBegin;
18985db425SBarry Smith   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
191ebc52fbSHong Zhang   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
20985db425SBarry Smith   if (idx) {
2126fbe8dcSKarl Rupp     for (i=0; i<A->rmap->n; i++) {
2226fbe8dcSKarl Rupp       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2326fbe8dcSKarl Rupp     }
24985db425SBarry Smith   }
257843d17aSBarry Smith 
26d0f46423SBarry Smith   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
27785e854fSJed Brown   if (idx) {ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);}
28985db425SBarry Smith   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
291ebc52fbSHong Zhang   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
307843d17aSBarry Smith 
31d0f46423SBarry Smith   for (i=0; i<A->rmap->n; i++) {
3226fbe8dcSKarl Rupp     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
3326fbe8dcSKarl Rupp       va[i] = vb[i];
3426fbe8dcSKarl Rupp       if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);
3526fbe8dcSKarl Rupp     }
367843d17aSBarry Smith   }
377843d17aSBarry Smith 
381ebc52fbSHong Zhang   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
391ebc52fbSHong Zhang   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
40c31cb41cSBarry Smith   ierr = PetscFree(idxb);CHKERRQ(ierr);
416bf464f9SBarry Smith   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
427843d17aSBarry Smith   PetscFunctionReturn(0);
437843d17aSBarry Smith }
447843d17aSBarry Smith 
454a2ae208SSatish Balay #undef __FUNCT__
464a2ae208SSatish Balay #define __FUNCT__ "MatStoreValues_MPIBAIJ"
477087cfbeSBarry Smith PetscErrorCode  MatStoreValues_MPIBAIJ(Mat mat)
487fc3c18eSBarry Smith {
497fc3c18eSBarry Smith   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
50dfbe8321SBarry Smith   PetscErrorCode ierr;
517fc3c18eSBarry Smith 
527fc3c18eSBarry Smith   PetscFunctionBegin;
537fc3c18eSBarry Smith   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
547fc3c18eSBarry Smith   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
557fc3c18eSBarry Smith   PetscFunctionReturn(0);
567fc3c18eSBarry Smith }
577fc3c18eSBarry Smith 
584a2ae208SSatish Balay #undef __FUNCT__
594a2ae208SSatish Balay #define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
607087cfbeSBarry Smith PetscErrorCode  MatRetrieveValues_MPIBAIJ(Mat mat)
617fc3c18eSBarry Smith {
627fc3c18eSBarry Smith   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
63dfbe8321SBarry Smith   PetscErrorCode ierr;
647fc3c18eSBarry Smith 
657fc3c18eSBarry Smith   PetscFunctionBegin;
667fc3c18eSBarry Smith   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
677fc3c18eSBarry Smith   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
687fc3c18eSBarry Smith   PetscFunctionReturn(0);
697fc3c18eSBarry Smith }
707fc3c18eSBarry Smith 
71537820f0SBarry Smith /*
72537820f0SBarry Smith      Local utility routine that creates a mapping from the global column
7357b952d6SSatish Balay    number to the local number in the off-diagonal part of the local
74e06f6af7SJed Brown    storage of the matrix.  This is done in a non scalable way since the
7557b952d6SSatish Balay    length of colmap equals the global matrix length.
7657b952d6SSatish Balay */
774a2ae208SSatish Balay #undef __FUNCT__
78ab9863d7SBarry Smith #define __FUNCT__ "MatCreateColmap_MPIBAIJ_Private"
79ab9863d7SBarry Smith PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
8057b952d6SSatish Balay {
8157b952d6SSatish Balay   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
8257b952d6SSatish Balay   Mat_SeqBAIJ    *B    = (Mat_SeqBAIJ*)baij->B->data;
836849ba73SBarry Smith   PetscErrorCode ierr;
84d0f46423SBarry Smith   PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;
8557b952d6SSatish Balay 
86d64ed03dSBarry Smith   PetscFunctionBegin;
87aa482453SBarry Smith #if defined(PETSC_USE_CTABLE)
88e23dfa41SBarry Smith   ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
8948e59246SSatish Balay   for (i=0; i<nbs; i++) {
903861aac3SJed Brown     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr);
9148e59246SSatish Balay   }
9248e59246SSatish Balay #else
93785e854fSJed Brown   ierr = PetscMalloc1((baij->Nbs+1),&baij->colmap);CHKERRQ(ierr);
943bb1ff40SBarry Smith   ierr = PetscLogObjectMemory((PetscObject)mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
95b24ad042SBarry Smith   ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
96928fc39bSSatish Balay   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
9748e59246SSatish Balay #endif
983a40ed3dSBarry Smith   PetscFunctionReturn(0);
9957b952d6SSatish Balay }
10057b952d6SSatish Balay 
/*
   MatSetValues_SeqBAIJ_A_Private - Inlined insertion of one scalar entry
   (row,col) into the diagonal SeqBAIJ part.

   The macro is not self-contained: it reads and writes variables that must be
   declared by the enclosing function, namely A, a, aa, ai, aj, aimax, ailen,
   bs, bs2, rp, ap, bap, brow, bcol, ridx, cidx, low, high, t, _i, ii, N,
   nrow, rmax and ierr.  It also defines the label a_noinsert, so it can be
   expanded only once per function.

   Steps:
     1. split (row,col) into block indices and the position inside the block
        (the entry lives at offset bs*cidx + ridx within the block);
     2. narrow the search window over the block row by bisection, then scan
        it linearly;
     3. if the block exists, add to or overwrite the entry according to addv;
     4. otherwise honor a->nonew (1: drop the value silently, -1: raise an
        error), call MatSeqXAIJReallocateAIJ(), shift the later blocks up by
        one slot and store the value in the newly opened (zeroed) block.

   Every macro parameter is parenthesized in the expansion so that expression
   arguments are evaluated as a unit.  Arguments are still evaluated more than
   once, so they must be free of side effects.
*/
#define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
  { \
 \
    brow = (row)/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = (col)/bs; \
    ridx = (row) % bs; cidx = (col) % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap +  bs2*_i + bs*cidx + ridx; \
        if ((addv) == ADD_VALUES) *bap += (value);  \
        else                      *bap  = (value);  \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", (row), (col)); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = (value);  \
a_noinsert:; \
    ailen[brow] = nrow; \
  }
13957b952d6SSatish Balay 
/*
   MatSetValues_SeqBAIJ_B_Private - Inlined insertion of one scalar entry
   (row,col) into the off-diagonal SeqBAIJ part.

   The macro is not self-contained: it reads and writes variables that must be
   declared by the enclosing function, namely B, b, ba, bi, bj, bimax, bilen,
   bs, bs2, rp, ap, bap, brow, bcol, ridx, cidx, low, high, t, _i, ii, N,
   nrow, rmax and ierr.  It also defines the label b_noinsert, so it can be
   expanded only once per function.

   Steps:
     1. split (row,col) into block indices and the position inside the block
        (the entry lives at offset bs*cidx + ridx within the block);
     2. narrow the search window over the block row by bisection, then scan
        it linearly;
     3. if the block exists, add to or overwrite the entry according to addv;
     4. otherwise honor b->nonew (1: drop the value silently, -1: raise an
        error), call MatSeqXAIJReallocateAIJ(), shift the later blocks up by
        one slot and store the value in the newly opened (zeroed) block.

   Every macro parameter is parenthesized in the expansion so that expression
   arguments are evaluated as a unit.  Arguments are still evaluated more than
   once, so they must be free of side effects.
*/
#define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
  { \
    brow = (row)/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = (col)/bs; \
    ridx = (row) % bs; cidx = (col) % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap +  bs2*_i + bs*cidx + ridx; \
        if ((addv) == ADD_VALUES) *bap += (value);  \
        else                      *bap  = (value);  \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", (row), (col)); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = (value);  \
b_noinsert:; \
    bilen[brow] = nrow; \
  }
177ac7a638eSSatish Balay 
1784a2ae208SSatish Balay #undef __FUNCT__
1794a2ae208SSatish Balay #define __FUNCT__ "MatSetValues_MPIBAIJ"
180b24ad042SBarry Smith PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
18157b952d6SSatish Balay {
18257b952d6SSatish Balay   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
18393fea6afSBarry Smith   MatScalar      value;
184ace3abfcSBarry Smith   PetscBool      roworiented = baij->roworiented;
185dfbe8321SBarry Smith   PetscErrorCode ierr;
186b24ad042SBarry Smith   PetscInt       i,j,row,col;
187d0f46423SBarry Smith   PetscInt       rstart_orig=mat->rmap->rstart;
188d0f46423SBarry Smith   PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
189d0f46423SBarry Smith   PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;
19057b952d6SSatish Balay 
191eada6651SSatish Balay   /* Some Variables required in the macro */
19280c1aa95SSatish Balay   Mat         A     = baij->A;
19380c1aa95SSatish Balay   Mat_SeqBAIJ *a    = (Mat_SeqBAIJ*)(A)->data;
194b24ad042SBarry Smith   PetscInt    *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
1953eda8832SBarry Smith   MatScalar   *aa   =a->a;
196ac7a638eSSatish Balay 
197ac7a638eSSatish Balay   Mat         B     = baij->B;
198ac7a638eSSatish Balay   Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
199b24ad042SBarry Smith   PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
2003eda8832SBarry Smith   MatScalar   *ba   =b->a;
201ac7a638eSSatish Balay 
202b24ad042SBarry Smith   PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
203b24ad042SBarry Smith   PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
2043eda8832SBarry Smith   MatScalar *ap,*bap;
20580c1aa95SSatish Balay 
206d64ed03dSBarry Smith   PetscFunctionBegin;
20757b952d6SSatish Balay   for (i=0; i<m; i++) {
2085ef9f2a5SBarry Smith     if (im[i] < 0) continue;
2092515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
210e32f2f54SBarry Smith     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
211639f9d9dSBarry Smith #endif
21257b952d6SSatish Balay     if (im[i] >= rstart_orig && im[i] < rend_orig) {
21357b952d6SSatish Balay       row = im[i] - rstart_orig;
21457b952d6SSatish Balay       for (j=0; j<n; j++) {
21557b952d6SSatish Balay         if (in[j] >= cstart_orig && in[j] < cend_orig) {
21657b952d6SSatish Balay           col = in[j] - cstart_orig;
217db4deed7SKarl Rupp           if (roworiented) value = v[i*n+j];
218db4deed7SKarl Rupp           else             value = v[i+j*m];
219f5e9677aSSatish Balay           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
22080c1aa95SSatish Balay           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
22173959e64SBarry Smith         } else if (in[j] < 0) continue;
2222515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
223660746e0SBarry Smith         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
224639f9d9dSBarry Smith #endif
22557b952d6SSatish Balay         else {
22657b952d6SSatish Balay           if (mat->was_assembled) {
227905e6a2fSBarry Smith             if (!baij->colmap) {
228ab9863d7SBarry Smith               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
229905e6a2fSBarry Smith             }
230aa482453SBarry Smith #if defined(PETSC_USE_CTABLE)
2310f5bd95cSBarry Smith             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
232bba1ac68SSatish Balay             col  = col - 1;
23348e59246SSatish Balay #else
234bba1ac68SSatish Balay             col = baij->colmap[in[j]/bs] - 1;
23548e59246SSatish Balay #endif
236c9ef50b2SBarry Smith             if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
237ab9863d7SBarry Smith               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
2388295de27SSatish Balay               col  =  in[j];
2399bf004c3SSatish Balay               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
2409bf004c3SSatish Balay               B    = baij->B;
2419bf004c3SSatish Balay               b    = (Mat_SeqBAIJ*)(B)->data;
2429bf004c3SSatish Balay               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
2439bf004c3SSatish Balay               ba   =b->a;
244c9ef50b2SBarry Smith             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
245c9ef50b2SBarry Smith             else col += in[j]%bs;
2468295de27SSatish Balay           } else col = in[j];
247db4deed7SKarl Rupp           if (roworiented) value = v[i*n+j];
248db4deed7SKarl Rupp           else             value = v[i+j*m];
24990da58bdSSatish Balay           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
25090da58bdSSatish Balay           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
25157b952d6SSatish Balay         }
25257b952d6SSatish Balay       }
253d64ed03dSBarry Smith     } else {
2544cb17eb5SBarry Smith       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
25590f02eecSBarry Smith       if (!baij->donotstash) {
2565080c13bSMatthew G Knepley         mat->assembled = PETSC_FALSE;
257ff2fd236SBarry Smith         if (roworiented) {
258b400d20cSBarry Smith           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
259ff2fd236SBarry Smith         } else {
260b400d20cSBarry Smith           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
26157b952d6SSatish Balay         }
26257b952d6SSatish Balay       }
26357b952d6SSatish Balay     }
26490f02eecSBarry Smith   }
2653a40ed3dSBarry Smith   PetscFunctionReturn(0);
26657b952d6SSatish Balay }
26757b952d6SSatish Balay 
2684a2ae208SSatish Balay #undef __FUNCT__
26997e5c40aSBarry Smith #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
27097e5c40aSBarry Smith PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
271ab26458aSBarry Smith {
272ab26458aSBarry Smith   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
273dd6ea824SBarry Smith   const PetscScalar *value;
274f15d580aSBarry Smith   MatScalar         *barray     = baij->barray;
275ace3abfcSBarry Smith   PetscBool         roworiented = baij->roworiented;
276dfbe8321SBarry Smith   PetscErrorCode    ierr;
277899cda47SBarry Smith   PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
278899cda47SBarry Smith   PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
279d0f46423SBarry Smith   PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
280ab26458aSBarry Smith 
281b16ae2b1SBarry Smith   PetscFunctionBegin;
28230793edcSSatish Balay   if (!barray) {
283785e854fSJed Brown     ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
28482502324SSatish Balay     baij->barray = barray;
28530793edcSSatish Balay   }
28630793edcSSatish Balay 
28726fbe8dcSKarl Rupp   if (roworiented) stepval = (n-1)*bs;
28826fbe8dcSKarl Rupp   else stepval = (m-1)*bs;
28926fbe8dcSKarl Rupp 
290ab26458aSBarry Smith   for (i=0; i<m; i++) {
2915ef9f2a5SBarry Smith     if (im[i] < 0) continue;
2922515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
293e32f2f54SBarry Smith     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
294ab26458aSBarry Smith #endif
295ab26458aSBarry Smith     if (im[i] >= rstart && im[i] < rend) {
296ab26458aSBarry Smith       row = im[i] - rstart;
297ab26458aSBarry Smith       for (j=0; j<n; j++) {
29815b57d14SSatish Balay         /* If NumCol = 1 then a copy is not required */
29915b57d14SSatish Balay         if ((roworiented) && (n == 1)) {
300f15d580aSBarry Smith           barray = (MatScalar*)v + i*bs2;
30115b57d14SSatish Balay         } else if ((!roworiented) && (m == 1)) {
302f15d580aSBarry Smith           barray = (MatScalar*)v + j*bs2;
30315b57d14SSatish Balay         } else { /* Here a copy is required */
304ab26458aSBarry Smith           if (roworiented) {
30553ef36baSBarry Smith             value = v + (i*(stepval+bs) + j)*bs;
306ab26458aSBarry Smith           } else {
30753ef36baSBarry Smith             value = v + (j*(stepval+bs) + i)*bs;
308abef11f7SSatish Balay           }
30953ef36baSBarry Smith           for (ii=0; ii<bs; ii++,value+=bs+stepval) {
31026fbe8dcSKarl Rupp             for (jj=0; jj<bs; jj++) barray[jj] = value[jj];
31153ef36baSBarry Smith             barray += bs;
31247513183SBarry Smith           }
31330793edcSSatish Balay           barray -= bs2;
31415b57d14SSatish Balay         }
315abef11f7SSatish Balay 
316abef11f7SSatish Balay         if (in[j] >= cstart && in[j] < cend) {
317abef11f7SSatish Balay           col  = in[j] - cstart;
31897e5c40aSBarry Smith           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
31926fbe8dcSKarl Rupp         } else if (in[j] < 0) continue;
3202515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
321cb9801acSJed Brown         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
322ab26458aSBarry Smith #endif
323ab26458aSBarry Smith         else {
324ab26458aSBarry Smith           if (mat->was_assembled) {
325ab26458aSBarry Smith             if (!baij->colmap) {
326ab9863d7SBarry Smith               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
327ab26458aSBarry Smith             }
328a5eb4965SSatish Balay 
3292515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
330aa482453SBarry Smith #if defined(PETSC_USE_CTABLE)
331b24ad042SBarry Smith             { PetscInt data;
3320f5bd95cSBarry Smith               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
333e32f2f54SBarry Smith               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
334fa46199cSSatish Balay             }
33548e59246SSatish Balay #else
336e32f2f54SBarry Smith             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
337a5eb4965SSatish Balay #endif
33848e59246SSatish Balay #endif
339aa482453SBarry Smith #if defined(PETSC_USE_CTABLE)
3400f5bd95cSBarry Smith             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
341fa46199cSSatish Balay             col  = (col - 1)/bs;
34248e59246SSatish Balay #else
343a5eb4965SSatish Balay             col = (baij->colmap[in[j]] - 1)/bs;
34448e59246SSatish Balay #endif
3450e9bae81SBarry Smith             if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
346ab9863d7SBarry Smith               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
347ab26458aSBarry Smith               col  =  in[j];
3480e9bae81SBarry Smith             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", bs*im[i], bs*in[j]);
349db4deed7SKarl Rupp           } else col = in[j];
35097e5c40aSBarry Smith           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
351ab26458aSBarry Smith         }
352ab26458aSBarry Smith       }
353d64ed03dSBarry Smith     } else {
3544cb17eb5SBarry Smith       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
355ab26458aSBarry Smith       if (!baij->donotstash) {
356ff2fd236SBarry Smith         if (roworiented) {
3576fa18ffdSBarry Smith           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
358ff2fd236SBarry Smith         } else {
3596fa18ffdSBarry Smith           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
360ff2fd236SBarry Smith         }
361abef11f7SSatish Balay       }
362ab26458aSBarry Smith     }
363ab26458aSBarry Smith   }
3643a40ed3dSBarry Smith   PetscFunctionReturn(0);
365ab26458aSBarry Smith }
3666fa18ffdSBarry Smith 
/*
   Multiplicative hashing used by the hash-table variants of MatSetValues.

   HASH_KEY is (sqrt(5)-1)/2 rounded to ten digits.  HASH(size,key,tmp) stores
   key*HASH_KEY in the caller-supplied floating point scratch variable tmp and
   yields (PetscInt)(size * fractional part of tmp), i.e. a slot in [0,size)
   for non-negative keys.  The fractional part is obtained by subtracting the
   truncated value, which matches fmod(key*HASH_KEY,1) as long as the product
   fits in a PetscInt.

   tmp is written to and evaluated several times, so it must be a plain lvalue.
*/
#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) ((tmp) = (key)*HASH_KEY,(PetscInt)((size)*((tmp)-(PetscInt)(tmp))))
3714a2ae208SSatish Balay #undef __FUNCT__
37297e5c40aSBarry Smith #define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
37397e5c40aSBarry Smith PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
3740bdbc534SSatish Balay {
3750bdbc534SSatish Balay   Mat_MPIBAIJ    *baij       = (Mat_MPIBAIJ*)mat->data;
376ace3abfcSBarry Smith   PetscBool      roworiented = baij->roworiented;
377dfbe8321SBarry Smith   PetscErrorCode ierr;
378b24ad042SBarry Smith   PetscInt       i,j,row,col;
379d0f46423SBarry Smith   PetscInt       rstart_orig=mat->rmap->rstart;
380d0f46423SBarry Smith   PetscInt       rend_orig  =mat->rmap->rend,Nbs=baij->Nbs;
381d0f46423SBarry Smith   PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
382329f5518SBarry Smith   PetscReal      tmp;
3833eda8832SBarry Smith   MatScalar      **HD = baij->hd,value;
3842515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
385b24ad042SBarry Smith   PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
3864a15367fSSatish Balay #endif
3870bdbc534SSatish Balay 
3880bdbc534SSatish Balay   PetscFunctionBegin;
3890bdbc534SSatish Balay   for (i=0; i<m; i++) {
3902515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
391e32f2f54SBarry Smith     if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
392e32f2f54SBarry Smith     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
3930bdbc534SSatish Balay #endif
3940bdbc534SSatish Balay     row = im[i];
395c2760754SSatish Balay     if (row >= rstart_orig && row < rend_orig) {
3960bdbc534SSatish Balay       for (j=0; j<n; j++) {
3970bdbc534SSatish Balay         col = in[j];
398db4deed7SKarl Rupp         if (roworiented) value = v[i*n+j];
399db4deed7SKarl Rupp         else             value = v[i+j*m];
400b24ad042SBarry Smith         /* Look up PetscInto the Hash Table */
401c2760754SSatish Balay         key = (row/bs)*Nbs+(col/bs)+1;
402c2760754SSatish Balay         h1  = HASH(size,key,tmp);
4030bdbc534SSatish Balay 
404c2760754SSatish Balay 
405c2760754SSatish Balay         idx = h1;
4062515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
407187ce0cbSSatish Balay         insert_ct++;
408187ce0cbSSatish Balay         total_ct++;
409187ce0cbSSatish Balay         if (HT[idx] != key) {
410187ce0cbSSatish Balay           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
411187ce0cbSSatish Balay           if (idx == size) {
412187ce0cbSSatish Balay             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
413f23aa3ddSBarry Smith             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
414187ce0cbSSatish Balay           }
415187ce0cbSSatish Balay         }
416187ce0cbSSatish Balay #else
417c2760754SSatish Balay         if (HT[idx] != key) {
418c2760754SSatish Balay           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
419c2760754SSatish Balay           if (idx == size) {
420c2760754SSatish Balay             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
421f23aa3ddSBarry Smith             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
422c2760754SSatish Balay           }
423c2760754SSatish Balay         }
424187ce0cbSSatish Balay #endif
425c2760754SSatish Balay         /* A HASH table entry is found, so insert the values at the correct address */
426c2760754SSatish Balay         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
427c2760754SSatish Balay         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
4280bdbc534SSatish Balay       }
42926fbe8dcSKarl Rupp     } else if (!baij->donotstash) {
430ff2fd236SBarry Smith       if (roworiented) {
431b400d20cSBarry Smith         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
432ff2fd236SBarry Smith       } else {
433b400d20cSBarry Smith         ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
4340bdbc534SSatish Balay       }
4350bdbc534SSatish Balay     }
4360bdbc534SSatish Balay   }
4372515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
438187ce0cbSSatish Balay   baij->ht_total_ct  = total_ct;
439187ce0cbSSatish Balay   baij->ht_insert_ct = insert_ct;
440187ce0cbSSatish Balay #endif
4410bdbc534SSatish Balay   PetscFunctionReturn(0);
4420bdbc534SSatish Balay }
4430bdbc534SSatish Balay 
4444a2ae208SSatish Balay #undef __FUNCT__
44597e5c40aSBarry Smith #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
44697e5c40aSBarry Smith PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
4470bdbc534SSatish Balay {
4480bdbc534SSatish Balay   Mat_MPIBAIJ       *baij       = (Mat_MPIBAIJ*)mat->data;
449ace3abfcSBarry Smith   PetscBool         roworiented = baij->roworiented;
450dfbe8321SBarry Smith   PetscErrorCode    ierr;
451b24ad042SBarry Smith   PetscInt          i,j,ii,jj,row,col;
452899cda47SBarry Smith   PetscInt          rstart=baij->rstartbs;
453d0f46423SBarry Smith   PetscInt          rend  =mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
454b24ad042SBarry Smith   PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
455329f5518SBarry Smith   PetscReal         tmp;
4563eda8832SBarry Smith   MatScalar         **HD = baij->hd,*baij_a;
457dd6ea824SBarry Smith   const PetscScalar *v_t,*value;
4582515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
459b24ad042SBarry Smith   PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
4604a15367fSSatish Balay #endif
4610bdbc534SSatish Balay 
462d0a41580SSatish Balay   PetscFunctionBegin;
46326fbe8dcSKarl Rupp   if (roworiented) stepval = (n-1)*bs;
46426fbe8dcSKarl Rupp   else stepval = (m-1)*bs;
46526fbe8dcSKarl Rupp 
4660bdbc534SSatish Balay   for (i=0; i<m; i++) {
4672515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
468e32f2f54SBarry Smith     if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
469e32f2f54SBarry Smith     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
4700bdbc534SSatish Balay #endif
4710bdbc534SSatish Balay     row = im[i];
472ab715e2cSSatish Balay     v_t = v + i*nbs2;
473c2760754SSatish Balay     if (row >= rstart && row < rend) {
4740bdbc534SSatish Balay       for (j=0; j<n; j++) {
4750bdbc534SSatish Balay         col = in[j];
4760bdbc534SSatish Balay 
4770bdbc534SSatish Balay         /* Look up into the Hash Table */
478c2760754SSatish Balay         key = row*Nbs+col+1;
479c2760754SSatish Balay         h1  = HASH(size,key,tmp);
4800bdbc534SSatish Balay 
481c2760754SSatish Balay         idx = h1;
4822515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
483187ce0cbSSatish Balay         total_ct++;
484187ce0cbSSatish Balay         insert_ct++;
485187ce0cbSSatish Balay         if (HT[idx] != key) {
486187ce0cbSSatish Balay           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
487187ce0cbSSatish Balay           if (idx == size) {
488187ce0cbSSatish Balay             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
489f23aa3ddSBarry Smith             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
490187ce0cbSSatish Balay           }
491187ce0cbSSatish Balay         }
492187ce0cbSSatish Balay #else
493c2760754SSatish Balay         if (HT[idx] != key) {
494c2760754SSatish Balay           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
495c2760754SSatish Balay           if (idx == size) {
496c2760754SSatish Balay             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
497f23aa3ddSBarry Smith             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
498c2760754SSatish Balay           }
499c2760754SSatish Balay         }
500187ce0cbSSatish Balay #endif
501c2760754SSatish Balay         baij_a = HD[idx];
5020bdbc534SSatish Balay         if (roworiented) {
503c2760754SSatish Balay           /*value = v + i*(stepval+bs)*bs + j*bs;*/
504187ce0cbSSatish Balay           /* value = v + (i*(stepval+bs)+j)*bs; */
505187ce0cbSSatish Balay           value = v_t;
506187ce0cbSSatish Balay           v_t  += bs;
507fef45726SSatish Balay           if (addv == ADD_VALUES) {
508c2760754SSatish Balay             for (ii=0; ii<bs; ii++,value+=stepval) {
509c2760754SSatish Balay               for (jj=ii; jj<bs2; jj+=bs) {
510fef45726SSatish Balay                 baij_a[jj] += *value++;
511b4cc0f5aSSatish Balay               }
512b4cc0f5aSSatish Balay             }
513fef45726SSatish Balay           } else {
514c2760754SSatish Balay             for (ii=0; ii<bs; ii++,value+=stepval) {
515c2760754SSatish Balay               for (jj=ii; jj<bs2; jj+=bs) {
516fef45726SSatish Balay                 baij_a[jj] = *value++;
517fef45726SSatish Balay               }
518fef45726SSatish Balay             }
519fef45726SSatish Balay           }
5200bdbc534SSatish Balay         } else {
5210bdbc534SSatish Balay           value = v + j*(stepval+bs)*bs + i*bs;
522fef45726SSatish Balay           if (addv == ADD_VALUES) {
523b4cc0f5aSSatish Balay             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
5240bdbc534SSatish Balay               for (jj=0; jj<bs; jj++) {
525fef45726SSatish Balay                 baij_a[jj] += *value++;
526fef45726SSatish Balay               }
527fef45726SSatish Balay             }
528fef45726SSatish Balay           } else {
529fef45726SSatish Balay             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
530fef45726SSatish Balay               for (jj=0; jj<bs; jj++) {
531fef45726SSatish Balay                 baij_a[jj] = *value++;
532fef45726SSatish Balay               }
533b4cc0f5aSSatish Balay             }
5340bdbc534SSatish Balay           }
5350bdbc534SSatish Balay         }
5360bdbc534SSatish Balay       }
5370bdbc534SSatish Balay     } else {
5380bdbc534SSatish Balay       if (!baij->donotstash) {
5390bdbc534SSatish Balay         if (roworiented) {
5408798bf22SSatish Balay           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
5410bdbc534SSatish Balay         } else {
5428798bf22SSatish Balay           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
5430bdbc534SSatish Balay         }
5440bdbc534SSatish Balay       }
5450bdbc534SSatish Balay     }
5460bdbc534SSatish Balay   }
5472515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
548187ce0cbSSatish Balay   baij->ht_total_ct  = total_ct;
549187ce0cbSSatish Balay   baij->ht_insert_ct = insert_ct;
550187ce0cbSSatish Balay #endif
5510bdbc534SSatish Balay   PetscFunctionReturn(0);
5520bdbc534SSatish Balay }
553133cdb44SSatish Balay 
5544a2ae208SSatish Balay #undef __FUNCT__
5554a2ae208SSatish Balay #define __FUNCT__ "MatGetValues_MPIBAIJ"
556b24ad042SBarry Smith PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
557d6de1c52SSatish Balay {
558d6de1c52SSatish Balay   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
5596849ba73SBarry Smith   PetscErrorCode ierr;
560d0f46423SBarry Smith   PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
561d0f46423SBarry Smith   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
562d6de1c52SSatish Balay 
563133cdb44SSatish Balay   PetscFunctionBegin;
564d6de1c52SSatish Balay   for (i=0; i<m; i++) {
565e32f2f54SBarry Smith     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
566e32f2f54SBarry Smith     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
567d6de1c52SSatish Balay     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
568d6de1c52SSatish Balay       row = idxm[i] - bsrstart;
569d6de1c52SSatish Balay       for (j=0; j<n; j++) {
570e32f2f54SBarry Smith         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
571e32f2f54SBarry Smith         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
572d6de1c52SSatish Balay         if (idxn[j] >= bscstart && idxn[j] < bscend) {
573d6de1c52SSatish Balay           col  = idxn[j] - bscstart;
57498dd23e9SBarry Smith           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
575d64ed03dSBarry Smith         } else {
576905e6a2fSBarry Smith           if (!baij->colmap) {
577ab9863d7SBarry Smith             ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
578905e6a2fSBarry Smith           }
579aa482453SBarry Smith #if defined(PETSC_USE_CTABLE)
5800f5bd95cSBarry Smith           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
581fa46199cSSatish Balay           data--;
58248e59246SSatish Balay #else
58348e59246SSatish Balay           data = baij->colmap[idxn[j]/bs]-1;
58448e59246SSatish Balay #endif
58548e59246SSatish Balay           if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
586d9d09a02SSatish Balay           else {
58748e59246SSatish Balay             col  = data + idxn[j]%bs;
58898dd23e9SBarry Smith             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
589d6de1c52SSatish Balay           }
590d6de1c52SSatish Balay         }
591d6de1c52SSatish Balay       }
592f23aa3ddSBarry Smith     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
593d6de1c52SSatish Balay   }
5943a40ed3dSBarry Smith   PetscFunctionReturn(0);
595d6de1c52SSatish Balay }
596d6de1c52SSatish Balay 
5974a2ae208SSatish Balay #undef __FUNCT__
5984a2ae208SSatish Balay #define __FUNCT__ "MatNorm_MPIBAIJ"
599dfbe8321SBarry Smith PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
600d6de1c52SSatish Balay {
601d6de1c52SSatish Balay   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
602d6de1c52SSatish Balay   Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
603dfbe8321SBarry Smith   PetscErrorCode ierr;
604d0f46423SBarry Smith   PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
605329f5518SBarry Smith   PetscReal      sum = 0.0;
6063eda8832SBarry Smith   MatScalar      *v;
607d6de1c52SSatish Balay 
608d64ed03dSBarry Smith   PetscFunctionBegin;
609d6de1c52SSatish Balay   if (baij->size == 1) {
610064f8208SBarry Smith     ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
611d6de1c52SSatish Balay   } else {
612d6de1c52SSatish Balay     if (type == NORM_FROBENIUS) {
613d6de1c52SSatish Balay       v  = amat->a;
6148a62d963SHong Zhang       nz = amat->nz*bs2;
6158a62d963SHong Zhang       for (i=0; i<nz; i++) {
616329f5518SBarry Smith         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
617d6de1c52SSatish Balay       }
618d6de1c52SSatish Balay       v  = bmat->a;
6198a62d963SHong Zhang       nz = bmat->nz*bs2;
6208a62d963SHong Zhang       for (i=0; i<nz; i++) {
621329f5518SBarry Smith         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
622d6de1c52SSatish Balay       }
623ce94432eSBarry Smith       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
6248f1a2a5eSBarry Smith       *nrm = PetscSqrtReal(*nrm);
6258a62d963SHong Zhang     } else if (type == NORM_1) { /* max column sum */
6268a62d963SHong Zhang       PetscReal *tmp,*tmp2;
627899cda47SBarry Smith       PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
628dcca6d9dSJed Brown       ierr = PetscMalloc2(mat->cmap->N,&tmp,mat->cmap->N,&tmp2);CHKERRQ(ierr);
629d0f46423SBarry Smith       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
6308a62d963SHong Zhang       v    = amat->a; jj = amat->j;
6318a62d963SHong Zhang       for (i=0; i<amat->nz; i++) {
6328a62d963SHong Zhang         for (j=0; j<bs; j++) {
6338a62d963SHong Zhang           col = bs*(cstart + *jj) + j; /* column index */
6348a62d963SHong Zhang           for (row=0; row<bs; row++) {
6358a62d963SHong Zhang             tmp[col] += PetscAbsScalar(*v);  v++;
6368a62d963SHong Zhang           }
6378a62d963SHong Zhang         }
6388a62d963SHong Zhang         jj++;
6398a62d963SHong Zhang       }
6408a62d963SHong Zhang       v = bmat->a; jj = bmat->j;
6418a62d963SHong Zhang       for (i=0; i<bmat->nz; i++) {
6428a62d963SHong Zhang         for (j=0; j<bs; j++) {
6438a62d963SHong Zhang           col = bs*garray[*jj] + j;
6448a62d963SHong Zhang           for (row=0; row<bs; row++) {
6458a62d963SHong Zhang             tmp[col] += PetscAbsScalar(*v); v++;
6468a62d963SHong Zhang           }
6478a62d963SHong Zhang         }
6488a62d963SHong Zhang         jj++;
6498a62d963SHong Zhang       }
650ce94432eSBarry Smith       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
6518a62d963SHong Zhang       *nrm = 0.0;
652d0f46423SBarry Smith       for (j=0; j<mat->cmap->N; j++) {
6538a62d963SHong Zhang         if (tmp2[j] > *nrm) *nrm = tmp2[j];
6548a62d963SHong Zhang       }
655fca92195SBarry Smith       ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
6568a62d963SHong Zhang     } else if (type == NORM_INFINITY) { /* max row sum */
657577dd1f9SKris Buschelman       PetscReal *sums;
658785e854fSJed Brown       ierr = PetscMalloc1(bs,&sums);CHKERRQ(ierr);
6598a62d963SHong Zhang       sum  = 0.0;
6608a62d963SHong Zhang       for (j=0; j<amat->mbs; j++) {
6618a62d963SHong Zhang         for (row=0; row<bs; row++) sums[row] = 0.0;
6628a62d963SHong Zhang         v  = amat->a + bs2*amat->i[j];
6638a62d963SHong Zhang         nz = amat->i[j+1]-amat->i[j];
6648a62d963SHong Zhang         for (i=0; i<nz; i++) {
6658a62d963SHong Zhang           for (col=0; col<bs; col++) {
6668a62d963SHong Zhang             for (row=0; row<bs; row++) {
6678a62d963SHong Zhang               sums[row] += PetscAbsScalar(*v); v++;
6688a62d963SHong Zhang             }
6698a62d963SHong Zhang           }
6708a62d963SHong Zhang         }
6718a62d963SHong Zhang         v  = bmat->a + bs2*bmat->i[j];
6728a62d963SHong Zhang         nz = bmat->i[j+1]-bmat->i[j];
6738a62d963SHong Zhang         for (i=0; i<nz; i++) {
6748a62d963SHong Zhang           for (col=0; col<bs; col++) {
6758a62d963SHong Zhang             for (row=0; row<bs; row++) {
6768a62d963SHong Zhang               sums[row] += PetscAbsScalar(*v); v++;
6778a62d963SHong Zhang             }
6788a62d963SHong Zhang           }
6798a62d963SHong Zhang         }
6808a62d963SHong Zhang         for (row=0; row<bs; row++) {
6818a62d963SHong Zhang           if (sums[row] > sum) sum = sums[row];
6828a62d963SHong Zhang         }
6838a62d963SHong Zhang       }
684ce94432eSBarry Smith       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
685577dd1f9SKris Buschelman       ierr = PetscFree(sums);CHKERRQ(ierr);
686ce94432eSBarry Smith     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm yet");
687d64ed03dSBarry Smith   }
6883a40ed3dSBarry Smith   PetscFunctionReturn(0);
689d6de1c52SSatish Balay }
69057b952d6SSatish Balay 
691fef45726SSatish Balay /*
692fef45726SSatish Balay   Creates the hash table, and sets the table
693fef45726SSatish Balay   This table is created only once.
694fef45726SSatish Balay   If new entried need to be added to the matrix
695fef45726SSatish Balay   then the hash table has to be destroyed and
696fef45726SSatish Balay   recreated.
697fef45726SSatish Balay */
6984a2ae208SSatish Balay #undef __FUNCT__
6994a2ae208SSatish Balay #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
700dfbe8321SBarry Smith PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
701596b8d2eSBarry Smith {
702596b8d2eSBarry Smith   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
703596b8d2eSBarry Smith   Mat            A     = baij->A,B=baij->B;
704596b8d2eSBarry Smith   Mat_SeqBAIJ    *a    = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data;
705b24ad042SBarry Smith   PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
7066849ba73SBarry Smith   PetscErrorCode ierr;
707fca92195SBarry Smith   PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
708899cda47SBarry Smith   PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
709b24ad042SBarry Smith   PetscInt       *HT,key;
7103eda8832SBarry Smith   MatScalar      **HD;
711329f5518SBarry Smith   PetscReal      tmp;
7126cf91177SBarry Smith #if defined(PETSC_USE_INFO)
713b24ad042SBarry Smith   PetscInt ct=0,max=0;
7144a15367fSSatish Balay #endif
715fef45726SSatish Balay 
716d64ed03dSBarry Smith   PetscFunctionBegin;
717fca92195SBarry Smith   if (baij->ht) PetscFunctionReturn(0);
718fef45726SSatish Balay 
719fca92195SBarry Smith   baij->ht_size = (PetscInt)(factor*nz);
720fca92195SBarry Smith   ht_size       = baij->ht_size;
7210bdbc534SSatish Balay 
722fef45726SSatish Balay   /* Allocate Memory for Hash Table */
7231795a4d1SJed Brown   ierr = PetscCalloc2(ht_size,&baij->hd,ht_size,&baij->ht);CHKERRQ(ierr);
724b9e4cc15SSatish Balay   HD   = baij->hd;
725a07cd24cSSatish Balay   HT   = baij->ht;
726b9e4cc15SSatish Balay 
727596b8d2eSBarry Smith   /* Loop Over A */
7280bdbc534SSatish Balay   for (i=0; i<a->mbs; i++) {
729596b8d2eSBarry Smith     for (j=ai[i]; j<ai[i+1]; j++) {
7300bdbc534SSatish Balay       row = i+rstart;
7310bdbc534SSatish Balay       col = aj[j]+cstart;
732596b8d2eSBarry Smith 
733187ce0cbSSatish Balay       key = row*Nbs + col + 1;
734fca92195SBarry Smith       h1  = HASH(ht_size,key,tmp);
735fca92195SBarry Smith       for (k=0; k<ht_size; k++) {
736fca92195SBarry Smith         if (!HT[(h1+k)%ht_size]) {
737fca92195SBarry Smith           HT[(h1+k)%ht_size] = key;
738fca92195SBarry Smith           HD[(h1+k)%ht_size] = a->a + j*bs2;
739596b8d2eSBarry Smith           break;
7406cf91177SBarry Smith #if defined(PETSC_USE_INFO)
741187ce0cbSSatish Balay         } else {
742187ce0cbSSatish Balay           ct++;
743187ce0cbSSatish Balay #endif
744596b8d2eSBarry Smith         }
745187ce0cbSSatish Balay       }
7466cf91177SBarry Smith #if defined(PETSC_USE_INFO)
747187ce0cbSSatish Balay       if (k> max) max = k;
748187ce0cbSSatish Balay #endif
749596b8d2eSBarry Smith     }
750596b8d2eSBarry Smith   }
751596b8d2eSBarry Smith   /* Loop Over B */
7520bdbc534SSatish Balay   for (i=0; i<b->mbs; i++) {
753596b8d2eSBarry Smith     for (j=bi[i]; j<bi[i+1]; j++) {
7540bdbc534SSatish Balay       row = i+rstart;
7550bdbc534SSatish Balay       col = garray[bj[j]];
756187ce0cbSSatish Balay       key = row*Nbs + col + 1;
757fca92195SBarry Smith       h1  = HASH(ht_size,key,tmp);
758fca92195SBarry Smith       for (k=0; k<ht_size; k++) {
759fca92195SBarry Smith         if (!HT[(h1+k)%ht_size]) {
760fca92195SBarry Smith           HT[(h1+k)%ht_size] = key;
761fca92195SBarry Smith           HD[(h1+k)%ht_size] = b->a + j*bs2;
762596b8d2eSBarry Smith           break;
7636cf91177SBarry Smith #if defined(PETSC_USE_INFO)
764187ce0cbSSatish Balay         } else {
765187ce0cbSSatish Balay           ct++;
766187ce0cbSSatish Balay #endif
767596b8d2eSBarry Smith         }
768187ce0cbSSatish Balay       }
7696cf91177SBarry Smith #if defined(PETSC_USE_INFO)
770187ce0cbSSatish Balay       if (k> max) max = k;
771187ce0cbSSatish Balay #endif
772596b8d2eSBarry Smith     }
773596b8d2eSBarry Smith   }
774596b8d2eSBarry Smith 
775596b8d2eSBarry Smith   /* Print Summary */
7766cf91177SBarry Smith #if defined(PETSC_USE_INFO)
777fca92195SBarry Smith   for (i=0,j=0; i<ht_size; i++) {
77826fbe8dcSKarl Rupp     if (HT[i]) j++;
779c38d4ed2SBarry Smith   }
7801e2582c4SBarry Smith   ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
781187ce0cbSSatish Balay #endif
7823a40ed3dSBarry Smith   PetscFunctionReturn(0);
783596b8d2eSBarry Smith }
78457b952d6SSatish Balay 
7854a2ae208SSatish Balay #undef __FUNCT__
7864a2ae208SSatish Balay #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
787dfbe8321SBarry Smith PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
788bbb85fb3SSatish Balay {
789bbb85fb3SSatish Balay   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
790dfbe8321SBarry Smith   PetscErrorCode ierr;
791b24ad042SBarry Smith   PetscInt       nstash,reallocs;
792bbb85fb3SSatish Balay   InsertMode     addv;
793bbb85fb3SSatish Balay 
794bbb85fb3SSatish Balay   PetscFunctionBegin;
79526fbe8dcSKarl Rupp   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
796bbb85fb3SSatish Balay 
797bbb85fb3SSatish Balay   /* make sure all processors are either in INSERTMODE or ADDMODE */
798ce94432eSBarry Smith   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
799ce94432eSBarry Smith   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
800bbb85fb3SSatish Balay   mat->insertmode = addv; /* in case this processor had no cache */
801bbb85fb3SSatish Balay 
802d0f46423SBarry Smith   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
8031e2582c4SBarry Smith   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
8048798bf22SSatish Balay   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
8051e2582c4SBarry Smith   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
80646680499SSatish Balay   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
8071e2582c4SBarry Smith   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
808bbb85fb3SSatish Balay   PetscFunctionReturn(0);
809bbb85fb3SSatish Balay }
810bbb85fb3SSatish Balay 
8114a2ae208SSatish Balay #undef __FUNCT__
8124a2ae208SSatish Balay #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
813dfbe8321SBarry Smith PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
814bbb85fb3SSatish Balay {
815bbb85fb3SSatish Balay   Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
81691c97fd4SSatish Balay   Mat_SeqBAIJ    *a   =(Mat_SeqBAIJ*)baij->A->data;
8176849ba73SBarry Smith   PetscErrorCode ierr;
818b24ad042SBarry Smith   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
819e44c0bd4SBarry Smith   PetscInt       *row,*col;
820ace3abfcSBarry Smith   PetscBool      r1,r2,r3,other_disassembled;
8213eda8832SBarry Smith   MatScalar      *val;
822bbb85fb3SSatish Balay   InsertMode     addv = mat->insertmode;
823b24ad042SBarry Smith   PetscMPIInt    n;
824bbb85fb3SSatish Balay 
825bbb85fb3SSatish Balay   PetscFunctionBegin;
8265fd66863SKarl Rupp   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
8274cb17eb5SBarry Smith   if (!baij->donotstash && !mat->nooffprocentries) {
828a2d1c673SSatish Balay     while (1) {
8298798bf22SSatish Balay       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
830a2d1c673SSatish Balay       if (!flg) break;
831a2d1c673SSatish Balay 
832bbb85fb3SSatish Balay       for (i=0; i<n;) {
833bbb85fb3SSatish Balay         /* Now identify the consecutive vals belonging to the same row */
83426fbe8dcSKarl Rupp         for (j=i,rstart=row[j]; j<n; j++) {
83526fbe8dcSKarl Rupp           if (row[j] != rstart) break;
83626fbe8dcSKarl Rupp         }
837bbb85fb3SSatish Balay         if (j < n) ncols = j-i;
838bbb85fb3SSatish Balay         else       ncols = n-i;
839bbb85fb3SSatish Balay         /* Now assemble all these values with a single function call */
84097e5c40aSBarry Smith         ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
841bbb85fb3SSatish Balay         i    = j;
842bbb85fb3SSatish Balay       }
843bbb85fb3SSatish Balay     }
8448798bf22SSatish Balay     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
845a2d1c673SSatish Balay     /* Now process the block-stash. Since the values are stashed column-oriented,
846a2d1c673SSatish Balay        set the roworiented flag to column oriented, and after MatSetValues()
847a2d1c673SSatish Balay        restore the original flags */
848a2d1c673SSatish Balay     r1 = baij->roworiented;
849a2d1c673SSatish Balay     r2 = a->roworiented;
85091c97fd4SSatish Balay     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
85126fbe8dcSKarl Rupp 
8527c922b88SBarry Smith     baij->roworiented = PETSC_FALSE;
8537c922b88SBarry Smith     a->roworiented    = PETSC_FALSE;
85426fbe8dcSKarl Rupp 
85591c97fd4SSatish Balay     (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
856a2d1c673SSatish Balay     while (1) {
8578798bf22SSatish Balay       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
858a2d1c673SSatish Balay       if (!flg) break;
859a2d1c673SSatish Balay 
860a2d1c673SSatish Balay       for (i=0; i<n;) {
861a2d1c673SSatish Balay         /* Now identify the consecutive vals belonging to the same row */
86226fbe8dcSKarl Rupp         for (j=i,rstart=row[j]; j<n; j++) {
86326fbe8dcSKarl Rupp           if (row[j] != rstart) break;
86426fbe8dcSKarl Rupp         }
865a2d1c673SSatish Balay         if (j < n) ncols = j-i;
866a2d1c673SSatish Balay         else       ncols = n-i;
86797e5c40aSBarry Smith         ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
868a2d1c673SSatish Balay         i    = j;
869a2d1c673SSatish Balay       }
870a2d1c673SSatish Balay     }
8718798bf22SSatish Balay     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
87226fbe8dcSKarl Rupp 
873a2d1c673SSatish Balay     baij->roworiented = r1;
874a2d1c673SSatish Balay     a->roworiented    = r2;
87526fbe8dcSKarl Rupp 
87691c97fd4SSatish Balay     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
877bbb85fb3SSatish Balay   }
878bbb85fb3SSatish Balay 
879bbb85fb3SSatish Balay   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
880bbb85fb3SSatish Balay   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
881bbb85fb3SSatish Balay 
882bbb85fb3SSatish Balay   /* determine if any processor has disassembled, if so we must
883bbb85fb3SSatish Balay      also disassemble ourselfs, in order that we may reassemble. */
884bbb85fb3SSatish Balay   /*
885bbb85fb3SSatish Balay      if nonzero structure of submatrix B cannot change then we know that
886bbb85fb3SSatish Balay      no processor disassembled thus we can skip this stuff
887bbb85fb3SSatish Balay   */
888bbb85fb3SSatish Balay   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
889ce94432eSBarry Smith     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
890bbb85fb3SSatish Balay     if (mat->was_assembled && !other_disassembled) {
891ab9863d7SBarry Smith       ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
892bbb85fb3SSatish Balay     }
893bbb85fb3SSatish Balay   }
894bbb85fb3SSatish Balay 
895bbb85fb3SSatish Balay   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
896bbb85fb3SSatish Balay     ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
897bbb85fb3SSatish Balay   }
898bbb85fb3SSatish Balay   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
899bbb85fb3SSatish Balay   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
900bbb85fb3SSatish Balay 
9016cf91177SBarry Smith #if defined(PETSC_USE_INFO)
902bbb85fb3SSatish Balay   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
9031e2582c4SBarry Smith     ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
90426fbe8dcSKarl Rupp 
905bbb85fb3SSatish Balay     baij->ht_total_ct  = 0;
906bbb85fb3SSatish Balay     baij->ht_insert_ct = 0;
907bbb85fb3SSatish Balay   }
908bbb85fb3SSatish Balay #endif
909bbb85fb3SSatish Balay   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
910bbb85fb3SSatish Balay     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
91126fbe8dcSKarl Rupp 
912bbb85fb3SSatish Balay     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
913bbb85fb3SSatish Balay     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
914bbb85fb3SSatish Balay   }
915bbb85fb3SSatish Balay 
916fca92195SBarry Smith   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
91726fbe8dcSKarl Rupp 
918606d414cSSatish Balay   baij->rowvalues = 0;
9194f9cfa9eSBarry Smith 
9204f9cfa9eSBarry Smith   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
9214f9cfa9eSBarry Smith   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
922e56f5c9eSBarry Smith     PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
92309e82e2bSBarry Smith     ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
924e56f5c9eSBarry Smith   }
925bbb85fb3SSatish Balay   PetscFunctionReturn(0);
926bbb85fb3SSatish Balay }
92757b952d6SSatish Balay 
9287da1fb6eSBarry Smith extern PetscErrorCode MatView_SeqBAIJ(Mat,PetscViewer);
9299804daf3SBarry Smith #include <petscdraw.h>
9304a2ae208SSatish Balay #undef __FUNCT__
9314a2ae208SSatish Balay #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
9326849ba73SBarry Smith static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
93357b952d6SSatish Balay {
93457b952d6SSatish Balay   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
935dfbe8321SBarry Smith   PetscErrorCode    ierr;
9367da1fb6eSBarry Smith   PetscMPIInt       rank = baij->rank;
937d0f46423SBarry Smith   PetscInt          bs   = mat->rmap->bs;
938ace3abfcSBarry Smith   PetscBool         iascii,isdraw;
939b0a32e0cSBarry Smith   PetscViewer       sviewer;
940f3ef73ceSBarry Smith   PetscViewerFormat format;
94157b952d6SSatish Balay 
942d64ed03dSBarry Smith   PetscFunctionBegin;
943251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
944251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
94532077d6dSBarry Smith   if (iascii) {
946b0a32e0cSBarry Smith     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
947456192e2SBarry Smith     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
9484e220ebcSLois Curfman McInnes       MatInfo info;
949ce94432eSBarry Smith       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
950d41123aaSBarry Smith       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
9517b23a99aSBarry Smith       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
95277431f27SBarry Smith       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
95316608c43SJed Brown                                                 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
954d132466eSBarry Smith       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
955e6dd01d4SJed Brown       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
956d132466eSBarry Smith       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
957e6dd01d4SJed Brown       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
958b0a32e0cSBarry Smith       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
9597b23a99aSBarry Smith       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
96007d81ca4SBarry Smith       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
96157b952d6SSatish Balay       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
9623a40ed3dSBarry Smith       PetscFunctionReturn(0);
963fb9695e5SSatish Balay     } else if (format == PETSC_VIEWER_ASCII_INFO) {
96477431f27SBarry Smith       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
9653a40ed3dSBarry Smith       PetscFunctionReturn(0);
96604929863SHong Zhang     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
96704929863SHong Zhang       PetscFunctionReturn(0);
96857b952d6SSatish Balay     }
96957b952d6SSatish Balay   }
97057b952d6SSatish Balay 
9710f5bd95cSBarry Smith   if (isdraw) {
972b0a32e0cSBarry Smith     PetscDraw draw;
973ace3abfcSBarry Smith     PetscBool isnull;
974b0a32e0cSBarry Smith     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
975b0a32e0cSBarry Smith     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
97657b952d6SSatish Balay   }
97757b952d6SSatish Balay 
9787da1fb6eSBarry Smith   {
97957b952d6SSatish Balay     /* assemble the entire matrix onto first processor. */
98057b952d6SSatish Balay     Mat         A;
98157b952d6SSatish Balay     Mat_SeqBAIJ *Aloc;
982d0f46423SBarry Smith     PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
9833eda8832SBarry Smith     MatScalar   *a;
9843e219373SBarry Smith     const char  *matname;
98557b952d6SSatish Balay 
986f204ca49SKris Buschelman     /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
987f204ca49SKris Buschelman     /* Perhaps this should be the type of mat? */
988ce94432eSBarry Smith     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
98957b952d6SSatish Balay     if (!rank) {
990f69a0ea3SMatthew Knepley       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
991d64ed03dSBarry Smith     } else {
992f69a0ea3SMatthew Knepley       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
99357b952d6SSatish Balay     }
994f204ca49SKris Buschelman     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
9950298fd71SBarry Smith     ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
9962b82e772SSatish Balay     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
9973bb1ff40SBarry Smith     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
99857b952d6SSatish Balay 
99957b952d6SSatish Balay     /* copy over the A part */
100057b952d6SSatish Balay     Aloc = (Mat_SeqBAIJ*)baij->A->data;
100157b952d6SSatish Balay     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1002785e854fSJed Brown     ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
100357b952d6SSatish Balay 
100457b952d6SSatish Balay     for (i=0; i<mbs; i++) {
1005899cda47SBarry Smith       rvals[0] = bs*(baij->rstartbs + i);
100626fbe8dcSKarl Rupp       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
100757b952d6SSatish Balay       for (j=ai[i]; j<ai[i+1]; j++) {
1008899cda47SBarry Smith         col = (baij->cstartbs+aj[j])*bs;
100957b952d6SSatish Balay         for (k=0; k<bs; k++) {
101097e5c40aSBarry Smith           ierr      = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1011cee3aa6bSSatish Balay           col++; a += bs;
101257b952d6SSatish Balay         }
101357b952d6SSatish Balay       }
101457b952d6SSatish Balay     }
101557b952d6SSatish Balay     /* copy over the B part */
101657b952d6SSatish Balay     Aloc = (Mat_SeqBAIJ*)baij->B->data;
101757b952d6SSatish Balay     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
101857b952d6SSatish Balay     for (i=0; i<mbs; i++) {
1019899cda47SBarry Smith       rvals[0] = bs*(baij->rstartbs + i);
102026fbe8dcSKarl Rupp       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
102157b952d6SSatish Balay       for (j=ai[i]; j<ai[i+1]; j++) {
102257b952d6SSatish Balay         col = baij->garray[aj[j]]*bs;
102357b952d6SSatish Balay         for (k=0; k<bs; k++) {
102497e5c40aSBarry Smith           ierr      = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1025cee3aa6bSSatish Balay           col++; a += bs;
102657b952d6SSatish Balay         }
102757b952d6SSatish Balay       }
102857b952d6SSatish Balay     }
1029606d414cSSatish Balay     ierr = PetscFree(rvals);CHKERRQ(ierr);
10306d4a8577SBarry Smith     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
10316d4a8577SBarry Smith     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
103255843e3eSBarry Smith     /*
103355843e3eSBarry Smith        Everyone has to call to draw the matrix since the graphics waits are
1034b0a32e0cSBarry Smith        synchronized across all processors that share the PetscDraw object
103555843e3eSBarry Smith     */
1036b0a32e0cSBarry Smith     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1037ade3a672SBarry Smith     ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
10383e219373SBarry Smith     if (!rank) {
1039ade3a672SBarry Smith       ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
10407da1fb6eSBarry Smith       ierr = MatView_SeqBAIJ(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
104157b952d6SSatish Balay     }
1042b0a32e0cSBarry Smith     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
10436bf464f9SBarry Smith     ierr = MatDestroy(&A);CHKERRQ(ierr);
104457b952d6SSatish Balay   }
10453a40ed3dSBarry Smith   PetscFunctionReturn(0);
104657b952d6SSatish Balay }
104757b952d6SSatish Balay 
10484a2ae208SSatish Balay #undef __FUNCT__
1049660746e0SBarry Smith #define __FUNCT__ "MatView_MPIBAIJ_Binary"
1050660746e0SBarry Smith static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
1051660746e0SBarry Smith {
1052660746e0SBarry Smith   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)mat->data;
1053660746e0SBarry Smith   Mat_SeqBAIJ    *A = (Mat_SeqBAIJ*)a->A->data;
1054660746e0SBarry Smith   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)a->B->data;
1055660746e0SBarry Smith   PetscErrorCode ierr;
10565f48b12bSBarry Smith   PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen;
1057e96a6426SSatish Balay   PetscInt       *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
1058660746e0SBarry Smith   int            fd;
1059660746e0SBarry Smith   PetscScalar    *column_values;
1060660746e0SBarry Smith   FILE           *file;
1061660746e0SBarry Smith   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1062638eb2ebSBarry Smith   PetscInt       message_count,flowcontrolcount;
1063660746e0SBarry Smith 
1064660746e0SBarry Smith   PetscFunctionBegin;
1065ce94432eSBarry Smith   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1066ce94432eSBarry Smith   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1067660746e0SBarry Smith   nz   = bs2*(A->nz + B->nz);
1068660746e0SBarry Smith   rlen = mat->rmap->n;
1069660746e0SBarry Smith   if (!rank) {
1070660746e0SBarry Smith     header[0] = MAT_FILE_CLASSID;
1071660746e0SBarry Smith     header[1] = mat->rmap->N;
1072660746e0SBarry Smith     header[2] = mat->cmap->N;
107326fbe8dcSKarl Rupp 
1074ce94432eSBarry Smith     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1075660746e0SBarry Smith     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1076660746e0SBarry Smith     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1077660746e0SBarry Smith     /* get largest number of rows any processor has */
1078660746e0SBarry Smith     range = mat->rmap->range;
1079660746e0SBarry Smith     for (i=1; i<size; i++) {
1080660746e0SBarry Smith       rlen = PetscMax(rlen,range[i+1] - range[i]);
1081660746e0SBarry Smith     }
1082660746e0SBarry Smith   } else {
1083ce94432eSBarry Smith     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1084660746e0SBarry Smith   }
1085660746e0SBarry Smith 
1086785e854fSJed Brown   ierr = PetscMalloc1((rlen/bs),&crow_lens);CHKERRQ(ierr);
1087660746e0SBarry Smith   /* compute lengths of each row  */
1088660746e0SBarry Smith   for (i=0; i<a->mbs; i++) {
1089660746e0SBarry Smith     crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1090660746e0SBarry Smith   }
1091660746e0SBarry Smith   /* store the row lengths to the file */
1092638eb2ebSBarry Smith   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1093660746e0SBarry Smith   if (!rank) {
1094660746e0SBarry Smith     MPI_Status status;
1095785e854fSJed Brown     ierr = PetscMalloc1(rlen,&row_lens);CHKERRQ(ierr);
1096660746e0SBarry Smith     rlen = (range[1] - range[0])/bs;
1097660746e0SBarry Smith     for (i=0; i<rlen; i++) {
1098660746e0SBarry Smith       for (j=0; j<bs; j++) {
1099660746e0SBarry Smith         row_lens[i*bs+j] = bs*crow_lens[i];
1100660746e0SBarry Smith       }
1101660746e0SBarry Smith     }
1102660746e0SBarry Smith     ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1103660746e0SBarry Smith     for (i=1; i<size; i++) {
1104660746e0SBarry Smith       rlen = (range[i+1] - range[i])/bs;
1105639ff905SBarry Smith       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1106ce94432eSBarry Smith       ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1107660746e0SBarry Smith       for (k=0; k<rlen; k++) {
1108660746e0SBarry Smith         for (j=0; j<bs; j++) {
1109660746e0SBarry Smith           row_lens[k*bs+j] = bs*crow_lens[k];
1110660746e0SBarry Smith         }
1111660746e0SBarry Smith       }
1112660746e0SBarry Smith       ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1113660746e0SBarry Smith     }
1114639ff905SBarry Smith     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1115660746e0SBarry Smith     ierr = PetscFree(row_lens);CHKERRQ(ierr);
1116660746e0SBarry Smith   } else {
1117639ff905SBarry Smith     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1118ce94432eSBarry Smith     ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1119639ff905SBarry Smith     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1120660746e0SBarry Smith   }
1121660746e0SBarry Smith   ierr = PetscFree(crow_lens);CHKERRQ(ierr);
1122660746e0SBarry Smith 
1123660746e0SBarry Smith   /* load up the local column indices. Include for all rows not just one for each block row since process 0 does not have the
1124660746e0SBarry Smith      information needed to make it for each row from a block row. This does require more communication but still not more than
1125660746e0SBarry Smith      the communication needed for the nonzero values  */
1126660746e0SBarry Smith   nzmax = nz; /*  space a largest processor needs */
1127ce94432eSBarry Smith   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1128785e854fSJed Brown   ierr  = PetscMalloc1(nzmax,&column_indices);CHKERRQ(ierr);
1129660746e0SBarry Smith   cnt   = 0;
1130660746e0SBarry Smith   for (i=0; i<a->mbs; i++) {
1131660746e0SBarry Smith     pcnt = cnt;
1132660746e0SBarry Smith     for (j=B->i[i]; j<B->i[i+1]; j++) {
1133660746e0SBarry Smith       if ((col = garray[B->j[j]]) > cstart) break;
1134660746e0SBarry Smith       for (l=0; l<bs; l++) {
1135660746e0SBarry Smith         column_indices[cnt++] = bs*col+l;
1136660746e0SBarry Smith       }
1137660746e0SBarry Smith     }
1138660746e0SBarry Smith     for (k=A->i[i]; k<A->i[i+1]; k++) {
1139660746e0SBarry Smith       for (l=0; l<bs; l++) {
1140660746e0SBarry Smith         column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
1141660746e0SBarry Smith       }
1142660746e0SBarry Smith     }
1143660746e0SBarry Smith     for (; j<B->i[i+1]; j++) {
1144660746e0SBarry Smith       for (l=0; l<bs; l++) {
1145660746e0SBarry Smith         column_indices[cnt++] = bs*garray[B->j[j]]+l;
1146660746e0SBarry Smith       }
1147660746e0SBarry Smith     }
1148660746e0SBarry Smith     len = cnt - pcnt;
1149660746e0SBarry Smith     for (k=1; k<bs; k++) {
1150660746e0SBarry Smith       ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr);
1151660746e0SBarry Smith       cnt += len;
1152660746e0SBarry Smith     }
1153660746e0SBarry Smith   }
1154660746e0SBarry Smith   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1155660746e0SBarry Smith 
1156660746e0SBarry Smith   /* store the columns to the file */
1157638eb2ebSBarry Smith   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1158660746e0SBarry Smith   if (!rank) {
1159660746e0SBarry Smith     MPI_Status status;
1160660746e0SBarry Smith     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1161660746e0SBarry Smith     for (i=1; i<size; i++) {
1162639ff905SBarry Smith       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1163ce94432eSBarry Smith       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1164ce94432eSBarry Smith       ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1165660746e0SBarry Smith       ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1166660746e0SBarry Smith     }
1167639ff905SBarry Smith     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1168660746e0SBarry Smith   } else {
1169639ff905SBarry Smith     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1170ce94432eSBarry Smith     ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1171ce94432eSBarry Smith     ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1172639ff905SBarry Smith     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1173660746e0SBarry Smith   }
1174660746e0SBarry Smith   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1175660746e0SBarry Smith 
1176660746e0SBarry Smith   /* load up the numerical values */
1177785e854fSJed Brown   ierr = PetscMalloc1(nzmax,&column_values);CHKERRQ(ierr);
1178660746e0SBarry Smith   cnt  = 0;
1179660746e0SBarry Smith   for (i=0; i<a->mbs; i++) {
1180660746e0SBarry Smith     rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
1181660746e0SBarry Smith     for (j=B->i[i]; j<B->i[i+1]; j++) {
1182660746e0SBarry Smith       if (garray[B->j[j]] > cstart) break;
1183660746e0SBarry Smith       for (l=0; l<bs; l++) {
1184660746e0SBarry Smith         for (ll=0; ll<bs; ll++) {
1185660746e0SBarry Smith           column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1186660746e0SBarry Smith         }
1187660746e0SBarry Smith       }
1188660746e0SBarry Smith       cnt += bs;
1189660746e0SBarry Smith     }
1190660746e0SBarry Smith     for (k=A->i[i]; k<A->i[i+1]; k++) {
1191660746e0SBarry Smith       for (l=0; l<bs; l++) {
1192660746e0SBarry Smith         for (ll=0; ll<bs; ll++) {
1193660746e0SBarry Smith           column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
1194660746e0SBarry Smith         }
1195660746e0SBarry Smith       }
1196660746e0SBarry Smith       cnt += bs;
1197660746e0SBarry Smith     }
1198660746e0SBarry Smith     for (; j<B->i[i+1]; j++) {
1199660746e0SBarry Smith       for (l=0; l<bs; l++) {
1200660746e0SBarry Smith         for (ll=0; ll<bs; ll++) {
1201660746e0SBarry Smith           column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1202660746e0SBarry Smith         }
1203660746e0SBarry Smith       }
1204660746e0SBarry Smith       cnt += bs;
1205660746e0SBarry Smith     }
1206660746e0SBarry Smith     cnt += (bs-1)*rlen;
1207660746e0SBarry Smith   }
1208660746e0SBarry Smith   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1209660746e0SBarry Smith 
1210660746e0SBarry Smith   /* store the column values to the file */
1211638eb2ebSBarry Smith   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1212660746e0SBarry Smith   if (!rank) {
1213660746e0SBarry Smith     MPI_Status status;
1214660746e0SBarry Smith     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1215660746e0SBarry Smith     for (i=1; i<size; i++) {
1216639ff905SBarry Smith       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1217ce94432eSBarry Smith       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1218ce94432eSBarry Smith       ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1219660746e0SBarry Smith       ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1220660746e0SBarry Smith     }
1221639ff905SBarry Smith     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1222660746e0SBarry Smith   } else {
1223639ff905SBarry Smith     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1224ce94432eSBarry Smith     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1225ce94432eSBarry Smith     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1226639ff905SBarry Smith     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1227660746e0SBarry Smith   }
1228660746e0SBarry Smith   ierr = PetscFree(column_values);CHKERRQ(ierr);
1229660746e0SBarry Smith 
1230660746e0SBarry Smith   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1231660746e0SBarry Smith   if (file) {
1232660746e0SBarry Smith     fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1233660746e0SBarry Smith   }
1234660746e0SBarry Smith   PetscFunctionReturn(0);
1235660746e0SBarry Smith }
1236660746e0SBarry Smith 
1237660746e0SBarry Smith #undef __FUNCT__
12384a2ae208SSatish Balay #define __FUNCT__ "MatView_MPIBAIJ"
1239dfbe8321SBarry Smith PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
124057b952d6SSatish Balay {
1241dfbe8321SBarry Smith   PetscErrorCode ierr;
1242ace3abfcSBarry Smith   PetscBool      iascii,isdraw,issocket,isbinary;
124357b952d6SSatish Balay 
1244d64ed03dSBarry Smith   PetscFunctionBegin;
1245251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1246251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1247251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1248251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1249660746e0SBarry Smith   if (iascii || isdraw || issocket) {
12507b2a1423SBarry Smith     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1251660746e0SBarry Smith   } else if (isbinary) {
1252660746e0SBarry Smith     ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
125357b952d6SSatish Balay   }
12543a40ed3dSBarry Smith   PetscFunctionReturn(0);
125557b952d6SSatish Balay }
125657b952d6SSatish Balay 
12574a2ae208SSatish Balay #undef __FUNCT__
12584a2ae208SSatish Balay #define __FUNCT__ "MatDestroy_MPIBAIJ"
1259dfbe8321SBarry Smith PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
126079bdfe76SSatish Balay {
126179bdfe76SSatish Balay   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1262dfbe8321SBarry Smith   PetscErrorCode ierr;
126379bdfe76SSatish Balay 
1264d64ed03dSBarry Smith   PetscFunctionBegin;
1265aa482453SBarry Smith #if defined(PETSC_USE_LOG)
1266d0f46423SBarry Smith   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
126779bdfe76SSatish Balay #endif
12688798bf22SSatish Balay   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
12698798bf22SSatish Balay   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
12706bf464f9SBarry Smith   ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
12716bf464f9SBarry Smith   ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
1272aa482453SBarry Smith #if defined(PETSC_USE_CTABLE)
12736bc0bbbfSBarry Smith   ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
127448e59246SSatish Balay #else
127505b42c5fSBarry Smith   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
127648e59246SSatish Balay #endif
127705b42c5fSBarry Smith   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
12786bf464f9SBarry Smith   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
12796bf464f9SBarry Smith   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
1280fca92195SBarry Smith   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
128105b42c5fSBarry Smith   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
1282fca92195SBarry Smith   ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
1283899cda47SBarry Smith   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
1284bf0cc555SLisandro Dalcin   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1285901853e0SKris Buschelman 
1286dbd8c25aSHong Zhang   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1287bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1288bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1289bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
1290bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1291bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1292bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1293bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C",NULL);CHKERRQ(ierr);
1294bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1295bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C",NULL);CHKERRQ(ierr);
12963a40ed3dSBarry Smith   PetscFunctionReturn(0);
129779bdfe76SSatish Balay }
129879bdfe76SSatish Balay 
12994a2ae208SSatish Balay #undef __FUNCT__
13004a2ae208SSatish Balay #define __FUNCT__ "MatMult_MPIBAIJ"
1301dfbe8321SBarry Smith PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1302cee3aa6bSSatish Balay {
1303cee3aa6bSSatish Balay   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1304dfbe8321SBarry Smith   PetscErrorCode ierr;
1305b24ad042SBarry Smith   PetscInt       nt;
1306cee3aa6bSSatish Balay 
1307d64ed03dSBarry Smith   PetscFunctionBegin;
1308e1311b90SBarry Smith   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1309e7e72b3dSBarry Smith   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1310e1311b90SBarry Smith   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1311e7e72b3dSBarry Smith   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
1312ca9f406cSSatish Balay   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1313f830108cSBarry Smith   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1314ca9f406cSSatish Balay   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1315f830108cSBarry Smith   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
13163a40ed3dSBarry Smith   PetscFunctionReturn(0);
1317cee3aa6bSSatish Balay }
1318cee3aa6bSSatish Balay 
13194a2ae208SSatish Balay #undef __FUNCT__
13204a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_MPIBAIJ"
1321dfbe8321SBarry Smith PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1322cee3aa6bSSatish Balay {
1323cee3aa6bSSatish Balay   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1324dfbe8321SBarry Smith   PetscErrorCode ierr;
1325d64ed03dSBarry Smith 
1326d64ed03dSBarry Smith   PetscFunctionBegin;
1327ca9f406cSSatish Balay   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1328f830108cSBarry Smith   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1329ca9f406cSSatish Balay   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1330f830108cSBarry Smith   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
13313a40ed3dSBarry Smith   PetscFunctionReturn(0);
1332cee3aa6bSSatish Balay }
1333cee3aa6bSSatish Balay 
13344a2ae208SSatish Balay #undef __FUNCT__
13354a2ae208SSatish Balay #define __FUNCT__ "MatMultTranspose_MPIBAIJ"
1336dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1337cee3aa6bSSatish Balay {
1338cee3aa6bSSatish Balay   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1339dfbe8321SBarry Smith   PetscErrorCode ierr;
1340ace3abfcSBarry Smith   PetscBool      merged;
1341cee3aa6bSSatish Balay 
1342d64ed03dSBarry Smith   PetscFunctionBegin;
1343a5ff213dSBarry Smith   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1344cee3aa6bSSatish Balay   /* do nondiagonal part */
13457c922b88SBarry Smith   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1346a5ff213dSBarry Smith   if (!merged) {
1347cee3aa6bSSatish Balay     /* send it on its way */
1348ca9f406cSSatish Balay     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1349cee3aa6bSSatish Balay     /* do local part */
13507c922b88SBarry Smith     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1351cee3aa6bSSatish Balay     /* receive remote parts: note this assumes the values are not actually */
1352a5ff213dSBarry Smith     /* inserted in yy until the next line */
1353ca9f406cSSatish Balay     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1354a5ff213dSBarry Smith   } else {
1355a5ff213dSBarry Smith     /* do local part */
1356a5ff213dSBarry Smith     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1357a5ff213dSBarry Smith     /* send it on its way */
1358ca9f406cSSatish Balay     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1359a5ff213dSBarry Smith     /* values actually were received in the Begin() but we need to call this nop */
1360ca9f406cSSatish Balay     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1361a5ff213dSBarry Smith   }
13623a40ed3dSBarry Smith   PetscFunctionReturn(0);
1363cee3aa6bSSatish Balay }
1364cee3aa6bSSatish Balay 
13654a2ae208SSatish Balay #undef __FUNCT__
13664a2ae208SSatish Balay #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
1367dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1368cee3aa6bSSatish Balay {
1369cee3aa6bSSatish Balay   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1370dfbe8321SBarry Smith   PetscErrorCode ierr;
1371cee3aa6bSSatish Balay 
1372d64ed03dSBarry Smith   PetscFunctionBegin;
1373cee3aa6bSSatish Balay   /* do nondiagonal part */
13747c922b88SBarry Smith   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1375cee3aa6bSSatish Balay   /* send it on its way */
1376ca9f406cSSatish Balay   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1377cee3aa6bSSatish Balay   /* do local part */
13787c922b88SBarry Smith   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1379cee3aa6bSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
1380cee3aa6bSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
1381cee3aa6bSSatish Balay   /* but is not perhaps always true. */
1382ca9f406cSSatish Balay   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
13833a40ed3dSBarry Smith   PetscFunctionReturn(0);
1384cee3aa6bSSatish Balay }
1385cee3aa6bSSatish Balay 
1386cee3aa6bSSatish Balay /*
1387cee3aa6bSSatish Balay   This only works correctly for square matrices where the subblock A->A is the
1388cee3aa6bSSatish Balay    diagonal block
1389cee3aa6bSSatish Balay */
13904a2ae208SSatish Balay #undef __FUNCT__
13914a2ae208SSatish Balay #define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
1392dfbe8321SBarry Smith PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1393cee3aa6bSSatish Balay {
1394cee3aa6bSSatish Balay   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1395dfbe8321SBarry Smith   PetscErrorCode ierr;
1396d64ed03dSBarry Smith 
1397d64ed03dSBarry Smith   PetscFunctionBegin;
1398e32f2f54SBarry Smith   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
13993a40ed3dSBarry Smith   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
14003a40ed3dSBarry Smith   PetscFunctionReturn(0);
1401cee3aa6bSSatish Balay }
1402cee3aa6bSSatish Balay 
14034a2ae208SSatish Balay #undef __FUNCT__
14044a2ae208SSatish Balay #define __FUNCT__ "MatScale_MPIBAIJ"
1405f4df32b1SMatthew Knepley PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1406cee3aa6bSSatish Balay {
1407cee3aa6bSSatish Balay   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1408dfbe8321SBarry Smith   PetscErrorCode ierr;
1409d64ed03dSBarry Smith 
1410d64ed03dSBarry Smith   PetscFunctionBegin;
1411f4df32b1SMatthew Knepley   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1412f4df32b1SMatthew Knepley   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
14133a40ed3dSBarry Smith   PetscFunctionReturn(0);
1414cee3aa6bSSatish Balay }
1415026e39d0SSatish Balay 
14164a2ae208SSatish Balay #undef __FUNCT__
14174a2ae208SSatish Balay #define __FUNCT__ "MatGetRow_MPIBAIJ"
1418b24ad042SBarry Smith PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1419acdf5bf4SSatish Balay {
1420acdf5bf4SSatish Balay   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
142187828ca2SBarry Smith   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
14226849ba73SBarry Smith   PetscErrorCode ierr;
1423d0f46423SBarry Smith   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1424d0f46423SBarry Smith   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1425899cda47SBarry Smith   PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;
1426acdf5bf4SSatish Balay 
1427d64ed03dSBarry Smith   PetscFunctionBegin;
1428e7e72b3dSBarry Smith   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1429e32f2f54SBarry Smith   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1430acdf5bf4SSatish Balay   mat->getrowactive = PETSC_TRUE;
1431acdf5bf4SSatish Balay 
1432acdf5bf4SSatish Balay   if (!mat->rowvalues && (idx || v)) {
1433acdf5bf4SSatish Balay     /*
1434acdf5bf4SSatish Balay         allocate enough space to hold information from the longest row.
1435acdf5bf4SSatish Balay     */
1436acdf5bf4SSatish Balay     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1437b24ad042SBarry Smith     PetscInt    max = 1,mbs = mat->mbs,tmp;
1438bd16c2feSSatish Balay     for (i=0; i<mbs; i++) {
1439acdf5bf4SSatish Balay       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
144026fbe8dcSKarl Rupp       if (max < tmp) max = tmp;
1441acdf5bf4SSatish Balay     }
1442dcca6d9dSJed Brown     ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
1443acdf5bf4SSatish Balay   }
1444d9d09a02SSatish Balay   lrow = row - brstart;
1445acdf5bf4SSatish Balay 
1446acdf5bf4SSatish Balay   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1447acdf5bf4SSatish Balay   if (!v)   {pvA = 0; pvB = 0;}
1448acdf5bf4SSatish Balay   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1449f830108cSBarry Smith   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1450f830108cSBarry Smith   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1451acdf5bf4SSatish Balay   nztot = nzA + nzB;
1452acdf5bf4SSatish Balay 
1453acdf5bf4SSatish Balay   cmap = mat->garray;
1454acdf5bf4SSatish Balay   if (v  || idx) {
1455acdf5bf4SSatish Balay     if (nztot) {
1456acdf5bf4SSatish Balay       /* Sort by increasing column numbers, assuming A and B already sorted */
1457b24ad042SBarry Smith       PetscInt imark = -1;
1458acdf5bf4SSatish Balay       if (v) {
1459acdf5bf4SSatish Balay         *v = v_p = mat->rowvalues;
1460acdf5bf4SSatish Balay         for (i=0; i<nzB; i++) {
1461d9d09a02SSatish Balay           if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1462acdf5bf4SSatish Balay           else break;
1463acdf5bf4SSatish Balay         }
1464acdf5bf4SSatish Balay         imark = i;
1465acdf5bf4SSatish Balay         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1466acdf5bf4SSatish Balay         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1467acdf5bf4SSatish Balay       }
1468acdf5bf4SSatish Balay       if (idx) {
1469acdf5bf4SSatish Balay         *idx = idx_p = mat->rowindices;
1470acdf5bf4SSatish Balay         if (imark > -1) {
1471acdf5bf4SSatish Balay           for (i=0; i<imark; i++) {
1472bd16c2feSSatish Balay             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1473acdf5bf4SSatish Balay           }
1474acdf5bf4SSatish Balay         } else {
1475acdf5bf4SSatish Balay           for (i=0; i<nzB; i++) {
147626fbe8dcSKarl Rupp             if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1477acdf5bf4SSatish Balay             else break;
1478acdf5bf4SSatish Balay           }
1479acdf5bf4SSatish Balay           imark = i;
1480acdf5bf4SSatish Balay         }
1481d9d09a02SSatish Balay         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1482d9d09a02SSatish Balay         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1483acdf5bf4SSatish Balay       }
1484d64ed03dSBarry Smith     } else {
1485d212a18eSSatish Balay       if (idx) *idx = 0;
1486d212a18eSSatish Balay       if (v)   *v   = 0;
1487d212a18eSSatish Balay     }
1488acdf5bf4SSatish Balay   }
1489acdf5bf4SSatish Balay   *nz  = nztot;
1490f830108cSBarry Smith   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1491f830108cSBarry Smith   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
14923a40ed3dSBarry Smith   PetscFunctionReturn(0);
1493acdf5bf4SSatish Balay }
1494acdf5bf4SSatish Balay 
14954a2ae208SSatish Balay #undef __FUNCT__
14964a2ae208SSatish Balay #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1497b24ad042SBarry Smith PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1498acdf5bf4SSatish Balay {
1499acdf5bf4SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1500d64ed03dSBarry Smith 
1501d64ed03dSBarry Smith   PetscFunctionBegin;
1502e7e72b3dSBarry Smith   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1503acdf5bf4SSatish Balay   baij->getrowactive = PETSC_FALSE;
15043a40ed3dSBarry Smith   PetscFunctionReturn(0);
1505acdf5bf4SSatish Balay }
1506acdf5bf4SSatish Balay 
15074a2ae208SSatish Balay #undef __FUNCT__
15084a2ae208SSatish Balay #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1509dfbe8321SBarry Smith PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
151058667388SSatish Balay {
151158667388SSatish Balay   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1512dfbe8321SBarry Smith   PetscErrorCode ierr;
1513d64ed03dSBarry Smith 
1514d64ed03dSBarry Smith   PetscFunctionBegin;
151558667388SSatish Balay   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
151658667388SSatish Balay   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
15173a40ed3dSBarry Smith   PetscFunctionReturn(0);
151858667388SSatish Balay }
15190ac07820SSatish Balay 
15204a2ae208SSatish Balay #undef __FUNCT__
15214a2ae208SSatish Balay #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1522dfbe8321SBarry Smith PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
15230ac07820SSatish Balay {
15244e220ebcSLois Curfman McInnes   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
15254e220ebcSLois Curfman McInnes   Mat            A  = a->A,B = a->B;
1526dfbe8321SBarry Smith   PetscErrorCode ierr;
1527329f5518SBarry Smith   PetscReal      isend[5],irecv[5];
15280ac07820SSatish Balay 
1529d64ed03dSBarry Smith   PetscFunctionBegin;
1530d0f46423SBarry Smith   info->block_size = (PetscReal)matin->rmap->bs;
153126fbe8dcSKarl Rupp 
15324e220ebcSLois Curfman McInnes   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
153326fbe8dcSKarl Rupp 
15340e4b21beSBarry Smith   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1535de87f314SBarry Smith   isend[3] = info->memory;  isend[4] = info->mallocs;
153626fbe8dcSKarl Rupp 
15374e220ebcSLois Curfman McInnes   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
153826fbe8dcSKarl Rupp 
15390e4b21beSBarry Smith   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1540de87f314SBarry Smith   isend[3] += info->memory;  isend[4] += info->mallocs;
154126fbe8dcSKarl Rupp 
15420ac07820SSatish Balay   if (flag == MAT_LOCAL) {
15434e220ebcSLois Curfman McInnes     info->nz_used      = isend[0];
15444e220ebcSLois Curfman McInnes     info->nz_allocated = isend[1];
15454e220ebcSLois Curfman McInnes     info->nz_unneeded  = isend[2];
15464e220ebcSLois Curfman McInnes     info->memory       = isend[3];
15474e220ebcSLois Curfman McInnes     info->mallocs      = isend[4];
15480ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_MAX) {
1549ce94432eSBarry Smith     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
155026fbe8dcSKarl Rupp 
15514e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
15524e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
15534e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
15544e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
15554e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
15560ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_SUM) {
1557ce94432eSBarry Smith     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
155826fbe8dcSKarl Rupp 
15594e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
15604e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
15614e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
15624e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
15634e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
1564ce94432eSBarry Smith   } else SETERRQ1(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
15654e220ebcSLois Curfman McInnes   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
15664e220ebcSLois Curfman McInnes   info->fill_ratio_needed = 0;
15674e220ebcSLois Curfman McInnes   info->factor_mallocs    = 0;
15683a40ed3dSBarry Smith   PetscFunctionReturn(0);
15690ac07820SSatish Balay }
15700ac07820SSatish Balay 
15714a2ae208SSatish Balay #undef __FUNCT__
15724a2ae208SSatish Balay #define __FUNCT__ "MatSetOption_MPIBAIJ"
1573ace3abfcSBarry Smith PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg)
157458667388SSatish Balay {
157558667388SSatish Balay   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1576dfbe8321SBarry Smith   PetscErrorCode ierr;
157758667388SSatish Balay 
1578d64ed03dSBarry Smith   PetscFunctionBegin;
157912c028f9SKris Buschelman   switch (op) {
1580512a5fc5SBarry Smith   case MAT_NEW_NONZERO_LOCATIONS:
158112c028f9SKris Buschelman   case MAT_NEW_NONZERO_ALLOCATION_ERR:
158228b2fa4aSMatthew Knepley   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1583a9817697SBarry Smith   case MAT_KEEP_NONZERO_PATTERN:
158412c028f9SKris Buschelman   case MAT_NEW_NONZERO_LOCATION_ERR:
15854e0d8c25SBarry Smith     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
15864e0d8c25SBarry Smith     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
158712c028f9SKris Buschelman     break;
158812c028f9SKris Buschelman   case MAT_ROW_ORIENTED:
15894e0d8c25SBarry Smith     a->roworiented = flg;
159026fbe8dcSKarl Rupp 
15914e0d8c25SBarry Smith     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
15924e0d8c25SBarry Smith     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
159312c028f9SKris Buschelman     break;
15944e0d8c25SBarry Smith   case MAT_NEW_DIAGONALS:
1595290bbb0aSBarry Smith     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
159612c028f9SKris Buschelman     break;
159712c028f9SKris Buschelman   case MAT_IGNORE_OFF_PROC_ENTRIES:
15984e0d8c25SBarry Smith     a->donotstash = flg;
159912c028f9SKris Buschelman     break;
160012c028f9SKris Buschelman   case MAT_USE_HASH_TABLE:
16014e0d8c25SBarry Smith     a->ht_flag = flg;
160212c028f9SKris Buschelman     break;
160377e54ba9SKris Buschelman   case MAT_SYMMETRIC:
160477e54ba9SKris Buschelman   case MAT_STRUCTURALLY_SYMMETRIC:
16052188ac68SBarry Smith   case MAT_HERMITIAN:
16062188ac68SBarry Smith   case MAT_SYMMETRY_ETERNAL:
16074e0d8c25SBarry Smith     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
160877e54ba9SKris Buschelman     break;
160912c028f9SKris Buschelman   default:
1610ce94432eSBarry Smith     SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"unknown option %d",op);
1611d64ed03dSBarry Smith   }
16123a40ed3dSBarry Smith   PetscFunctionReturn(0);
161358667388SSatish Balay }
161458667388SSatish Balay 
16154a2ae208SSatish Balay #undef __FUNCT__
16166a719282SBarry Smith #define __FUNCT__ "MatTranspose_MPIBAIJ"
1617fc4dec0aSBarry Smith PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
16180ac07820SSatish Balay {
16190ac07820SSatish Balay   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
16200ac07820SSatish Balay   Mat_SeqBAIJ    *Aloc;
16210ac07820SSatish Balay   Mat            B;
1622dfbe8321SBarry Smith   PetscErrorCode ierr;
1623d0f46423SBarry Smith   PetscInt       M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
1624d0f46423SBarry Smith   PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
16253eda8832SBarry Smith   MatScalar      *a;
16260ac07820SSatish Balay 
1627d64ed03dSBarry Smith   PetscFunctionBegin;
1628ce94432eSBarry Smith   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1629fc4dec0aSBarry Smith   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1630ce94432eSBarry Smith     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1631d0f46423SBarry Smith     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
16327adad957SLisandro Dalcin     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
16332e72b8d9SBarry Smith     /* Do not know preallocation information, but must set block size */
16340298fd71SBarry Smith     ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL);CHKERRQ(ierr);
1635fc4dec0aSBarry Smith   } else {
1636fc4dec0aSBarry Smith     B = *matout;
1637fc4dec0aSBarry Smith   }
16380ac07820SSatish Balay 
16390ac07820SSatish Balay   /* copy over the A part */
16400ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*)baij->A->data;
16410ac07820SSatish Balay   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1642785e854fSJed Brown   ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
16430ac07820SSatish Balay 
16440ac07820SSatish Balay   for (i=0; i<mbs; i++) {
1645899cda47SBarry Smith     rvals[0] = bs*(baij->rstartbs + i);
164626fbe8dcSKarl Rupp     for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
16470ac07820SSatish Balay     for (j=ai[i]; j<ai[i+1]; j++) {
1648899cda47SBarry Smith       col = (baij->cstartbs+aj[j])*bs;
16490ac07820SSatish Balay       for (k=0; k<bs; k++) {
165097e5c40aSBarry Smith         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
165126fbe8dcSKarl Rupp 
16520ac07820SSatish Balay         col++; a += bs;
16530ac07820SSatish Balay       }
16540ac07820SSatish Balay     }
16550ac07820SSatish Balay   }
16560ac07820SSatish Balay   /* copy over the B part */
16570ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*)baij->B->data;
16580ac07820SSatish Balay   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
16590ac07820SSatish Balay   for (i=0; i<mbs; i++) {
1660899cda47SBarry Smith     rvals[0] = bs*(baij->rstartbs + i);
166126fbe8dcSKarl Rupp     for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
16620ac07820SSatish Balay     for (j=ai[i]; j<ai[i+1]; j++) {
16630ac07820SSatish Balay       col = baij->garray[aj[j]]*bs;
16640ac07820SSatish Balay       for (k=0; k<bs; k++) {
166597e5c40aSBarry Smith         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
166626fbe8dcSKarl Rupp         col++;
166726fbe8dcSKarl Rupp         a += bs;
16680ac07820SSatish Balay       }
16690ac07820SSatish Balay     }
16700ac07820SSatish Balay   }
1671606d414cSSatish Balay   ierr = PetscFree(rvals);CHKERRQ(ierr);
16720ac07820SSatish Balay   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
16730ac07820SSatish Balay   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
16740ac07820SSatish Balay 
167526fbe8dcSKarl Rupp   if (reuse == MAT_INITIAL_MATRIX || *matout != A) *matout = B;
167626fbe8dcSKarl Rupp   else {
1677eb6b5d47SBarry Smith     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
16780ac07820SSatish Balay   }
16793a40ed3dSBarry Smith   PetscFunctionReturn(0);
16800ac07820SSatish Balay }
16810e95ebc0SSatish Balay 
16824a2ae208SSatish Balay #undef __FUNCT__
16834a2ae208SSatish Balay #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1684dfbe8321SBarry Smith PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
16850e95ebc0SSatish Balay {
168636c4a09eSSatish Balay   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
168736c4a09eSSatish Balay   Mat            a     = baij->A,b = baij->B;
1688dfbe8321SBarry Smith   PetscErrorCode ierr;
1689b24ad042SBarry Smith   PetscInt       s1,s2,s3;
16900e95ebc0SSatish Balay 
1691d64ed03dSBarry Smith   PetscFunctionBegin;
169236c4a09eSSatish Balay   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
169336c4a09eSSatish Balay   if (rr) {
169436c4a09eSSatish Balay     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1695e32f2f54SBarry Smith     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
169636c4a09eSSatish Balay     /* Overlap communication with computation. */
1697ca9f406cSSatish Balay     ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
169836c4a09eSSatish Balay   }
16990e95ebc0SSatish Balay   if (ll) {
17000e95ebc0SSatish Balay     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1701e32f2f54SBarry Smith     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
17020298fd71SBarry Smith     ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
17030e95ebc0SSatish Balay   }
170436c4a09eSSatish Balay   /* scale  the diagonal block */
170536c4a09eSSatish Balay   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
170636c4a09eSSatish Balay 
170736c4a09eSSatish Balay   if (rr) {
170836c4a09eSSatish Balay     /* Do a scatter end and then right scale the off-diagonal block */
1709ca9f406cSSatish Balay     ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
17100298fd71SBarry Smith     ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
171136c4a09eSSatish Balay   }
17123a40ed3dSBarry Smith   PetscFunctionReturn(0);
17130e95ebc0SSatish Balay }
17140e95ebc0SSatish Balay 
17154a2ae208SSatish Balay #undef __FUNCT__
17164a2ae208SSatish Balay #define __FUNCT__ "MatZeroRows_MPIBAIJ"
17172b40b63fSBarry Smith PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
17180ac07820SSatish Balay {
17190ac07820SSatish Balay   Mat_MPIBAIJ   *l      = (Mat_MPIBAIJ *) A->data;
172065a92638SMatthew G. Knepley   PetscInt      *owners = A->rmap->range;
172165a92638SMatthew G. Knepley   PetscInt       n      = A->rmap->n;
172265a92638SMatthew G. Knepley   PetscSF        sf;
172365a92638SMatthew G. Knepley   PetscInt      *lrows;
172465a92638SMatthew G. Knepley   PetscSFNode   *rrows;
172569ea2d38SJed Brown   PetscInt       r, p = 0, len = 0;
17266849ba73SBarry Smith   PetscErrorCode ierr;
17270ac07820SSatish Balay 
1728d64ed03dSBarry Smith   PetscFunctionBegin;
172965a92638SMatthew G. Knepley   /* Create SF where leaves are input rows and roots are owned rows */
1730785e854fSJed Brown   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
173165a92638SMatthew G. Knepley   for (r = 0; r < n; ++r) lrows[r] = -1;
1732a34163a4SJed Brown   if (!A->nooffproczerorows) {ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);}
173365a92638SMatthew G. Knepley   for (r = 0; r < N; ++r) {
173465a92638SMatthew G. Knepley     const PetscInt idx   = rows[r];
173569ea2d38SJed Brown     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
173669ea2d38SJed Brown     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
173769ea2d38SJed Brown       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
173869ea2d38SJed Brown     }
1739a34163a4SJed Brown     if (A->nooffproczerorows) {
1740a34163a4SJed Brown       if (p != l->rank) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"MAT_NO_OFF_PROC_ZERO_ROWS set, but row %D is not owned by rank %d",idx,l->rank);
1741a34163a4SJed Brown       lrows[len++] = idx - owners[p];
1742a34163a4SJed Brown     } else {
174365a92638SMatthew G. Knepley       rrows[r].rank = p;
174465a92638SMatthew G. Knepley       rrows[r].index = rows[r] - owners[p];
17450ac07820SSatish Balay     }
1746a34163a4SJed Brown   }
1747a34163a4SJed Brown   if (!A->nooffproczerorows) {
174865a92638SMatthew G. Knepley     ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
174965a92638SMatthew G. Knepley     ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
175065a92638SMatthew G. Knepley     /* Collect flags for rows to be zeroed */
175165a92638SMatthew G. Knepley     ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
175265a92638SMatthew G. Knepley     ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
175365a92638SMatthew G. Knepley     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
175465a92638SMatthew G. Knepley     /* Compress and put in row numbers */
175565a92638SMatthew G. Knepley     for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
1756a34163a4SJed Brown   }
175797b48c8fSBarry Smith   /* fix right hand side if needed */
175897b48c8fSBarry Smith   if (x && b) {
175965a92638SMatthew G. Knepley     const PetscScalar *xx;
176065a92638SMatthew G. Knepley     PetscScalar       *bb;
176165a92638SMatthew G. Knepley 
176297b48c8fSBarry Smith     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
176397b48c8fSBarry Smith     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
176465a92638SMatthew G. Knepley     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
176597b48c8fSBarry Smith     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
176697b48c8fSBarry Smith     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
176797b48c8fSBarry Smith   }
176897b48c8fSBarry Smith 
17690ac07820SSatish Balay   /* actually zap the local rows */
177072dacd9aSBarry Smith   /*
177172dacd9aSBarry Smith         Zero the required rows. If the "diagonal block" of the matrix
1772a8c7a070SBarry Smith      is square and the user wishes to set the diagonal we use separate
177372dacd9aSBarry Smith      code so that MatSetValues() is not called for each diagonal allocating
177472dacd9aSBarry Smith      new memory, thus calling lots of mallocs and slowing things down.
177572dacd9aSBarry Smith 
177672dacd9aSBarry Smith   */
17779c957beeSSatish Balay   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1778a34163a4SJed Brown   ierr = MatZeroRows_SeqBAIJ(l->B,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
1779d0f46423SBarry Smith   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
1780a34163a4SJed Brown     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,diag,NULL,NULL);CHKERRQ(ierr);
1781f4df32b1SMatthew Knepley   } else if (diag != 0.0) {
178265a92638SMatthew G. Knepley     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,0,0);CHKERRQ(ierr);
1783e7e72b3dSBarry Smith     if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1784512a5fc5SBarry Smith        MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
178565a92638SMatthew G. Knepley     for (r = 0; r < len; ++r) {
178665a92638SMatthew G. Knepley       const PetscInt row = lrows[r] + A->rmap->rstart;
1787f4df32b1SMatthew Knepley       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
1788a07cd24cSSatish Balay     }
1789a07cd24cSSatish Balay     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1790a07cd24cSSatish Balay     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
17919c957beeSSatish Balay   } else {
1792a34163a4SJed Brown     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
1793a07cd24cSSatish Balay   }
1794606d414cSSatish Balay   ierr = PetscFree(lrows);CHKERRQ(ierr);
17954f9cfa9eSBarry Smith 
17964f9cfa9eSBarry Smith   /* only change matrix nonzero state if pattern was allowed to be changed */
17974f9cfa9eSBarry Smith   if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) {
1798e56f5c9eSBarry Smith     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
179909e82e2bSBarry Smith     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1800e56f5c9eSBarry Smith   }
18013a40ed3dSBarry Smith   PetscFunctionReturn(0);
18020ac07820SSatish Balay }
180372dacd9aSBarry Smith 
18044a2ae208SSatish Balay #undef __FUNCT__
18056f0a72daSMatthew G. Knepley #define __FUNCT__ "MatZeroRowsColumns_MPIBAIJ"
18066f0a72daSMatthew G. Knepley PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
18076f0a72daSMatthew G. Knepley {
18086f0a72daSMatthew G. Knepley   Mat_MPIBAIJ       *l = (Mat_MPIBAIJ*)A->data;
18096f0a72daSMatthew G. Knepley   PetscErrorCode    ierr;
18105ba17502SJed Brown   PetscMPIInt       n = A->rmap->n;
1811fbb64d0eSMatthew G. Knepley   PetscInt          i,j,k,r,p = 0,len = 0,row,col,count;
18126f0a72daSMatthew G. Knepley   PetscInt          *lrows,*owners = A->rmap->range;
18136f0a72daSMatthew G. Knepley   PetscSFNode       *rrows;
18146f0a72daSMatthew G. Knepley   PetscSF           sf;
18156f0a72daSMatthew G. Knepley   const PetscScalar *xx;
18166f0a72daSMatthew G. Knepley   PetscScalar       *bb,*mask;
18176f0a72daSMatthew G. Knepley   Vec               xmask,lmask;
18186f0a72daSMatthew G. Knepley   Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ*)l->B->data;
18196f0a72daSMatthew G. Knepley   PetscInt           bs = A->rmap->bs, bs2 = baij->bs2;
18206f0a72daSMatthew G. Knepley   PetscScalar       *aa;
18216f0a72daSMatthew G. Knepley 
18226f0a72daSMatthew G. Knepley   PetscFunctionBegin;
18236f0a72daSMatthew G. Knepley   /* Create SF where leaves are input rows and roots are owned rows */
18246f0a72daSMatthew G. Knepley   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
18256f0a72daSMatthew G. Knepley   for (r = 0; r < n; ++r) lrows[r] = -1;
18266f0a72daSMatthew G. Knepley   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
18276f0a72daSMatthew G. Knepley   for (r = 0; r < N; ++r) {
18286f0a72daSMatthew G. Knepley     const PetscInt idx   = rows[r];
18295ba17502SJed Brown     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
18305ba17502SJed Brown     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
18315ba17502SJed Brown       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
18325ba17502SJed Brown     }
18336f0a72daSMatthew G. Knepley     rrows[r].rank  = p;
18346f0a72daSMatthew G. Knepley     rrows[r].index = rows[r] - owners[p];
18356f0a72daSMatthew G. Knepley   }
18366f0a72daSMatthew G. Knepley   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
18376f0a72daSMatthew G. Knepley   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
18386f0a72daSMatthew G. Knepley   /* Collect flags for rows to be zeroed */
18396f0a72daSMatthew G. Knepley   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
18406f0a72daSMatthew G. Knepley   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
18416f0a72daSMatthew G. Knepley   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
18426f0a72daSMatthew G. Knepley   /* Compress and put in row numbers */
18436f0a72daSMatthew G. Knepley   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
18446f0a72daSMatthew G. Knepley   /* zero diagonal part of matrix */
18456f0a72daSMatthew G. Knepley   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
18466f0a72daSMatthew G. Knepley   /* handle off diagonal part of matrix */
18476f0a72daSMatthew G. Knepley   ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr);
18486f0a72daSMatthew G. Knepley   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
18496f0a72daSMatthew G. Knepley   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
18506f0a72daSMatthew G. Knepley   for (i=0; i<len; i++) bb[lrows[i]] = 1;
18516f0a72daSMatthew G. Knepley   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
18526f0a72daSMatthew G. Knepley   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
18536f0a72daSMatthew G. Knepley   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
18546f0a72daSMatthew G. Knepley   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
18556f0a72daSMatthew G. Knepley   if (x) {
18566f0a72daSMatthew G. Knepley     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
18576f0a72daSMatthew G. Knepley     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
18586f0a72daSMatthew G. Knepley     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
18596f0a72daSMatthew G. Knepley     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
18606f0a72daSMatthew G. Knepley   }
18616f0a72daSMatthew G. Knepley   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
18626f0a72daSMatthew G. Knepley   /* remove zeroed rows of off diagonal matrix */
18636f0a72daSMatthew G. Knepley   for (i = 0; i < len; ++i) {
18646f0a72daSMatthew G. Knepley     row   = lrows[i];
18656f0a72daSMatthew G. Knepley     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
18666f0a72daSMatthew G. Knepley     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
18676f0a72daSMatthew G. Knepley     for (k = 0; k < count; ++k) {
18686f0a72daSMatthew G. Knepley       aa[0] = 0.0;
18696f0a72daSMatthew G. Knepley       aa   += bs;
18706f0a72daSMatthew G. Knepley     }
18716f0a72daSMatthew G. Knepley   }
18726f0a72daSMatthew G. Knepley   /* loop over all elements of off process part of matrix zeroing removed columns*/
18736f0a72daSMatthew G. Knepley   for (i = 0; i < l->B->rmap->N; ++i) {
18746f0a72daSMatthew G. Knepley     row = i/bs;
18756f0a72daSMatthew G. Knepley     for (j = baij->i[row]; j < baij->i[row+1]; ++j) {
18766f0a72daSMatthew G. Knepley       for (k = 0; k < bs; ++k) {
18776f0a72daSMatthew G. Knepley         col = bs*baij->j[j] + k;
18786f0a72daSMatthew G. Knepley         if (PetscAbsScalar(mask[col])) {
18796f0a72daSMatthew G. Knepley           aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
18806f0a72daSMatthew G. Knepley           if (b) bb[i] -= aa[0]*xx[col];
18816f0a72daSMatthew G. Knepley           aa[0] = 0.0;
18826f0a72daSMatthew G. Knepley         }
18836f0a72daSMatthew G. Knepley       }
18846f0a72daSMatthew G. Knepley     }
18856f0a72daSMatthew G. Knepley   }
18866f0a72daSMatthew G. Knepley   if (x) {
18876f0a72daSMatthew G. Knepley     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
18886f0a72daSMatthew G. Knepley     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
18896f0a72daSMatthew G. Knepley   }
18906f0a72daSMatthew G. Knepley   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
18916f0a72daSMatthew G. Knepley   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
18926f0a72daSMatthew G. Knepley   ierr = PetscFree(lrows);CHKERRQ(ierr);
18934f9cfa9eSBarry Smith 
18944f9cfa9eSBarry Smith   /* only change matrix nonzero state if pattern was allowed to be changed */
18954f9cfa9eSBarry Smith   if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) {
18964f9cfa9eSBarry Smith     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
18974f9cfa9eSBarry Smith     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
18984f9cfa9eSBarry Smith   }
18996f0a72daSMatthew G. Knepley   PetscFunctionReturn(0);
19006f0a72daSMatthew G. Knepley }
19016f0a72daSMatthew G. Knepley 
19026f0a72daSMatthew G. Knepley #undef __FUNCT__
19034a2ae208SSatish Balay #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1904dfbe8321SBarry Smith PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1905bb5a7306SBarry Smith {
1906bb5a7306SBarry Smith   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1907dfbe8321SBarry Smith   PetscErrorCode ierr;
1908d64ed03dSBarry Smith 
1909d64ed03dSBarry Smith   PetscFunctionBegin;
1910bb5a7306SBarry Smith   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
19113a40ed3dSBarry Smith   PetscFunctionReturn(0);
1912bb5a7306SBarry Smith }
1913bb5a7306SBarry Smith 
19146849ba73SBarry Smith static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*);
19150ac07820SSatish Balay 
19164a2ae208SSatish Balay #undef __FUNCT__
19174a2ae208SSatish Balay #define __FUNCT__ "MatEqual_MPIBAIJ"
1918ace3abfcSBarry Smith PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool  *flag)
19197fc3c18eSBarry Smith {
19207fc3c18eSBarry Smith   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
19217fc3c18eSBarry Smith   Mat            a,b,c,d;
1922ace3abfcSBarry Smith   PetscBool      flg;
1923dfbe8321SBarry Smith   PetscErrorCode ierr;
19247fc3c18eSBarry Smith 
19257fc3c18eSBarry Smith   PetscFunctionBegin;
19267fc3c18eSBarry Smith   a = matA->A; b = matA->B;
19277fc3c18eSBarry Smith   c = matB->A; d = matB->B;
19287fc3c18eSBarry Smith 
19297fc3c18eSBarry Smith   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1930abc0a331SBarry Smith   if (flg) {
19317fc3c18eSBarry Smith     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
19327fc3c18eSBarry Smith   }
1933ce94432eSBarry Smith   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
19347fc3c18eSBarry Smith   PetscFunctionReturn(0);
19357fc3c18eSBarry Smith }
19367fc3c18eSBarry Smith 
19373c896bc6SHong Zhang #undef __FUNCT__
19383c896bc6SHong Zhang #define __FUNCT__ "MatCopy_MPIBAIJ"
19393c896bc6SHong Zhang PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
19403c896bc6SHong Zhang {
19413c896bc6SHong Zhang   PetscErrorCode ierr;
19423c896bc6SHong Zhang   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
19433c896bc6SHong Zhang   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
19443c896bc6SHong Zhang 
19453c896bc6SHong Zhang   PetscFunctionBegin;
19463c896bc6SHong Zhang   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
19473c896bc6SHong Zhang   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
19483c896bc6SHong Zhang     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
19493c896bc6SHong Zhang   } else {
19503c896bc6SHong Zhang     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
19513c896bc6SHong Zhang     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
19523c896bc6SHong Zhang   }
19533c896bc6SHong Zhang   PetscFunctionReturn(0);
19543c896bc6SHong Zhang }
1955273d9f13SBarry Smith 
19564a2ae208SSatish Balay #undef __FUNCT__
19574994cf47SJed Brown #define __FUNCT__ "MatSetUp_MPIBAIJ"
19584994cf47SJed Brown PetscErrorCode MatSetUp_MPIBAIJ(Mat A)
1959273d9f13SBarry Smith {
1960dfbe8321SBarry Smith   PetscErrorCode ierr;
1961273d9f13SBarry Smith 
1962273d9f13SBarry Smith   PetscFunctionBegin;
1963535b19f3SBarry Smith   ierr = MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1964273d9f13SBarry Smith   PetscFunctionReturn(0);
1965273d9f13SBarry Smith }
1966273d9f13SBarry Smith 
19674fe895cdSHong Zhang #undef __FUNCT__
19684de5dceeSHong Zhang #define __FUNCT__ "MatAXPYGetPreallocation_MPIBAIJ"
19694de5dceeSHong Zhang PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
19704de5dceeSHong Zhang {
1971001ddc4fSHong Zhang   PetscErrorCode ierr;
1972001ddc4fSHong Zhang   PetscInt       bs = Y->rmap->bs,m = Y->rmap->N/bs;
19734de5dceeSHong Zhang   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
19744de5dceeSHong Zhang   Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;
19754de5dceeSHong Zhang 
19764de5dceeSHong Zhang   PetscFunctionBegin;
1977001ddc4fSHong Zhang   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
19784de5dceeSHong Zhang   PetscFunctionReturn(0);
19794de5dceeSHong Zhang }
19804de5dceeSHong Zhang 
19814de5dceeSHong Zhang #undef __FUNCT__
19824fe895cdSHong Zhang #define __FUNCT__ "MatAXPY_MPIBAIJ"
19834fe895cdSHong Zhang PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
19844fe895cdSHong Zhang {
19854fe895cdSHong Zhang   PetscErrorCode ierr;
19864fe895cdSHong Zhang   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data;
19874fe895cdSHong Zhang   PetscBLASInt   bnz,one=1;
19884fe895cdSHong Zhang   Mat_SeqBAIJ    *x,*y;
19894fe895cdSHong Zhang 
19904fe895cdSHong Zhang   PetscFunctionBegin;
19914fe895cdSHong Zhang   if (str == SAME_NONZERO_PATTERN) {
19924fe895cdSHong Zhang     PetscScalar alpha = a;
19934fe895cdSHong Zhang     x    = (Mat_SeqBAIJ*)xx->A->data;
19944fe895cdSHong Zhang     y    = (Mat_SeqBAIJ*)yy->A->data;
1995c5df96a5SBarry Smith     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
19968b83055fSJed Brown     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
19974fe895cdSHong Zhang     x    = (Mat_SeqBAIJ*)xx->B->data;
19984fe895cdSHong Zhang     y    = (Mat_SeqBAIJ*)yy->B->data;
1999c5df96a5SBarry Smith     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
20008b83055fSJed Brown     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2001a3fa217bSJose E. Roman     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
20024fe895cdSHong Zhang   } else {
20034de5dceeSHong Zhang     Mat      B;
20044de5dceeSHong Zhang     PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
20054de5dceeSHong Zhang     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
20064de5dceeSHong Zhang     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
20074de5dceeSHong Zhang     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
20084de5dceeSHong Zhang     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
20094de5dceeSHong Zhang     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
20104de5dceeSHong Zhang     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
20114de5dceeSHong Zhang     ierr = MatSetType(B,MATMPIBAIJ);CHKERRQ(ierr);
20124de5dceeSHong Zhang     ierr = MatAXPYGetPreallocation_SeqBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
20134de5dceeSHong Zhang     ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
20144de5dceeSHong Zhang     ierr = MatMPIBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
20154de5dceeSHong Zhang     /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! */
20164de5dceeSHong Zhang     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
20174de5dceeSHong Zhang     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
20184de5dceeSHong Zhang     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
20194de5dceeSHong Zhang     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
20204fe895cdSHong Zhang   }
20214fe895cdSHong Zhang   PetscFunctionReturn(0);
20224fe895cdSHong Zhang }
20234fe895cdSHong Zhang 
202499cafbc1SBarry Smith #undef __FUNCT__
202599cafbc1SBarry Smith #define __FUNCT__ "MatRealPart_MPIBAIJ"
202699cafbc1SBarry Smith PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
202799cafbc1SBarry Smith {
202899cafbc1SBarry Smith   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
202999cafbc1SBarry Smith   PetscErrorCode ierr;
203099cafbc1SBarry Smith 
203199cafbc1SBarry Smith   PetscFunctionBegin;
203299cafbc1SBarry Smith   ierr = MatRealPart(a->A);CHKERRQ(ierr);
203399cafbc1SBarry Smith   ierr = MatRealPart(a->B);CHKERRQ(ierr);
203499cafbc1SBarry Smith   PetscFunctionReturn(0);
203599cafbc1SBarry Smith }
203699cafbc1SBarry Smith 
203799cafbc1SBarry Smith #undef __FUNCT__
203899cafbc1SBarry Smith #define __FUNCT__ "MatImaginaryPart_MPIBAIJ"
203999cafbc1SBarry Smith PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
204099cafbc1SBarry Smith {
204199cafbc1SBarry Smith   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
204299cafbc1SBarry Smith   PetscErrorCode ierr;
204399cafbc1SBarry Smith 
204499cafbc1SBarry Smith   PetscFunctionBegin;
204599cafbc1SBarry Smith   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
204699cafbc1SBarry Smith   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
204799cafbc1SBarry Smith   PetscFunctionReturn(0);
204899cafbc1SBarry Smith }
204999cafbc1SBarry Smith 
205082094794SBarry Smith #undef __FUNCT__
205182094794SBarry Smith #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ"
20524aa3045dSJed Brown PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
20534aa3045dSJed Brown {
20544aa3045dSJed Brown   PetscErrorCode ierr;
20554aa3045dSJed Brown   IS             iscol_local;
20564aa3045dSJed Brown   PetscInt       csize;
20574aa3045dSJed Brown 
20584aa3045dSJed Brown   PetscFunctionBegin;
20594aa3045dSJed Brown   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
2060b79d0421SJed Brown   if (call == MAT_REUSE_MATRIX) {
2061b79d0421SJed Brown     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
2062e32f2f54SBarry Smith     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2063b79d0421SJed Brown   } else {
20644aa3045dSJed Brown     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
2065b79d0421SJed Brown   }
20664aa3045dSJed Brown   ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
2067b79d0421SJed Brown   if (call == MAT_INITIAL_MATRIX) {
2068b79d0421SJed Brown     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
20696bf464f9SBarry Smith     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
2070b79d0421SJed Brown   }
20714aa3045dSJed Brown   PetscFunctionReturn(0);
20724aa3045dSJed Brown }
207329dcf524SDmitry Karpeev extern PetscErrorCode MatGetSubMatrices_MPIBAIJ_local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,PetscBool*,Mat*);
20744aa3045dSJed Brown #undef __FUNCT__
2075dd183c9eSJed Brown #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private"
207682094794SBarry Smith /*
207782094794SBarry Smith   Not great since it makes two copies of the submatrix, first an SeqBAIJ
207882094794SBarry Smith   in local and then by concatenating the local matrices the end result.
207982094794SBarry Smith   Writing it directly would be much like MatGetSubMatrices_MPIBAIJ()
208082094794SBarry Smith */
20814aa3045dSJed Brown PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
208282094794SBarry Smith {
208382094794SBarry Smith   PetscErrorCode ierr;
208482094794SBarry Smith   PetscMPIInt    rank,size;
208582094794SBarry Smith   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
208629dcf524SDmitry Karpeev   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol,nrow;
208729dcf524SDmitry Karpeev   Mat            M,Mreuse;
208882094794SBarry Smith   MatScalar      *vwork,*aa;
2089ce94432eSBarry Smith   MPI_Comm       comm;
209029dcf524SDmitry Karpeev   IS             isrow_new, iscol_new;
209129dcf524SDmitry Karpeev   PetscBool      idflag,allrows, allcols;
209282094794SBarry Smith   Mat_SeqBAIJ    *aij;
209382094794SBarry Smith 
209482094794SBarry Smith   PetscFunctionBegin;
2095ce94432eSBarry Smith   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
209682094794SBarry Smith   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
209782094794SBarry Smith   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
209829dcf524SDmitry Karpeev   /* The compression and expansion should be avoided. Doesn't point
209929dcf524SDmitry Karpeev      out errors, might change the indices, hence buggey */
210029dcf524SDmitry Karpeev   ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr);
210129dcf524SDmitry Karpeev   ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr);
210282094794SBarry Smith 
210329dcf524SDmitry Karpeev   /* Check for special case: each processor gets entire matrix columns */
210429dcf524SDmitry Karpeev   ierr = ISIdentity(iscol,&idflag);CHKERRQ(ierr);
210529dcf524SDmitry Karpeev   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
210626fbe8dcSKarl Rupp   if (idflag && ncol == mat->cmap->N) allcols = PETSC_TRUE;
210726fbe8dcSKarl Rupp   else allcols = PETSC_FALSE;
210829dcf524SDmitry Karpeev 
210929dcf524SDmitry Karpeev   ierr = ISIdentity(isrow,&idflag);CHKERRQ(ierr);
211029dcf524SDmitry Karpeev   ierr = ISGetLocalSize(isrow,&nrow);CHKERRQ(ierr);
211126fbe8dcSKarl Rupp   if (idflag && nrow == mat->rmap->N) allrows = PETSC_TRUE;
211226fbe8dcSKarl Rupp   else allrows = PETSC_FALSE;
211326fbe8dcSKarl Rupp 
211482094794SBarry Smith   if (call ==  MAT_REUSE_MATRIX) {
211582094794SBarry Smith     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
2116e32f2f54SBarry Smith     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
211775f6568bSJed Brown     ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr);
211882094794SBarry Smith   } else {
211975f6568bSJed Brown     ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr);
212082094794SBarry Smith   }
212129dcf524SDmitry Karpeev   ierr = ISDestroy(&isrow_new);CHKERRQ(ierr);
212229dcf524SDmitry Karpeev   ierr = ISDestroy(&iscol_new);CHKERRQ(ierr);
212382094794SBarry Smith   /*
212482094794SBarry Smith       m - number of local rows
212582094794SBarry Smith       n - number of columns (same on all processors)
212682094794SBarry Smith       rstart - first row in new global matrix generated
212782094794SBarry Smith   */
212882094794SBarry Smith   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
212982094794SBarry Smith   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
213082094794SBarry Smith   m    = m/bs;
213182094794SBarry Smith   n    = n/bs;
213282094794SBarry Smith 
213382094794SBarry Smith   if (call == MAT_INITIAL_MATRIX) {
213482094794SBarry Smith     aij = (Mat_SeqBAIJ*)(Mreuse)->data;
213582094794SBarry Smith     ii  = aij->i;
213682094794SBarry Smith     jj  = aij->j;
213782094794SBarry Smith 
213882094794SBarry Smith     /*
213982094794SBarry Smith         Determine the number of non-zeros in the diagonal and off-diagonal
214082094794SBarry Smith         portions of the matrix in order to do correct preallocation
214182094794SBarry Smith     */
214282094794SBarry Smith 
214382094794SBarry Smith     /* first get start and end of "diagonal" columns */
214482094794SBarry Smith     if (csize == PETSC_DECIDE) {
214582094794SBarry Smith       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
214682094794SBarry Smith       if (mglobal == n*bs) { /* square matrix */
214782094794SBarry Smith         nlocal = m;
214882094794SBarry Smith       } else {
214982094794SBarry Smith         nlocal = n/size + ((n % size) > rank);
215082094794SBarry Smith       }
215182094794SBarry Smith     } else {
215282094794SBarry Smith       nlocal = csize/bs;
215382094794SBarry Smith     }
215482094794SBarry Smith     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
215582094794SBarry Smith     rstart = rend - nlocal;
215665e19b50SBarry Smith     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
215782094794SBarry Smith 
215882094794SBarry Smith     /* next, compute all the lengths */
2159dcca6d9dSJed Brown     ierr  = PetscMalloc2(m+1,&dlens,m+1,&olens);CHKERRQ(ierr);
216082094794SBarry Smith     for (i=0; i<m; i++) {
216182094794SBarry Smith       jend = ii[i+1] - ii[i];
216282094794SBarry Smith       olen = 0;
216382094794SBarry Smith       dlen = 0;
216482094794SBarry Smith       for (j=0; j<jend; j++) {
216582094794SBarry Smith         if (*jj < rstart || *jj >= rend) olen++;
216682094794SBarry Smith         else dlen++;
216782094794SBarry Smith         jj++;
216882094794SBarry Smith       }
216982094794SBarry Smith       olens[i] = olen;
217082094794SBarry Smith       dlens[i] = dlen;
217182094794SBarry Smith     }
217282094794SBarry Smith     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
217382094794SBarry Smith     ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
217482094794SBarry Smith     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
217582094794SBarry Smith     ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
2176eb9baa12SBarry Smith     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
217782094794SBarry Smith   } else {
217882094794SBarry Smith     PetscInt ml,nl;
217982094794SBarry Smith 
218082094794SBarry Smith     M    = *newmat;
218182094794SBarry Smith     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2182e32f2f54SBarry Smith     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
218382094794SBarry Smith     ierr = MatZeroEntries(M);CHKERRQ(ierr);
218482094794SBarry Smith     /*
218582094794SBarry Smith          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
218682094794SBarry Smith        rather than the slower MatSetValues().
218782094794SBarry Smith     */
218882094794SBarry Smith     M->was_assembled = PETSC_TRUE;
218982094794SBarry Smith     M->assembled     = PETSC_FALSE;
219082094794SBarry Smith   }
219182094794SBarry Smith   ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
219282094794SBarry Smith   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
219382094794SBarry Smith   aij  = (Mat_SeqBAIJ*)(Mreuse)->data;
219482094794SBarry Smith   ii   = aij->i;
219582094794SBarry Smith   jj   = aij->j;
219682094794SBarry Smith   aa   = aij->a;
219782094794SBarry Smith   for (i=0; i<m; i++) {
219882094794SBarry Smith     row   = rstart/bs + i;
219982094794SBarry Smith     nz    = ii[i+1] - ii[i];
220082094794SBarry Smith     cwork = jj;     jj += nz;
220175f6568bSJed Brown     vwork = aa;     aa += nz*bs*bs;
220282094794SBarry Smith     ierr  = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
220382094794SBarry Smith   }
220482094794SBarry Smith 
220582094794SBarry Smith   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
220682094794SBarry Smith   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
220782094794SBarry Smith   *newmat = M;
220882094794SBarry Smith 
220982094794SBarry Smith   /* save submatrix used in processor for next request */
221082094794SBarry Smith   if (call ==  MAT_INITIAL_MATRIX) {
221182094794SBarry Smith     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
221282094794SBarry Smith     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
221382094794SBarry Smith   }
221482094794SBarry Smith   PetscFunctionReturn(0);
221582094794SBarry Smith }
221682094794SBarry Smith 
221782094794SBarry Smith #undef __FUNCT__
221882094794SBarry Smith #define __FUNCT__ "MatPermute_MPIBAIJ"
221982094794SBarry Smith PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
222082094794SBarry Smith {
222182094794SBarry Smith   MPI_Comm       comm,pcomm;
2222a0a83eb5SRémi Lacroix   PetscInt       clocal_size,nrows;
222382094794SBarry Smith   const PetscInt *rows;
2224dbf0e21dSBarry Smith   PetscMPIInt    size;
2225a0a83eb5SRémi Lacroix   IS             crowp,lcolp;
222682094794SBarry Smith   PetscErrorCode ierr;
222782094794SBarry Smith 
222882094794SBarry Smith   PetscFunctionBegin;
222982094794SBarry Smith   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
223082094794SBarry Smith   /* make a collective version of 'rowp' */
223182094794SBarry Smith   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
223282094794SBarry Smith   if (pcomm==comm) {
223382094794SBarry Smith     crowp = rowp;
223482094794SBarry Smith   } else {
223582094794SBarry Smith     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
223682094794SBarry Smith     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
223770b3c8c7SBarry Smith     ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
223882094794SBarry Smith     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
223982094794SBarry Smith   }
2240a0a83eb5SRémi Lacroix   ierr = ISSetPermutation(crowp);CHKERRQ(ierr);
2241a0a83eb5SRémi Lacroix   /* make a local version of 'colp' */
224282094794SBarry Smith   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
2243dbf0e21dSBarry Smith   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
2244dbf0e21dSBarry Smith   if (size==1) {
224582094794SBarry Smith     lcolp = colp;
224682094794SBarry Smith   } else {
224775f6568bSJed Brown     ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr);
224882094794SBarry Smith   }
2249dbf0e21dSBarry Smith   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
225075f6568bSJed Brown   /* now we just get the submatrix */
22517afc1a8bSJed Brown   ierr = MatGetLocalSize(A,NULL,&clocal_size);CHKERRQ(ierr);
2252a0a83eb5SRémi Lacroix   ierr = MatGetSubMatrix_MPIBAIJ_Private(A,crowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
2253a0a83eb5SRémi Lacroix   /* clean up */
2254a0a83eb5SRémi Lacroix   if (pcomm!=comm) {
2255a0a83eb5SRémi Lacroix     ierr = ISDestroy(&crowp);CHKERRQ(ierr);
2256a0a83eb5SRémi Lacroix   }
2257dbf0e21dSBarry Smith   if (size>1) {
22586bf464f9SBarry Smith     ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
225982094794SBarry Smith   }
226082094794SBarry Smith   PetscFunctionReturn(0);
226182094794SBarry Smith }
226282094794SBarry Smith 
22638c7482ecSBarry Smith #undef __FUNCT__
22648c7482ecSBarry Smith #define __FUNCT__ "MatGetGhosts_MPIBAIJ"
22657087cfbeSBarry Smith PetscErrorCode  MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
22668c7482ecSBarry Smith {
22678c7482ecSBarry Smith   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data;
22688c7482ecSBarry Smith   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ*)baij->B->data;
22698c7482ecSBarry Smith 
22708c7482ecSBarry Smith   PetscFunctionBegin;
227126fbe8dcSKarl Rupp   if (nghosts) *nghosts = B->nbs;
227226fbe8dcSKarl Rupp   if (ghosts) *ghosts = baij->garray;
22738c7482ecSBarry Smith   PetscFunctionReturn(0);
22748c7482ecSBarry Smith }
22758c7482ecSBarry Smith 
2276f6d58c54SBarry Smith #undef __FUNCT__
2277d1adec66SJed Brown #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIBAIJ"
2278d1adec66SJed Brown PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat)
2279f6d58c54SBarry Smith {
2280f6d58c54SBarry Smith   Mat            B;
2281f6d58c54SBarry Smith   Mat_MPIBAIJ    *a  = (Mat_MPIBAIJ*)A->data;
2282f6d58c54SBarry Smith   Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
2283f6d58c54SBarry Smith   Mat_SeqAIJ     *b;
2284f6d58c54SBarry Smith   PetscErrorCode ierr;
2285f6d58c54SBarry Smith   PetscMPIInt    size,rank,*recvcounts = 0,*displs = 0;
2286f6d58c54SBarry Smith   PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
2287f6d58c54SBarry Smith   PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;
2288f6d58c54SBarry Smith 
2289f6d58c54SBarry Smith   PetscFunctionBegin;
2290ce94432eSBarry Smith   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
2291ce94432eSBarry Smith   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
2292f6d58c54SBarry Smith 
2293f6d58c54SBarry Smith   /* ----------------------------------------------------------------
2294f6d58c54SBarry Smith      Tell every processor the number of nonzeros per row
2295f6d58c54SBarry Smith   */
2296785e854fSJed Brown   ierr = PetscMalloc1((A->rmap->N/bs),&lens);CHKERRQ(ierr);
2297f6d58c54SBarry Smith   for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
2298f6d58c54SBarry Smith     lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
2299f6d58c54SBarry Smith   }
2300f6d58c54SBarry Smith   sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
2301785e854fSJed Brown   ierr      = PetscMalloc1(2*size,&recvcounts);CHKERRQ(ierr);
2302f6d58c54SBarry Smith   displs    = recvcounts + size;
2303f6d58c54SBarry Smith   for (i=0; i<size; i++) {
2304f6d58c54SBarry Smith     recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
2305f6d58c54SBarry Smith     displs[i]     = A->rmap->range[i]/bs;
2306f6d58c54SBarry Smith   }
2307f6d58c54SBarry Smith #if defined(PETSC_HAVE_MPI_IN_PLACE)
2308ce94432eSBarry Smith   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2309f6d58c54SBarry Smith #else
2310ce94432eSBarry Smith   ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2311f6d58c54SBarry Smith #endif
2312f6d58c54SBarry Smith   /* ---------------------------------------------------------------
2313f6d58c54SBarry Smith      Create the sequential matrix of the same type as the local block diagonal
2314f6d58c54SBarry Smith   */
2315f6d58c54SBarry Smith   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
2316f6d58c54SBarry Smith   ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2317f6d58c54SBarry Smith   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2318f6d58c54SBarry Smith   ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr);
2319f6d58c54SBarry Smith   b    = (Mat_SeqAIJ*)B->data;
2320f6d58c54SBarry Smith 
2321f6d58c54SBarry Smith   /*--------------------------------------------------------------------
2322f6d58c54SBarry Smith     Copy my part of matrix column indices over
2323f6d58c54SBarry Smith   */
2324f6d58c54SBarry Smith   sendcount  = ad->nz + bd->nz;
2325f6d58c54SBarry Smith   jsendbuf   = b->j + b->i[rstarts[rank]/bs];
2326f6d58c54SBarry Smith   a_jsendbuf = ad->j;
2327f6d58c54SBarry Smith   b_jsendbuf = bd->j;
2328f6d58c54SBarry Smith   n          = A->rmap->rend/bs - A->rmap->rstart/bs;
2329f6d58c54SBarry Smith   cnt        = 0;
2330f6d58c54SBarry Smith   for (i=0; i<n; i++) {
2331f6d58c54SBarry Smith 
2332f6d58c54SBarry Smith     /* put in lower diagonal portion */
2333f6d58c54SBarry Smith     m = bd->i[i+1] - bd->i[i];
2334f6d58c54SBarry Smith     while (m > 0) {
2335f6d58c54SBarry Smith       /* is it above diagonal (in bd (compressed) numbering) */
2336f6d58c54SBarry Smith       if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
2337f6d58c54SBarry Smith       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2338f6d58c54SBarry Smith       m--;
2339f6d58c54SBarry Smith     }
2340f6d58c54SBarry Smith 
2341f6d58c54SBarry Smith     /* put in diagonal portion */
2342f6d58c54SBarry Smith     for (j=ad->i[i]; j<ad->i[i+1]; j++) {
2343f6d58c54SBarry Smith       jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
2344f6d58c54SBarry Smith     }
2345f6d58c54SBarry Smith 
2346f6d58c54SBarry Smith     /* put in upper diagonal portion */
2347f6d58c54SBarry Smith     while (m-- > 0) {
2348f6d58c54SBarry Smith       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2349f6d58c54SBarry Smith     }
2350f6d58c54SBarry Smith   }
2351e32f2f54SBarry Smith   if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);
2352f6d58c54SBarry Smith 
2353f6d58c54SBarry Smith   /*--------------------------------------------------------------------
2354f6d58c54SBarry Smith     Gather all column indices to all processors
2355f6d58c54SBarry Smith   */
2356f6d58c54SBarry Smith   for (i=0; i<size; i++) {
2357f6d58c54SBarry Smith     recvcounts[i] = 0;
2358f6d58c54SBarry Smith     for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
2359f6d58c54SBarry Smith       recvcounts[i] += lens[j];
2360f6d58c54SBarry Smith     }
2361f6d58c54SBarry Smith   }
2362f6d58c54SBarry Smith   displs[0] = 0;
2363f6d58c54SBarry Smith   for (i=1; i<size; i++) {
2364f6d58c54SBarry Smith     displs[i] = displs[i-1] + recvcounts[i-1];
2365f6d58c54SBarry Smith   }
2366f6d58c54SBarry Smith #if defined(PETSC_HAVE_MPI_IN_PLACE)
2367ce94432eSBarry Smith   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2368f6d58c54SBarry Smith #else
2369ce94432eSBarry Smith   ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2370f6d58c54SBarry Smith #endif
2371f6d58c54SBarry Smith   /*--------------------------------------------------------------------
2372f6d58c54SBarry Smith     Assemble the matrix into useable form (note numerical values not yet set)
2373f6d58c54SBarry Smith   */
2374f6d58c54SBarry Smith   /* set the b->ilen (length of each row) values */
2375f6d58c54SBarry Smith   ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr);
2376f6d58c54SBarry Smith   /* set the b->i indices */
2377f6d58c54SBarry Smith   b->i[0] = 0;
2378f6d58c54SBarry Smith   for (i=1; i<=A->rmap->N/bs; i++) {
2379f6d58c54SBarry Smith     b->i[i] = b->i[i-1] + lens[i-1];
2380f6d58c54SBarry Smith   }
2381f6d58c54SBarry Smith   ierr = PetscFree(lens);CHKERRQ(ierr);
2382f6d58c54SBarry Smith   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2383f6d58c54SBarry Smith   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2384f6d58c54SBarry Smith   ierr = PetscFree(recvcounts);CHKERRQ(ierr);
2385f6d58c54SBarry Smith 
2386f6d58c54SBarry Smith   if (A->symmetric) {
2387f6d58c54SBarry Smith     ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2388f6d58c54SBarry Smith   } else if (A->hermitian) {
2389f6d58c54SBarry Smith     ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
2390f6d58c54SBarry Smith   } else if (A->structurally_symmetric) {
2391f6d58c54SBarry Smith     ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2392f6d58c54SBarry Smith   }
2393f6d58c54SBarry Smith   *newmat = B;
2394f6d58c54SBarry Smith   PetscFunctionReturn(0);
2395f6d58c54SBarry Smith }
2396f6d58c54SBarry Smith 
2397b1a666ecSBarry Smith #undef __FUNCT__
2398b1a666ecSBarry Smith #define __FUNCT__ "MatSOR_MPIBAIJ"
2399b1a666ecSBarry Smith PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2400b1a666ecSBarry Smith {
2401b1a666ecSBarry Smith   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
2402b1a666ecSBarry Smith   PetscErrorCode ierr;
2403b1a666ecSBarry Smith   Vec            bb1 = 0;
2404b1a666ecSBarry Smith 
2405b1a666ecSBarry Smith   PetscFunctionBegin;
2406b1a666ecSBarry Smith   if (flag == SOR_APPLY_UPPER) {
2407b1a666ecSBarry Smith     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2408b1a666ecSBarry Smith     PetscFunctionReturn(0);
2409b1a666ecSBarry Smith   }
2410b1a666ecSBarry Smith 
24114e980039SJed Brown   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) {
24124e980039SJed Brown     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
24134e980039SJed Brown   }
24144e980039SJed Brown 
2415b1a666ecSBarry Smith   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2416b1a666ecSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
2417b1a666ecSBarry Smith       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2418b1a666ecSBarry Smith       its--;
2419b1a666ecSBarry Smith     }
2420b1a666ecSBarry Smith 
2421b1a666ecSBarry Smith     while (its--) {
2422b1a666ecSBarry Smith       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2423b1a666ecSBarry Smith       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2424b1a666ecSBarry Smith 
2425b1a666ecSBarry Smith       /* update rhs: bb1 = bb - B*x */
2426b1a666ecSBarry Smith       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2427b1a666ecSBarry Smith       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2428b1a666ecSBarry Smith 
2429b1a666ecSBarry Smith       /* local sweep */
2430b1a666ecSBarry Smith       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2431b1a666ecSBarry Smith     }
2432b1a666ecSBarry Smith   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
2433b1a666ecSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
2434b1a666ecSBarry Smith       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2435b1a666ecSBarry Smith       its--;
2436b1a666ecSBarry Smith     }
2437b1a666ecSBarry Smith     while (its--) {
2438b1a666ecSBarry Smith       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2439b1a666ecSBarry Smith       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2440b1a666ecSBarry Smith 
2441b1a666ecSBarry Smith       /* update rhs: bb1 = bb - B*x */
2442b1a666ecSBarry Smith       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2443b1a666ecSBarry Smith       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2444b1a666ecSBarry Smith 
2445b1a666ecSBarry Smith       /* local sweep */
2446b1a666ecSBarry Smith       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2447b1a666ecSBarry Smith     }
2448b1a666ecSBarry Smith   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
2449b1a666ecSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
2450b1a666ecSBarry Smith       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2451b1a666ecSBarry Smith       its--;
2452b1a666ecSBarry Smith     }
2453b1a666ecSBarry Smith     while (its--) {
2454b1a666ecSBarry Smith       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2455b1a666ecSBarry Smith       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2456b1a666ecSBarry Smith 
2457b1a666ecSBarry Smith       /* update rhs: bb1 = bb - B*x */
2458b1a666ecSBarry Smith       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2459b1a666ecSBarry Smith       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2460b1a666ecSBarry Smith 
2461b1a666ecSBarry Smith       /* local sweep */
2462b1a666ecSBarry Smith       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2463b1a666ecSBarry Smith     }
2464ce94432eSBarry Smith   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel version of SOR requested not supported");
2465b1a666ecSBarry Smith 
24666bf464f9SBarry Smith   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2467b1a666ecSBarry Smith   PetscFunctionReturn(0);
2468b1a666ecSBarry Smith }
2469b1a666ecSBarry Smith 
2470bbead8a2SBarry Smith #undef __FUNCT__
247147f7623dSRémi Lacroix #define __FUNCT__ "MatGetColumnNorms_MPIBAIJ"
247247f7623dSRémi Lacroix PetscErrorCode MatGetColumnNorms_MPIBAIJ(Mat A,NormType type,PetscReal *norms)
247347f7623dSRémi Lacroix {
247447f7623dSRémi Lacroix   PetscErrorCode ierr;
247547f7623dSRémi Lacroix   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)A->data;
247647f7623dSRémi Lacroix   PetscInt       N,i,*garray = aij->garray;
247747f7623dSRémi Lacroix   PetscInt       ib,jb,bs = A->rmap->bs;
247847f7623dSRémi Lacroix   Mat_SeqBAIJ    *a_aij = (Mat_SeqBAIJ*) aij->A->data;
247947f7623dSRémi Lacroix   MatScalar      *a_val = a_aij->a;
248047f7623dSRémi Lacroix   Mat_SeqBAIJ    *b_aij = (Mat_SeqBAIJ*) aij->B->data;
248147f7623dSRémi Lacroix   MatScalar      *b_val = b_aij->a;
248247f7623dSRémi Lacroix   PetscReal      *work;
248347f7623dSRémi Lacroix 
248447f7623dSRémi Lacroix   PetscFunctionBegin;
248547f7623dSRémi Lacroix   ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
24861795a4d1SJed Brown   ierr = PetscCalloc1(N,&work);CHKERRQ(ierr);
248747f7623dSRémi Lacroix   if (type == NORM_2) {
248847f7623dSRémi Lacroix     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
248947f7623dSRémi Lacroix       for (jb=0; jb<bs; jb++) {
249047f7623dSRémi Lacroix         for (ib=0; ib<bs; ib++) {
249147f7623dSRémi Lacroix           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
249247f7623dSRémi Lacroix           a_val++;
249347f7623dSRémi Lacroix         }
249447f7623dSRémi Lacroix       }
249547f7623dSRémi Lacroix     }
249647f7623dSRémi Lacroix     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
249747f7623dSRémi Lacroix       for (jb=0; jb<bs; jb++) {
249847f7623dSRémi Lacroix         for (ib=0; ib<bs; ib++) {
249947f7623dSRémi Lacroix           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val);
250047f7623dSRémi Lacroix           b_val++;
250147f7623dSRémi Lacroix         }
250247f7623dSRémi Lacroix       }
250347f7623dSRémi Lacroix     }
250447f7623dSRémi Lacroix   } else if (type == NORM_1) {
250547f7623dSRémi Lacroix     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
250647f7623dSRémi Lacroix       for (jb=0; jb<bs; jb++) {
250747f7623dSRémi Lacroix         for (ib=0; ib<bs; ib++) {
250847f7623dSRémi Lacroix           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
250947f7623dSRémi Lacroix           a_val++;
251047f7623dSRémi Lacroix         }
251147f7623dSRémi Lacroix       }
251247f7623dSRémi Lacroix     }
251347f7623dSRémi Lacroix     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
251447f7623dSRémi Lacroix       for (jb=0; jb<bs; jb++) {
251547f7623dSRémi Lacroix        for (ib=0; ib<bs; ib++) {
251647f7623dSRémi Lacroix           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val);
251747f7623dSRémi Lacroix           b_val++;
251847f7623dSRémi Lacroix         }
251947f7623dSRémi Lacroix       }
252047f7623dSRémi Lacroix     }
252147f7623dSRémi Lacroix   } else if (type == NORM_INFINITY) {
252247f7623dSRémi Lacroix     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
252347f7623dSRémi Lacroix       for (jb=0; jb<bs; jb++) {
252447f7623dSRémi Lacroix         for (ib=0; ib<bs; ib++) {
252547f7623dSRémi Lacroix           int col = A->cmap->rstart + a_aij->j[i] * bs + jb;
252647f7623dSRémi Lacroix           work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]);
252747f7623dSRémi Lacroix           a_val++;
252847f7623dSRémi Lacroix         }
252947f7623dSRémi Lacroix       }
253047f7623dSRémi Lacroix     }
253147f7623dSRémi Lacroix     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
253247f7623dSRémi Lacroix       for (jb=0; jb<bs; jb++) {
253347f7623dSRémi Lacroix         for (ib=0; ib<bs; ib++) {
253447f7623dSRémi Lacroix           int col = garray[b_aij->j[i]] * bs + jb;
253547f7623dSRémi Lacroix           work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]);
253647f7623dSRémi Lacroix           b_val++;
253747f7623dSRémi Lacroix         }
253847f7623dSRémi Lacroix       }
253947f7623dSRémi Lacroix     }
254047f7623dSRémi Lacroix   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
254147f7623dSRémi Lacroix   if (type == NORM_INFINITY) {
254247f7623dSRémi Lacroix     ierr = MPI_Allreduce(work,norms,N,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
254347f7623dSRémi Lacroix   } else {
254447f7623dSRémi Lacroix     ierr = MPI_Allreduce(work,norms,N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
254547f7623dSRémi Lacroix   }
254647f7623dSRémi Lacroix   ierr = PetscFree(work);CHKERRQ(ierr);
254747f7623dSRémi Lacroix   if (type == NORM_2) {
254847f7623dSRémi Lacroix     for (i=0; i<N; i++) norms[i] = PetscSqrtReal(norms[i]);
254947f7623dSRémi Lacroix   }
255047f7623dSRémi Lacroix   PetscFunctionReturn(0);
255147f7623dSRémi Lacroix }
255247f7623dSRémi Lacroix 
255347f7623dSRémi Lacroix #undef __FUNCT__
2554bbead8a2SBarry Smith #define __FUNCT__ "MatInvertBlockDiagonal_MPIBAIJ"
2555713ccfa9SJed Brown PetscErrorCode  MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values)
2556bbead8a2SBarry Smith {
2557bbead8a2SBarry Smith   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*) A->data;
2558bbead8a2SBarry Smith   PetscErrorCode ierr;
2559bbead8a2SBarry Smith 
2560bbead8a2SBarry Smith   PetscFunctionBegin;
2561bbead8a2SBarry Smith   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2562bbead8a2SBarry Smith   PetscFunctionReturn(0);
2563bbead8a2SBarry Smith }
2564bbead8a2SBarry Smith 
25658c7482ecSBarry Smith 
256679bdfe76SSatish Balay /* -------------------------------------------------------------------*/
25673964eb88SJed Brown static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ,
2568cc2dc46cSBarry Smith                                        MatGetRow_MPIBAIJ,
2569cc2dc46cSBarry Smith                                        MatRestoreRow_MPIBAIJ,
2570cc2dc46cSBarry Smith                                        MatMult_MPIBAIJ,
257197304618SKris Buschelman                                 /* 4*/ MatMultAdd_MPIBAIJ,
25727c922b88SBarry Smith                                        MatMultTranspose_MPIBAIJ,
25737c922b88SBarry Smith                                        MatMultTransposeAdd_MPIBAIJ,
2574cc2dc46cSBarry Smith                                        0,
2575cc2dc46cSBarry Smith                                        0,
2576cc2dc46cSBarry Smith                                        0,
257797304618SKris Buschelman                                 /*10*/ 0,
2578cc2dc46cSBarry Smith                                        0,
2579cc2dc46cSBarry Smith                                        0,
2580b1a666ecSBarry Smith                                        MatSOR_MPIBAIJ,
2581cc2dc46cSBarry Smith                                        MatTranspose_MPIBAIJ,
258297304618SKris Buschelman                                 /*15*/ MatGetInfo_MPIBAIJ,
25837fc3c18eSBarry Smith                                        MatEqual_MPIBAIJ,
2584cc2dc46cSBarry Smith                                        MatGetDiagonal_MPIBAIJ,
2585cc2dc46cSBarry Smith                                        MatDiagonalScale_MPIBAIJ,
2586cc2dc46cSBarry Smith                                        MatNorm_MPIBAIJ,
258797304618SKris Buschelman                                 /*20*/ MatAssemblyBegin_MPIBAIJ,
2588cc2dc46cSBarry Smith                                        MatAssemblyEnd_MPIBAIJ,
2589cc2dc46cSBarry Smith                                        MatSetOption_MPIBAIJ,
2590cc2dc46cSBarry Smith                                        MatZeroEntries_MPIBAIJ,
2591d519adbfSMatthew Knepley                                 /*24*/ MatZeroRows_MPIBAIJ,
2592cc2dc46cSBarry Smith                                        0,
2593cc2dc46cSBarry Smith                                        0,
2594cc2dc46cSBarry Smith                                        0,
2595cc2dc46cSBarry Smith                                        0,
25964994cf47SJed Brown                                 /*29*/ MatSetUp_MPIBAIJ,
2597273d9f13SBarry Smith                                        0,
2598cc2dc46cSBarry Smith                                        0,
2599cc2dc46cSBarry Smith                                        0,
2600cc2dc46cSBarry Smith                                        0,
2601d519adbfSMatthew Knepley                                 /*34*/ MatDuplicate_MPIBAIJ,
2602cc2dc46cSBarry Smith                                        0,
2603cc2dc46cSBarry Smith                                        0,
2604cc2dc46cSBarry Smith                                        0,
2605cc2dc46cSBarry Smith                                        0,
2606d519adbfSMatthew Knepley                                 /*39*/ MatAXPY_MPIBAIJ,
2607cc2dc46cSBarry Smith                                        MatGetSubMatrices_MPIBAIJ,
2608cc2dc46cSBarry Smith                                        MatIncreaseOverlap_MPIBAIJ,
2609cc2dc46cSBarry Smith                                        MatGetValues_MPIBAIJ,
26103c896bc6SHong Zhang                                        MatCopy_MPIBAIJ,
2611d519adbfSMatthew Knepley                                 /*44*/ 0,
2612cc2dc46cSBarry Smith                                        MatScale_MPIBAIJ,
2613cc2dc46cSBarry Smith                                        0,
2614cc2dc46cSBarry Smith                                        0,
26156f0a72daSMatthew G. Knepley                                        MatZeroRowsColumns_MPIBAIJ,
2616f73d5cc4SBarry Smith                                 /*49*/ 0,
2617cc2dc46cSBarry Smith                                        0,
2618cc2dc46cSBarry Smith                                        0,
2619cc2dc46cSBarry Smith                                        0,
2620cc2dc46cSBarry Smith                                        0,
262193dfae19SHong Zhang                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2622cc2dc46cSBarry Smith                                        0,
2623cc2dc46cSBarry Smith                                        MatSetUnfactored_MPIBAIJ,
262482094794SBarry Smith                                        MatPermute_MPIBAIJ,
2625cc2dc46cSBarry Smith                                        MatSetValuesBlocked_MPIBAIJ,
2626d519adbfSMatthew Knepley                                 /*59*/ MatGetSubMatrix_MPIBAIJ,
2627f14a1c24SBarry Smith                                        MatDestroy_MPIBAIJ,
2628f14a1c24SBarry Smith                                        MatView_MPIBAIJ,
2629357abbc8SBarry Smith                                        0,
26307843d17aSBarry Smith                                        0,
2631d519adbfSMatthew Knepley                                 /*64*/ 0,
26327843d17aSBarry Smith                                        0,
26337843d17aSBarry Smith                                        0,
26347843d17aSBarry Smith                                        0,
26357843d17aSBarry Smith                                        0,
2636d519adbfSMatthew Knepley                                 /*69*/ MatGetRowMaxAbs_MPIBAIJ,
26377843d17aSBarry Smith                                        0,
263897304618SKris Buschelman                                        0,
263997304618SKris Buschelman                                        0,
264097304618SKris Buschelman                                        0,
2641d519adbfSMatthew Knepley                                 /*74*/ 0,
2642f6d58c54SBarry Smith                                        MatFDColoringApply_BAIJ,
264397304618SKris Buschelman                                        0,
264497304618SKris Buschelman                                        0,
264597304618SKris Buschelman                                        0,
2646d519adbfSMatthew Knepley                                 /*79*/ 0,
264797304618SKris Buschelman                                        0,
264897304618SKris Buschelman                                        0,
264997304618SKris Buschelman                                        0,
26505bba2384SShri Abhyankar                                        MatLoad_MPIBAIJ,
2651d519adbfSMatthew Knepley                                 /*84*/ 0,
2652865e5f61SKris Buschelman                                        0,
2653865e5f61SKris Buschelman                                        0,
2654865e5f61SKris Buschelman                                        0,
2655865e5f61SKris Buschelman                                        0,
2656d519adbfSMatthew Knepley                                 /*89*/ 0,
2657865e5f61SKris Buschelman                                        0,
2658865e5f61SKris Buschelman                                        0,
2659865e5f61SKris Buschelman                                        0,
2660865e5f61SKris Buschelman                                        0,
2661d519adbfSMatthew Knepley                                 /*94*/ 0,
2662865e5f61SKris Buschelman                                        0,
2663865e5f61SKris Buschelman                                        0,
266499cafbc1SBarry Smith                                        0,
266599cafbc1SBarry Smith                                        0,
2666d519adbfSMatthew Knepley                                 /*99*/ 0,
266799cafbc1SBarry Smith                                        0,
266899cafbc1SBarry Smith                                        0,
266999cafbc1SBarry Smith                                        0,
267099cafbc1SBarry Smith                                        0,
2671d519adbfSMatthew Knepley                                 /*104*/0,
267299cafbc1SBarry Smith                                        MatRealPart_MPIBAIJ,
26738c7482ecSBarry Smith                                        MatImaginaryPart_MPIBAIJ,
26748c7482ecSBarry Smith                                        0,
26758c7482ecSBarry Smith                                        0,
2676d519adbfSMatthew Knepley                                 /*109*/0,
26778c7482ecSBarry Smith                                        0,
26788c7482ecSBarry Smith                                        0,
26798c7482ecSBarry Smith                                        0,
26808c7482ecSBarry Smith                                        0,
2681d1adec66SJed Brown                                 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ,
26828c7482ecSBarry Smith                                        0,
26834683f7a4SShri Abhyankar                                        MatGetGhosts_MPIBAIJ,
26844683f7a4SShri Abhyankar                                        0,
26854683f7a4SShri Abhyankar                                        0,
26864683f7a4SShri Abhyankar                                 /*119*/0,
26874683f7a4SShri Abhyankar                                        0,
26884683f7a4SShri Abhyankar                                        0,
2689bbead8a2SBarry Smith                                        0,
2690e8271787SHong Zhang                                        MatGetMultiProcBlock_MPIBAIJ,
2691bbead8a2SBarry Smith                                 /*124*/0,
269247f7623dSRémi Lacroix                                        MatGetColumnNorms_MPIBAIJ,
26933964eb88SJed Brown                                        MatInvertBlockDiagonal_MPIBAIJ,
26943964eb88SJed Brown                                        0,
26953964eb88SJed Brown                                        0,
26963964eb88SJed Brown                                /*129*/ 0,
26973964eb88SJed Brown                                        0,
26983964eb88SJed Brown                                        0,
26993964eb88SJed Brown                                        0,
27003964eb88SJed Brown                                        0,
27013964eb88SJed Brown                                /*134*/ 0,
27023964eb88SJed Brown                                        0,
27033964eb88SJed Brown                                        0,
27043964eb88SJed Brown                                        0,
27053964eb88SJed Brown                                        0,
27063964eb88SJed Brown                                /*139*/ 0,
2707f9426fe0SMark Adams                                        0,
27081919a2e2SJed Brown                                        0,
2709f86b9fbaSHong Zhang                                        MatFDColoringSetUp_MPIXAIJ
27108c7482ecSBarry Smith };
271179bdfe76SSatish Balay 
27124a2ae208SSatish Balay #undef __FUNCT__
27134a2ae208SSatish Balay #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
271411bd1e4dSLisandro Dalcin PetscErrorCode  MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a)
27155ef9f2a5SBarry Smith {
27165ef9f2a5SBarry Smith   PetscFunctionBegin;
27175ef9f2a5SBarry Smith   *a = ((Mat_MPIBAIJ*)A->data)->A;
27185ef9f2a5SBarry Smith   PetscFunctionReturn(0);
27195ef9f2a5SBarry Smith }
272079bdfe76SSatish Balay 
27218cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*);
2722d94109b8SHong Zhang 
2723aac34f13SBarry Smith #undef __FUNCT__
2724aac34f13SBarry Smith #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
2725cf12db73SBarry Smith PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2726aac34f13SBarry Smith {
2727b8d659d7SLisandro Dalcin   PetscInt       m,rstart,cstart,cend;
2728b8d659d7SLisandro Dalcin   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
2729b8d659d7SLisandro Dalcin   const PetscInt *JJ    =0;
2730b8d659d7SLisandro Dalcin   PetscScalar    *values=0;
2731d47bf9aaSJed Brown   PetscBool      roworiented = ((Mat_MPIBAIJ*)B->data)->roworiented;
2732aac34f13SBarry Smith   PetscErrorCode ierr;
2733aac34f13SBarry Smith 
2734aac34f13SBarry Smith   PetscFunctionBegin;
273526283091SBarry Smith   ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
273626283091SBarry Smith   ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
273726283091SBarry Smith   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
273826283091SBarry Smith   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2739e02043d6SBarry Smith   ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2740d0f46423SBarry Smith   m      = B->rmap->n/bs;
2741d0f46423SBarry Smith   rstart = B->rmap->rstart/bs;
2742d0f46423SBarry Smith   cstart = B->cmap->rstart/bs;
2743d0f46423SBarry Smith   cend   = B->cmap->rend/bs;
2744b8d659d7SLisandro Dalcin 
2745e32f2f54SBarry Smith   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2746dcca6d9dSJed Brown   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
2747aac34f13SBarry Smith   for (i=0; i<m; i++) {
2748cf12db73SBarry Smith     nz = ii[i+1] - ii[i];
2749e32f2f54SBarry Smith     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2750b8d659d7SLisandro Dalcin     nz_max = PetscMax(nz_max,nz);
2751cf12db73SBarry Smith     JJ     = jj + ii[i];
2752b8d659d7SLisandro Dalcin     for (j=0; j<nz; j++) {
2753aac34f13SBarry Smith       if (*JJ >= cstart) break;
2754aac34f13SBarry Smith       JJ++;
2755aac34f13SBarry Smith     }
2756aac34f13SBarry Smith     d = 0;
2757b8d659d7SLisandro Dalcin     for (; j<nz; j++) {
2758aac34f13SBarry Smith       if (*JJ++ >= cend) break;
2759aac34f13SBarry Smith       d++;
2760aac34f13SBarry Smith     }
2761aac34f13SBarry Smith     d_nnz[i] = d;
2762b8d659d7SLisandro Dalcin     o_nnz[i] = nz - d;
2763aac34f13SBarry Smith   }
2764aac34f13SBarry Smith   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2765fca92195SBarry Smith   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2766aac34f13SBarry Smith 
2767b8d659d7SLisandro Dalcin   values = (PetscScalar*)V;
2768b8d659d7SLisandro Dalcin   if (!values) {
2769785e854fSJed Brown     ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
2770b8d659d7SLisandro Dalcin     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2771b8d659d7SLisandro Dalcin   }
2772b8d659d7SLisandro Dalcin   for (i=0; i<m; i++) {
2773b8d659d7SLisandro Dalcin     PetscInt          row    = i + rstart;
2774cf12db73SBarry Smith     PetscInt          ncols  = ii[i+1] - ii[i];
2775cf12db73SBarry Smith     const PetscInt    *icols = jj + ii[i];
27763adadaf3SJed Brown     if (!roworiented) {         /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
2777cf12db73SBarry Smith       const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2778b8d659d7SLisandro Dalcin       ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
27793adadaf3SJed Brown     } else {                    /* block ordering does not match so we can only insert one block at a time. */
27803adadaf3SJed Brown       PetscInt j;
27813adadaf3SJed Brown       for (j=0; j<ncols; j++) {
27823adadaf3SJed Brown         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
27833adadaf3SJed Brown         ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
27843adadaf3SJed Brown       }
27853adadaf3SJed Brown     }
2786aac34f13SBarry Smith   }
2787aac34f13SBarry Smith 
2788b8d659d7SLisandro Dalcin   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2789aac34f13SBarry Smith   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2790aac34f13SBarry Smith   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
27917827cd58SJed Brown   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2792aac34f13SBarry Smith   PetscFunctionReturn(0);
2793aac34f13SBarry Smith }
2794aac34f13SBarry Smith 
2795aac34f13SBarry Smith #undef __FUNCT__
2796aac34f13SBarry Smith #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
2797aac34f13SBarry Smith /*@C
2798dfb205c3SBarry Smith    MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
2799aac34f13SBarry Smith    (the default parallel PETSc format).
2800aac34f13SBarry Smith 
2801aac34f13SBarry Smith    Collective on MPI_Comm
2802aac34f13SBarry Smith 
2803aac34f13SBarry Smith    Input Parameters:
28041c4f3114SJed Brown +  B - the matrix
2805dfb205c3SBarry Smith .  bs - the block size
2806aac34f13SBarry Smith .  i - the indices into j for the start of each local row (starts with zero)
2807aac34f13SBarry Smith .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2808aac34f13SBarry Smith -  v - optional values in the matrix
2809aac34f13SBarry Smith 
2810aac34f13SBarry Smith    Level: developer
2811aac34f13SBarry Smith 
28123adadaf3SJed Brown    Notes: The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
28133adadaf3SJed Brown    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
28143adadaf3SJed Brown    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
28153adadaf3SJed Brown    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
28163adadaf3SJed Brown    block column and the second index is over columns within a block.
28173adadaf3SJed Brown 
2818aac34f13SBarry Smith .keywords: matrix, aij, compressed row, sparse, parallel
2819aac34f13SBarry Smith 
28203adadaf3SJed Brown .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, MatCreateMPIBAIJWithArrays(), MPIBAIJ
2821aac34f13SBarry Smith @*/
28227087cfbeSBarry Smith PetscErrorCode  MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2823aac34f13SBarry Smith {
28244ac538c5SBarry Smith   PetscErrorCode ierr;
2825aac34f13SBarry Smith 
2826aac34f13SBarry Smith   PetscFunctionBegin;
28276ba663aaSJed Brown   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
28286ba663aaSJed Brown   PetscValidType(B,1);
28296ba663aaSJed Brown   PetscValidLogicalCollectiveInt(B,bs,2);
28304ac538c5SBarry Smith   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
2831aac34f13SBarry Smith   PetscFunctionReturn(0);
2832aac34f13SBarry Smith }
2833aac34f13SBarry Smith 
28344a2ae208SSatish Balay #undef __FUNCT__
2835a23d5eceSKris Buschelman #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
2836b2573a8aSBarry Smith PetscErrorCode  MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
2837a23d5eceSKris Buschelman {
2838a23d5eceSKris Buschelman   Mat_MPIBAIJ    *b;
2839dfbe8321SBarry Smith   PetscErrorCode ierr;
2840535b19f3SBarry Smith   PetscInt       i;
2841a23d5eceSKris Buschelman 
2842a23d5eceSKris Buschelman   PetscFunctionBegin;
284333d57670SJed Brown   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
284426283091SBarry Smith   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
284526283091SBarry Smith   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2846e02043d6SBarry Smith   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2847899cda47SBarry Smith 
2848a23d5eceSKris Buschelman   if (d_nnz) {
2849d0f46423SBarry Smith     for (i=0; i<B->rmap->n/bs; i++) {
2850e32f2f54SBarry Smith       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
2851a23d5eceSKris Buschelman     }
2852a23d5eceSKris Buschelman   }
2853a23d5eceSKris Buschelman   if (o_nnz) {
2854d0f46423SBarry Smith     for (i=0; i<B->rmap->n/bs; i++) {
2855e32f2f54SBarry Smith       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
2856a23d5eceSKris Buschelman     }
2857a23d5eceSKris Buschelman   }
2858a23d5eceSKris Buschelman 
2859a23d5eceSKris Buschelman   b      = (Mat_MPIBAIJ*)B->data;
2860a23d5eceSKris Buschelman   b->bs2 = bs*bs;
2861d0f46423SBarry Smith   b->mbs = B->rmap->n/bs;
2862d0f46423SBarry Smith   b->nbs = B->cmap->n/bs;
2863d0f46423SBarry Smith   b->Mbs = B->rmap->N/bs;
2864d0f46423SBarry Smith   b->Nbs = B->cmap->N/bs;
2865a23d5eceSKris Buschelman 
2866a23d5eceSKris Buschelman   for (i=0; i<=b->size; i++) {
2867d0f46423SBarry Smith     b->rangebs[i] = B->rmap->range[i]/bs;
2868a23d5eceSKris Buschelman   }
2869d0f46423SBarry Smith   b->rstartbs = B->rmap->rstart/bs;
2870d0f46423SBarry Smith   b->rendbs   = B->rmap->rend/bs;
2871d0f46423SBarry Smith   b->cstartbs = B->cmap->rstart/bs;
2872d0f46423SBarry Smith   b->cendbs   = B->cmap->rend/bs;
2873a23d5eceSKris Buschelman 
2874526dfc15SBarry Smith   if (!B->preallocated) {
2875f69a0ea3SMatthew Knepley     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2876d0f46423SBarry Smith     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
28779c097c71SKris Buschelman     ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
28783bb1ff40SBarry Smith     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2879f69a0ea3SMatthew Knepley     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2880d0f46423SBarry Smith     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
28819c097c71SKris Buschelman     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
28823bb1ff40SBarry Smith     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2883ce94432eSBarry Smith     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
2884526dfc15SBarry Smith   }
2885a23d5eceSKris Buschelman 
2886526dfc15SBarry Smith   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2887526dfc15SBarry Smith   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2888526dfc15SBarry Smith   B->preallocated = PETSC_TRUE;
2889a23d5eceSKris Buschelman   PetscFunctionReturn(0);
2890a23d5eceSKris Buschelman }
2891a23d5eceSKris Buschelman 
28927087cfbeSBarry Smith extern PetscErrorCode  MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
28937087cfbeSBarry Smith extern PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
28945bf65638SKris Buschelman 
289582094794SBarry Smith #undef __FUNCT__
289682094794SBarry Smith #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj"
28978cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj)
289882094794SBarry Smith {
289982094794SBarry Smith   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
290082094794SBarry Smith   PetscErrorCode ierr;
290182094794SBarry Smith   Mat_SeqBAIJ    *d  = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
290282094794SBarry Smith   PetscInt       M   = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
290382094794SBarry Smith   const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;
290482094794SBarry Smith 
290582094794SBarry Smith   PetscFunctionBegin;
2906785e854fSJed Brown   ierr  = PetscMalloc1((M+1),&ii);CHKERRQ(ierr);
290782094794SBarry Smith   ii[0] = 0;
290882094794SBarry Smith   for (i=0; i<M; i++) {
2909e32f2f54SBarry Smith     if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
2910e32f2f54SBarry Smith     if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
291182094794SBarry Smith     ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
29125ee9ba1cSJed Brown     /* remove one from count of matrix has diagonal */
29135ee9ba1cSJed Brown     for (j=id[i]; j<id[i+1]; j++) {
29145ee9ba1cSJed Brown       if (jd[j] == i) {ii[i+1]--;break;}
29155ee9ba1cSJed Brown     }
291682094794SBarry Smith   }
2917785e854fSJed Brown   ierr = PetscMalloc1(ii[M],&jj);CHKERRQ(ierr);
291882094794SBarry Smith   cnt  = 0;
291982094794SBarry Smith   for (i=0; i<M; i++) {
292082094794SBarry Smith     for (j=io[i]; j<io[i+1]; j++) {
292182094794SBarry Smith       if (garray[jo[j]] > rstart) break;
292282094794SBarry Smith       jj[cnt++] = garray[jo[j]];
292382094794SBarry Smith     }
292482094794SBarry Smith     for (k=id[i]; k<id[i+1]; k++) {
29255ee9ba1cSJed Brown       if (jd[k] != i) {
292682094794SBarry Smith         jj[cnt++] = rstart + jd[k];
292782094794SBarry Smith       }
29285ee9ba1cSJed Brown     }
292982094794SBarry Smith     for (; j<io[i+1]; j++) {
293082094794SBarry Smith       jj[cnt++] = garray[jo[j]];
293182094794SBarry Smith     }
293282094794SBarry Smith   }
2933ce94432eSBarry Smith   ierr = MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj);CHKERRQ(ierr);
293482094794SBarry Smith   PetscFunctionReturn(0);
293582094794SBarry Smith }
293682094794SBarry Smith 
2937c6db04a5SJed Brown #include <../src/mat/impls/aij/mpi/mpiaij.h>
293862471d69SBarry Smith 
29398cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*);
2940b2573a8aSBarry Smith 
294162471d69SBarry Smith #undef __FUNCT__
294262471d69SBarry Smith #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAIJ"
29438cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
294462471d69SBarry Smith {
294562471d69SBarry Smith   PetscErrorCode ierr;
294662471d69SBarry Smith   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
294762471d69SBarry Smith   Mat            B;
294885a69837SSatish Balay   Mat_MPIAIJ     *b;
294962471d69SBarry Smith 
295062471d69SBarry Smith   PetscFunctionBegin;
2951ce94432eSBarry Smith   if (!A->assembled) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled");
295262471d69SBarry Smith 
2953ce94432eSBarry Smith   ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
295462471d69SBarry Smith   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
29556d0a4a0eSHong Zhang   ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
29560298fd71SBarry Smith   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
29570298fd71SBarry Smith   ierr = MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);CHKERRQ(ierr);
295862471d69SBarry Smith   b    = (Mat_MPIAIJ*) B->data;
295962471d69SBarry Smith 
29606bf464f9SBarry Smith   ierr = MatDestroy(&b->A);CHKERRQ(ierr);
29616bf464f9SBarry Smith   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2962ab9863d7SBarry Smith   ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr);
296362471d69SBarry Smith   ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr);
296462471d69SBarry Smith   ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr);
296562471d69SBarry Smith   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
296662471d69SBarry Smith   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
29676a719282SBarry Smith   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
29686a719282SBarry Smith   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
296962471d69SBarry Smith   if (reuse == MAT_REUSE_MATRIX) {
297062471d69SBarry Smith     ierr = MatHeaderReplace(A,B);CHKERRQ(ierr);
297162471d69SBarry Smith   } else {
297262471d69SBarry Smith    *newmat = B;
297362471d69SBarry Smith   }
297462471d69SBarry Smith   PetscFunctionReturn(0);
297562471d69SBarry Smith }
297662471d69SBarry Smith 
2977450b117fSShri Abhyankar #if defined(PETSC_HAVE_MUMPS)
29788cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
2979450b117fSShri Abhyankar #endif
2980450b117fSShri Abhyankar 
29810bad9183SKris Buschelman /*MC
2982fafad747SKris Buschelman    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
29830bad9183SKris Buschelman 
29840bad9183SKris Buschelman    Options Database Keys:
29858c07d4e3SBarry Smith + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
29868c07d4e3SBarry Smith . -mat_block_size <bs> - set the blocksize used to store the matrix
29878c07d4e3SBarry Smith - -mat_use_hash_table <fact>
29880bad9183SKris Buschelman 
29890bad9183SKris Buschelman   Level: beginner
29900bad9183SKris Buschelman 
29910bad9183SKris Buschelman .seealso: MatCreateMPIBAIJ
29920bad9183SKris Buschelman M*/
29930bad9183SKris Buschelman 
29948cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*);
2995c0cdd4a1SDahai Guo 
2996a23d5eceSKris Buschelman #undef __FUNCT__
29974a2ae208SSatish Balay #define __FUNCT__ "MatCreate_MPIBAIJ"
29988cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
2999273d9f13SBarry Smith {
3000273d9f13SBarry Smith   Mat_MPIBAIJ    *b;
3001dfbe8321SBarry Smith   PetscErrorCode ierr;
3002ace3abfcSBarry Smith   PetscBool      flg;
3003273d9f13SBarry Smith 
3004273d9f13SBarry Smith   PetscFunctionBegin;
3005b00a9115SJed Brown   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
300682502324SSatish Balay   B->data = (void*)b;
300782502324SSatish Balay 
3008273d9f13SBarry Smith   ierr         = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3009273d9f13SBarry Smith   B->assembled = PETSC_FALSE;
3010273d9f13SBarry Smith 
3011273d9f13SBarry Smith   B->insertmode = NOT_SET_VALUES;
3012ce94432eSBarry Smith   ierr          = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
3013ce94432eSBarry Smith   ierr          = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);
3014273d9f13SBarry Smith 
3015273d9f13SBarry Smith   /* build local table of row and column ownerships */
3016785e854fSJed Brown   ierr = PetscMalloc1((b->size+1),&b->rangebs);CHKERRQ(ierr);
3017273d9f13SBarry Smith 
3018273d9f13SBarry Smith   /* build cache for off array entries formed */
3019ce94432eSBarry Smith   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
302026fbe8dcSKarl Rupp 
3021273d9f13SBarry Smith   b->donotstash  = PETSC_FALSE;
30220298fd71SBarry Smith   b->colmap      = NULL;
30230298fd71SBarry Smith   b->garray      = NULL;
3024273d9f13SBarry Smith   b->roworiented = PETSC_TRUE;
3025273d9f13SBarry Smith 
3026273d9f13SBarry Smith   /* stuff used in block assembly */
3027273d9f13SBarry Smith   b->barray = 0;
3028273d9f13SBarry Smith 
3029273d9f13SBarry Smith   /* stuff used for matrix vector multiply */
3030273d9f13SBarry Smith   b->lvec  = 0;
3031273d9f13SBarry Smith   b->Mvctx = 0;
3032273d9f13SBarry Smith 
3033273d9f13SBarry Smith   /* stuff for MatGetRow() */
3034273d9f13SBarry Smith   b->rowindices   = 0;
3035273d9f13SBarry Smith   b->rowvalues    = 0;
3036273d9f13SBarry Smith   b->getrowactive = PETSC_FALSE;
3037273d9f13SBarry Smith 
3038273d9f13SBarry Smith   /* hash table stuff */
3039273d9f13SBarry Smith   b->ht           = 0;
3040273d9f13SBarry Smith   b->hd           = 0;
3041273d9f13SBarry Smith   b->ht_size      = 0;
3042273d9f13SBarry Smith   b->ht_flag      = PETSC_FALSE;
3043273d9f13SBarry Smith   b->ht_fact      = 0;
3044273d9f13SBarry Smith   b->ht_total_ct  = 0;
3045273d9f13SBarry Smith   b->ht_insert_ct = 0;
3046273d9f13SBarry Smith 
30477a868f3eSHong Zhang   /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
30487a868f3eSHong Zhang   b->ijonly = PETSC_FALSE;
30497a868f3eSHong Zhang 
3050ce94432eSBarry Smith   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr);
30510298fd71SBarry Smith   ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,NULL);CHKERRQ(ierr);
3052273d9f13SBarry Smith   if (flg) {
3053f6275e2eSBarry Smith     PetscReal fact = 1.39;
30544e0d8c25SBarry Smith     ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
30550298fd71SBarry Smith     ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
3056273d9f13SBarry Smith     if (fact <= 1.0) fact = 1.39;
3057273d9f13SBarry Smith     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
30581e2582c4SBarry Smith     ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
3059273d9f13SBarry Smith   }
30608c07d4e3SBarry Smith   ierr = PetscOptionsEnd();CHKERRQ(ierr);
30618c07d4e3SBarry Smith 
3062450b117fSShri Abhyankar #if defined(PETSC_HAVE_MUMPS)
3063bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_baij_mumps);CHKERRQ(ierr);
3064450b117fSShri Abhyankar #endif
3065bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
3066bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr);
3067bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr);
3068bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
3069bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
3070bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
3071bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
3072bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
3073bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
3074bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSetHashTableFactor_C",MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
3075bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpibstrm_C",MatConvert_MPIBAIJ_MPIBSTRM);CHKERRQ(ierr);
307617667f90SBarry Smith   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
3077273d9f13SBarry Smith   PetscFunctionReturn(0);
3078273d9f13SBarry Smith }
3079273d9f13SBarry Smith 
3080209238afSKris Buschelman /*MC
3081002d173eSKris Buschelman    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
3082209238afSKris Buschelman 
3083209238afSKris Buschelman    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
3084209238afSKris Buschelman    and MATMPIBAIJ otherwise.
3085209238afSKris Buschelman 
3086209238afSKris Buschelman    Options Database Keys:
3087209238afSKris Buschelman . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
3088209238afSKris Buschelman 
3089209238afSKris Buschelman   Level: beginner
3090209238afSKris Buschelman 
309169b1f4b7SBarry Smith .seealso: MatCreateBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3092209238afSKris Buschelman M*/
3093209238afSKris Buschelman 
30944a2ae208SSatish Balay #undef __FUNCT__
30954a2ae208SSatish Balay #define __FUNCT__ "MatMPIBAIJSetPreallocation"
3096273d9f13SBarry Smith /*@C
3097aac34f13SBarry Smith    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
3098273d9f13SBarry Smith    (block compressed row).  For good matrix assembly performance
3099273d9f13SBarry Smith    the user should preallocate the matrix storage by setting the parameters
3100273d9f13SBarry Smith    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3101273d9f13SBarry Smith    performance can be increased by more than a factor of 50.
3102273d9f13SBarry Smith 
3103273d9f13SBarry Smith    Collective on Mat
3104273d9f13SBarry Smith 
3105273d9f13SBarry Smith    Input Parameters:
31061c4f3114SJed Brown +  B - the matrix
3107e8271787SHong Zhang .  bs   - size of block
3108273d9f13SBarry Smith .  d_nz  - number of block nonzeros per block row in diagonal portion of local
3109273d9f13SBarry Smith            submatrix  (same for all local rows)
3110273d9f13SBarry Smith .  d_nnz - array containing the number of block nonzeros in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
31120298fd71SBarry Smith            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry and
311395742e49SBarry Smith            set it even if it is zero.
3114273d9f13SBarry Smith .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
3115273d9f13SBarry Smith            submatrix (same for all local rows).
-  o_nnz - array containing the number of block nonzeros in the various block rows of the
3117273d9f13SBarry Smith            off-diagonal portion of the local submatrix (possibly different for
31180298fd71SBarry Smith            each block row) or NULL.
3119273d9f13SBarry Smith 
312049a6f317SBarry Smith    If the *_nnz parameter is given then the *_nz parameter is ignored
3121273d9f13SBarry Smith 
3122273d9f13SBarry Smith    Options Database Keys:
31238c07d4e3SBarry Smith +   -mat_block_size - size of the blocks to use
-   -mat_use_hash_table <fact> - use a hash table while constructing the matrix; <fact> is the hash table factor
3125273d9f13SBarry Smith 
3126273d9f13SBarry Smith    Notes:
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
3129273d9f13SBarry Smith 
3130273d9f13SBarry Smith    Storage Information:
3131273d9f13SBarry Smith    For a square global matrix we define each processor's diagonal portion
3132273d9f13SBarry Smith    to be its local rows and the corresponding columns (a square submatrix);
3133273d9f13SBarry Smith    each processor's off-diagonal portion encompasses the remainder of the
3134273d9f13SBarry Smith    local matrix (a rectangular submatrix).
3135273d9f13SBarry Smith 
3136273d9f13SBarry Smith    The user can specify preallocated storage for the diagonal part of
3137273d9f13SBarry Smith    the local submatrix with either d_nz or d_nnz (not both).  Set
31380298fd71SBarry Smith    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3139273d9f13SBarry Smith    memory allocation.  Likewise, specify preallocated storage for the
3140273d9f13SBarry Smith    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3141273d9f13SBarry Smith 
3142273d9f13SBarry Smith    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3143273d9f13SBarry Smith    the figure below we depict these three local rows and all columns (0-11).
3144273d9f13SBarry Smith 
3145273d9f13SBarry Smith .vb
3146273d9f13SBarry Smith            0 1 2 3 4 5 6 7 8 9 10 11
3147a4b1a0f6SJed Brown           --------------------------
3148273d9f13SBarry Smith    row 3  |o o o d d d o o o o  o  o
3149273d9f13SBarry Smith    row 4  |o o o d d d o o o o  o  o
3150273d9f13SBarry Smith    row 5  |o o o d d d o o o o  o  o
3151a4b1a0f6SJed Brown           --------------------------
3152273d9f13SBarry Smith .ve
3153273d9f13SBarry Smith 
3154273d9f13SBarry Smith    Thus, any entries in the d locations are stored in the d (diagonal)
3155273d9f13SBarry Smith    submatrix, and any entries in the o locations are stored in the
3156273d9f13SBarry Smith    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3157273d9f13SBarry Smith    stored simply in the MATSEQBAIJ format for compressed row storage.
3158273d9f13SBarry Smith 
3159273d9f13SBarry Smith    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3160273d9f13SBarry Smith    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3161273d9f13SBarry Smith    In general, for PDE problems in which most nonzeros are near the diagonal,
3162273d9f13SBarry Smith    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3163273d9f13SBarry Smith    or you will get TERRIBLE performance; see the users' manual chapter on
3164273d9f13SBarry Smith    matrices.
3165273d9f13SBarry Smith 
3166aa95bbe8SBarry Smith    You can call MatGetInfo() to get information on how effective the preallocation was;
3167aa95bbe8SBarry Smith    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3168aa95bbe8SBarry Smith    You can also run with the option -info and look for messages with the string
3169aa95bbe8SBarry Smith    malloc in them to see if additional memory allocation was needed.
3170aa95bbe8SBarry Smith 
3171273d9f13SBarry Smith    Level: intermediate
3172273d9f13SBarry Smith 
3173273d9f13SBarry Smith .keywords: matrix, block, aij, compressed row, sparse, parallel
3174273d9f13SBarry Smith 
3175ab978733SBarry Smith .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership()
3176273d9f13SBarry Smith @*/
31777087cfbeSBarry Smith PetscErrorCode  MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3178273d9f13SBarry Smith {
31794ac538c5SBarry Smith   PetscErrorCode ierr;
3180273d9f13SBarry Smith 
3181273d9f13SBarry Smith   PetscFunctionBegin;
31826ba663aaSJed Brown   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
31836ba663aaSJed Brown   PetscValidType(B,1);
31846ba663aaSJed Brown   PetscValidLogicalCollectiveInt(B,bs,2);
31854ac538c5SBarry Smith   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3186273d9f13SBarry Smith   PetscFunctionReturn(0);
3187273d9f13SBarry Smith }
3188273d9f13SBarry Smith 
31894a2ae208SSatish Balay #undef __FUNCT__
319069b1f4b7SBarry Smith #define __FUNCT__ "MatCreateBAIJ"
319179bdfe76SSatish Balay /*@C
319269b1f4b7SBarry Smith    MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format
319379bdfe76SSatish Balay    (block compressed row).  For good matrix assembly performance
319479bdfe76SSatish Balay    the user should preallocate the matrix storage by setting the parameters
319579bdfe76SSatish Balay    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
319679bdfe76SSatish Balay    performance can be increased by more than a factor of 50.
319779bdfe76SSatish Balay 
3198db81eaa0SLois Curfman McInnes    Collective on MPI_Comm
3199db81eaa0SLois Curfman McInnes 
320079bdfe76SSatish Balay    Input Parameters:
3201db81eaa0SLois Curfman McInnes +  comm - MPI communicator
.  bs   - size of block
320379bdfe76SSatish Balay .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
320492e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
320592e8d321SLois Curfman McInnes            y vector for the matrix-vector product y = Ax.
320692e8d321SLois Curfman McInnes .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
320792e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
320892e8d321SLois Curfman McInnes            x vector for the matrix-vector product y = Ax.
3209be79a94dSBarry Smith .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3210be79a94dSBarry Smith .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
321147a75d0bSBarry Smith .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
321279bdfe76SSatish Balay            submatrix  (same for all local rows)
321347a75d0bSBarry Smith .  d_nnz - array containing the number of nonzero blocks in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
32150298fd71SBarry Smith            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
321695742e49SBarry Smith            and set it even if it is zero.
321747a75d0bSBarry Smith .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
321879bdfe76SSatish Balay            submatrix (same for all local rows).
321947a75d0bSBarry Smith -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
322092e8d321SLois Curfman McInnes            off-diagonal portion of the local submatrix (possibly different for
32210298fd71SBarry Smith            each block row) or NULL.
322279bdfe76SSatish Balay 
322379bdfe76SSatish Balay    Output Parameter:
322479bdfe76SSatish Balay .  A - the matrix
322579bdfe76SSatish Balay 
3226db81eaa0SLois Curfman McInnes    Options Database Keys:
32278c07d4e3SBarry Smith +   -mat_block_size - size of the blocks to use
-   -mat_use_hash_table <fact> - use a hash table while constructing the matrix; <fact> is the hash table factor
32293ffaccefSLois Curfman McInnes 
3230175b88e8SBarry Smith    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
3232175b88e8SBarry Smith    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3233175b88e8SBarry Smith 
3234b259b22eSLois Curfman McInnes    Notes:
323549a6f317SBarry Smith    If the *_nnz parameter is given then the *_nz parameter is ignored
323649a6f317SBarry Smith 
   A nonzero block is any block that has 1 or more nonzeros in it
323847a75d0bSBarry Smith 
323979bdfe76SSatish Balay    The user MUST specify either the local or global matrix dimensions
324079bdfe76SSatish Balay    (possibly both).
324179bdfe76SSatish Balay 
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
3244be79a94dSBarry Smith 
324579bdfe76SSatish Balay    Storage Information:
324679bdfe76SSatish Balay    For a square global matrix we define each processor's diagonal portion
324779bdfe76SSatish Balay    to be its local rows and the corresponding columns (a square submatrix);
324879bdfe76SSatish Balay    each processor's off-diagonal portion encompasses the remainder of the
324979bdfe76SSatish Balay    local matrix (a rectangular submatrix).
325079bdfe76SSatish Balay 
325179bdfe76SSatish Balay    The user can specify preallocated storage for the diagonal part of
325279bdfe76SSatish Balay    the local submatrix with either d_nz or d_nnz (not both).  Set
32530298fd71SBarry Smith    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
325479bdfe76SSatish Balay    memory allocation.  Likewise, specify preallocated storage for the
325579bdfe76SSatish Balay    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
325679bdfe76SSatish Balay 
325779bdfe76SSatish Balay    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
325879bdfe76SSatish Balay    the figure below we depict these three local rows and all columns (0-11).
325979bdfe76SSatish Balay 
3260db81eaa0SLois Curfman McInnes .vb
3261db81eaa0SLois Curfman McInnes            0 1 2 3 4 5 6 7 8 9 10 11
3262a4b1a0f6SJed Brown           --------------------------
3263db81eaa0SLois Curfman McInnes    row 3  |o o o d d d o o o o  o  o
3264db81eaa0SLois Curfman McInnes    row 4  |o o o d d d o o o o  o  o
3265db81eaa0SLois Curfman McInnes    row 5  |o o o d d d o o o o  o  o
3266a4b1a0f6SJed Brown           --------------------------
3267db81eaa0SLois Curfman McInnes .ve
326879bdfe76SSatish Balay 
326979bdfe76SSatish Balay    Thus, any entries in the d locations are stored in the d (diagonal)
327079bdfe76SSatish Balay    submatrix, and any entries in the o locations are stored in the
327179bdfe76SSatish Balay    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
327257b952d6SSatish Balay    stored simply in the MATSEQBAIJ format for compressed row storage.
327379bdfe76SSatish Balay 
3274d64ed03dSBarry Smith    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3275d64ed03dSBarry Smith    and o_nz should indicate the number of block nonzeros per row in the o matrix.
327679bdfe76SSatish Balay    In general, for PDE problems in which most nonzeros are near the diagonal,
327792e8d321SLois Curfman McInnes    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
327892e8d321SLois Curfman McInnes    or you will get TERRIBLE performance; see the users' manual chapter on
32796da5968aSLois Curfman McInnes    matrices.
328079bdfe76SSatish Balay 
3281027ccd11SLois Curfman McInnes    Level: intermediate
3282027ccd11SLois Curfman McInnes 
328392e8d321SLois Curfman McInnes .keywords: matrix, block, aij, compressed row, sparse, parallel
328479bdfe76SSatish Balay 
328569b1f4b7SBarry Smith .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
328679bdfe76SSatish Balay @*/
328769b1f4b7SBarry Smith PetscErrorCode  MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
328879bdfe76SSatish Balay {
32896849ba73SBarry Smith   PetscErrorCode ierr;
3290b24ad042SBarry Smith   PetscMPIInt    size;
329179bdfe76SSatish Balay 
3292d64ed03dSBarry Smith   PetscFunctionBegin;
3293f69a0ea3SMatthew Knepley   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3294f69a0ea3SMatthew Knepley   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3295d132466eSBarry Smith   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3296273d9f13SBarry Smith   if (size > 1) {
3297273d9f13SBarry Smith     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
3298273d9f13SBarry Smith     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3299273d9f13SBarry Smith   } else {
3300273d9f13SBarry Smith     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3301273d9f13SBarry Smith     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
33023914022bSBarry Smith   }
33033a40ed3dSBarry Smith   PetscFunctionReturn(0);
330479bdfe76SSatish Balay }
3305026e39d0SSatish Balay 
33064a2ae208SSatish Balay #undef __FUNCT__
33074a2ae208SSatish Balay #define __FUNCT__ "MatDuplicate_MPIBAIJ"
33086849ba73SBarry Smith static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
33090ac07820SSatish Balay {
33100ac07820SSatish Balay   Mat            mat;
33110ac07820SSatish Balay   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
3312dfbe8321SBarry Smith   PetscErrorCode ierr;
3313b24ad042SBarry Smith   PetscInt       len=0;
33140ac07820SSatish Balay 
3315d64ed03dSBarry Smith   PetscFunctionBegin;
33160ac07820SSatish Balay   *newmat = 0;
3317ce94432eSBarry Smith   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
3318d0f46423SBarry Smith   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
33197adad957SLisandro Dalcin   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
33201d5dac46SHong Zhang   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
33217fff6886SHong Zhang 
3322d5f3da31SBarry Smith   mat->factortype   = matin->factortype;
3323273d9f13SBarry Smith   mat->preallocated = PETSC_TRUE;
33240ac07820SSatish Balay   mat->assembled    = PETSC_TRUE;
33257fff6886SHong Zhang   mat->insertmode   = NOT_SET_VALUES;
33267fff6886SHong Zhang 
3327273d9f13SBarry Smith   a             = (Mat_MPIBAIJ*)mat->data;
3328d0f46423SBarry Smith   mat->rmap->bs = matin->rmap->bs;
33290ac07820SSatish Balay   a->bs2        = oldmat->bs2;
33300ac07820SSatish Balay   a->mbs        = oldmat->mbs;
33310ac07820SSatish Balay   a->nbs        = oldmat->nbs;
33320ac07820SSatish Balay   a->Mbs        = oldmat->Mbs;
33330ac07820SSatish Balay   a->Nbs        = oldmat->Nbs;
33340ac07820SSatish Balay 
33351e1e43feSBarry Smith   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
33361e1e43feSBarry Smith   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3337899cda47SBarry Smith 
33380ac07820SSatish Balay   a->size         = oldmat->size;
33390ac07820SSatish Balay   a->rank         = oldmat->rank;
3340aef5e8e0SSatish Balay   a->donotstash   = oldmat->donotstash;
3341aef5e8e0SSatish Balay   a->roworiented  = oldmat->roworiented;
3342aef5e8e0SSatish Balay   a->rowindices   = 0;
33430ac07820SSatish Balay   a->rowvalues    = 0;
33440ac07820SSatish Balay   a->getrowactive = PETSC_FALSE;
334530793edcSSatish Balay   a->barray       = 0;
3346899cda47SBarry Smith   a->rstartbs     = oldmat->rstartbs;
3347899cda47SBarry Smith   a->rendbs       = oldmat->rendbs;
3348899cda47SBarry Smith   a->cstartbs     = oldmat->cstartbs;
3349899cda47SBarry Smith   a->cendbs       = oldmat->cendbs;
33500ac07820SSatish Balay 
3351133cdb44SSatish Balay   /* hash table stuff */
3352133cdb44SSatish Balay   a->ht           = 0;
3353133cdb44SSatish Balay   a->hd           = 0;
3354133cdb44SSatish Balay   a->ht_size      = 0;
3355133cdb44SSatish Balay   a->ht_flag      = oldmat->ht_flag;
335625fdafccSSatish Balay   a->ht_fact      = oldmat->ht_fact;
3357133cdb44SSatish Balay   a->ht_total_ct  = 0;
3358133cdb44SSatish Balay   a->ht_insert_ct = 0;
3359133cdb44SSatish Balay 
3360899cda47SBarry Smith   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
33610ac07820SSatish Balay   if (oldmat->colmap) {
3362aa482453SBarry Smith #if defined(PETSC_USE_CTABLE)
33630f5bd95cSBarry Smith     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
336448e59246SSatish Balay #else
3365785e854fSJed Brown     ierr = PetscMalloc1((a->Nbs),&a->colmap);CHKERRQ(ierr);
33663bb1ff40SBarry Smith     ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3367b24ad042SBarry Smith     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
336848e59246SSatish Balay #endif
33690ac07820SSatish Balay   } else a->colmap = 0;
33704beb1cfeSHong Zhang 
33710ac07820SSatish Balay   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
3372785e854fSJed Brown     ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
33733bb1ff40SBarry Smith     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3374b24ad042SBarry Smith     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
33750ac07820SSatish Balay   } else a->garray = 0;
33760ac07820SSatish Balay 
3377ce94432eSBarry Smith   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
33780ac07820SSatish Balay   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
33793bb1ff40SBarry Smith   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
33800ac07820SSatish Balay   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
33813bb1ff40SBarry Smith   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
33827fff6886SHong Zhang 
33832e8a6d31SBarry Smith   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
33843bb1ff40SBarry Smith   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
33852e8a6d31SBarry Smith   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
33863bb1ff40SBarry Smith   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
3387140e18c1SBarry Smith   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
33880ac07820SSatish Balay   *newmat = mat;
33893a40ed3dSBarry Smith   PetscFunctionReturn(0);
33900ac07820SSatish Balay }
339157b952d6SSatish Balay 
33924a2ae208SSatish Balay #undef __FUNCT__
33935bba2384SShri Abhyankar #define __FUNCT__ "MatLoad_MPIBAIJ"
3394112444f4SShri Abhyankar PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer)
33954683f7a4SShri Abhyankar {
33964683f7a4SShri Abhyankar   PetscErrorCode ierr;
33974683f7a4SShri Abhyankar   int            fd;
33984683f7a4SShri Abhyankar   PetscInt       i,nz,j,rstart,rend;
33994683f7a4SShri Abhyankar   PetscScalar    *vals,*buf;
3400ce94432eSBarry Smith   MPI_Comm       comm;
34014683f7a4SShri Abhyankar   MPI_Status     status;
34024683f7a4SShri Abhyankar   PetscMPIInt    rank,size,maxnz;
34034683f7a4SShri Abhyankar   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
34040298fd71SBarry Smith   PetscInt       *locrowlens = NULL,*procsnz = NULL,*browners = NULL;
34053059b6faSBarry Smith   PetscInt       jj,*mycols,*ibuf,bs = newmat->rmap->bs,Mbs,mbs,extra_rows,mmax;
34064683f7a4SShri Abhyankar   PetscMPIInt    tag    = ((PetscObject)viewer)->tag;
34070298fd71SBarry Smith   PetscInt       *dlens = NULL,*odlens = NULL,*mask = NULL,*masked1 = NULL,*masked2 = NULL,rowcount,odcount;
34084683f7a4SShri Abhyankar   PetscInt       dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols;
34094683f7a4SShri Abhyankar 
34104683f7a4SShri Abhyankar   PetscFunctionBegin;
3411ce94432eSBarry Smith   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
34120298fd71SBarry Smith   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr);
34130298fd71SBarry Smith   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
34144683f7a4SShri Abhyankar   ierr = PetscOptionsEnd();CHKERRQ(ierr);
34153059b6faSBarry Smith   if (bs < 0) bs = 1;
34164683f7a4SShri Abhyankar 
34174683f7a4SShri Abhyankar   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
34184683f7a4SShri Abhyankar   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
34194683f7a4SShri Abhyankar   if (!rank) {
34204683f7a4SShri Abhyankar     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
34214683f7a4SShri Abhyankar     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
34224683f7a4SShri Abhyankar     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
34234683f7a4SShri Abhyankar   }
34244683f7a4SShri Abhyankar 
34254683f7a4SShri Abhyankar   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
34264683f7a4SShri Abhyankar 
34274683f7a4SShri Abhyankar   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
34284683f7a4SShri Abhyankar   M    = header[1]; N = header[2];
34294683f7a4SShri Abhyankar 
34304683f7a4SShri Abhyankar   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
34314683f7a4SShri Abhyankar   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
34324683f7a4SShri Abhyankar   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
34334683f7a4SShri Abhyankar 
34344683f7a4SShri Abhyankar   /* If global sizes are set, check if they are consistent with that given in the file */
34354683f7a4SShri Abhyankar   if (sizesset) {
34364683f7a4SShri Abhyankar     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
34374683f7a4SShri Abhyankar   }
3438abd38a8fSBarry Smith   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3439abd38a8fSBarry Smith   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
34404683f7a4SShri Abhyankar 
3441ce94432eSBarry Smith   if (M != N) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Can only do square matrices");
34424683f7a4SShri Abhyankar 
34434683f7a4SShri Abhyankar   /*
34444683f7a4SShri Abhyankar      This code adds extra rows to make sure the number of rows is
34454683f7a4SShri Abhyankar      divisible by the blocksize
34464683f7a4SShri Abhyankar   */
34474683f7a4SShri Abhyankar   Mbs        = M/bs;
34484683f7a4SShri Abhyankar   extra_rows = bs - M + bs*Mbs;
34494683f7a4SShri Abhyankar   if (extra_rows == bs) extra_rows = 0;
34504683f7a4SShri Abhyankar   else                  Mbs++;
34514683f7a4SShri Abhyankar   if (extra_rows && !rank) {
34524683f7a4SShri Abhyankar     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
34534683f7a4SShri Abhyankar   }
34544683f7a4SShri Abhyankar 
34554683f7a4SShri Abhyankar   /* determine ownership of all rows */
34564683f7a4SShri Abhyankar   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
34574683f7a4SShri Abhyankar     mbs = Mbs/size + ((Mbs % size) > rank);
34584683f7a4SShri Abhyankar     m   = mbs*bs;
34594683f7a4SShri Abhyankar   } else { /* User set */
34604683f7a4SShri Abhyankar     m   = newmat->rmap->n;
34614683f7a4SShri Abhyankar     mbs = m/bs;
34624683f7a4SShri Abhyankar   }
3463dcca6d9dSJed Brown   ierr = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr);
34644683f7a4SShri Abhyankar   ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
34654683f7a4SShri Abhyankar 
34664683f7a4SShri Abhyankar   /* process 0 needs enough room for process with most rows */
34674683f7a4SShri Abhyankar   if (!rank) {
34684683f7a4SShri Abhyankar     mmax = rowners[1];
34691251c579SMatthew G Knepley     for (i=2; i<=size; i++) {
34704683f7a4SShri Abhyankar       mmax = PetscMax(mmax,rowners[i]);
34714683f7a4SShri Abhyankar     }
34724683f7a4SShri Abhyankar     mmax*=bs;
34733964eb88SJed Brown   } else mmax = -1;             /* unused, but compiler warns anyway */
34744683f7a4SShri Abhyankar 
34754683f7a4SShri Abhyankar   rowners[0] = 0;
34764683f7a4SShri Abhyankar   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
34774683f7a4SShri Abhyankar   for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
34784683f7a4SShri Abhyankar   rstart = rowners[rank];
34794683f7a4SShri Abhyankar   rend   = rowners[rank+1];
34804683f7a4SShri Abhyankar 
34814683f7a4SShri Abhyankar   /* distribute row lengths to all processors */
3482785e854fSJed Brown   ierr = PetscMalloc1(m,&locrowlens);CHKERRQ(ierr);
34834683f7a4SShri Abhyankar   if (!rank) {
34844683f7a4SShri Abhyankar     mend = m;
34854683f7a4SShri Abhyankar     if (size == 1) mend = mend - extra_rows;
34864683f7a4SShri Abhyankar     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
34874683f7a4SShri Abhyankar     for (j=mend; j<m; j++) locrowlens[j] = 1;
3488785e854fSJed Brown     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
34891795a4d1SJed Brown     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
34904683f7a4SShri Abhyankar     for (j=0; j<m; j++) {
34914683f7a4SShri Abhyankar       procsnz[0] += locrowlens[j];
34924683f7a4SShri Abhyankar     }
34934683f7a4SShri Abhyankar     for (i=1; i<size; i++) {
34944683f7a4SShri Abhyankar       mend = browners[i+1] - browners[i];
34954683f7a4SShri Abhyankar       if (i == size-1) mend = mend - extra_rows;
34964683f7a4SShri Abhyankar       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
34974683f7a4SShri Abhyankar       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
34984683f7a4SShri Abhyankar       /* calculate the number of nonzeros on each processor */
34994683f7a4SShri Abhyankar       for (j=0; j<browners[i+1]-browners[i]; j++) {
35004683f7a4SShri Abhyankar         procsnz[i] += rowlengths[j];
35014683f7a4SShri Abhyankar       }
35024683f7a4SShri Abhyankar       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
35034683f7a4SShri Abhyankar     }
35044683f7a4SShri Abhyankar     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
35054683f7a4SShri Abhyankar   } else {
35064683f7a4SShri Abhyankar     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
35074683f7a4SShri Abhyankar   }
35084683f7a4SShri Abhyankar 
35094683f7a4SShri Abhyankar   if (!rank) {
35104683f7a4SShri Abhyankar     /* determine max buffer needed and allocate it */
35114683f7a4SShri Abhyankar     maxnz = procsnz[0];
35124683f7a4SShri Abhyankar     for (i=1; i<size; i++) {
35134683f7a4SShri Abhyankar       maxnz = PetscMax(maxnz,procsnz[i]);
35144683f7a4SShri Abhyankar     }
3515785e854fSJed Brown     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
35164683f7a4SShri Abhyankar 
35174683f7a4SShri Abhyankar     /* read in my part of the matrix column indices  */
35184683f7a4SShri Abhyankar     nz     = procsnz[0];
3519785e854fSJed Brown     ierr   = PetscMalloc1((nz+1),&ibuf);CHKERRQ(ierr);
35204683f7a4SShri Abhyankar     mycols = ibuf;
35214683f7a4SShri Abhyankar     if (size == 1) nz -= extra_rows;
35224683f7a4SShri Abhyankar     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
352326fbe8dcSKarl Rupp     if (size == 1) {
352426fbe8dcSKarl Rupp       for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i;
352526fbe8dcSKarl Rupp     }
35264683f7a4SShri Abhyankar 
35274683f7a4SShri Abhyankar     /* read in every ones (except the last) and ship off */
35284683f7a4SShri Abhyankar     for (i=1; i<size-1; i++) {
35294683f7a4SShri Abhyankar       nz   = procsnz[i];
35304683f7a4SShri Abhyankar       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
35314683f7a4SShri Abhyankar       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
35324683f7a4SShri Abhyankar     }
35334683f7a4SShri Abhyankar     /* read in the stuff for the last proc */
35344683f7a4SShri Abhyankar     if (size != 1) {
35354683f7a4SShri Abhyankar       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
35364683f7a4SShri Abhyankar       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
35374683f7a4SShri Abhyankar       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
35384683f7a4SShri Abhyankar       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
35394683f7a4SShri Abhyankar     }
35404683f7a4SShri Abhyankar     ierr = PetscFree(cols);CHKERRQ(ierr);
35414683f7a4SShri Abhyankar   } else {
35424683f7a4SShri Abhyankar     /* determine buffer space needed for message */
35434683f7a4SShri Abhyankar     nz = 0;
35444683f7a4SShri Abhyankar     for (i=0; i<m; i++) {
35454683f7a4SShri Abhyankar       nz += locrowlens[i];
35464683f7a4SShri Abhyankar     }
3547785e854fSJed Brown     ierr   = PetscMalloc1((nz+1),&ibuf);CHKERRQ(ierr);
35484683f7a4SShri Abhyankar     mycols = ibuf;
35494683f7a4SShri Abhyankar     /* receive message of column indices*/
35504683f7a4SShri Abhyankar     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
35514683f7a4SShri Abhyankar     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
35524683f7a4SShri Abhyankar     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
35534683f7a4SShri Abhyankar   }
35544683f7a4SShri Abhyankar 
35554683f7a4SShri Abhyankar   /* loop over local rows, determining number of off diagonal entries */
3556dcca6d9dSJed Brown   ierr     = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr);
35571795a4d1SJed Brown   ierr     = PetscCalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr);
35584683f7a4SShri Abhyankar   rowcount = 0; nzcount = 0;
35594683f7a4SShri Abhyankar   for (i=0; i<mbs; i++) {
35604683f7a4SShri Abhyankar     dcount  = 0;
35614683f7a4SShri Abhyankar     odcount = 0;
35624683f7a4SShri Abhyankar     for (j=0; j<bs; j++) {
35634683f7a4SShri Abhyankar       kmax = locrowlens[rowcount];
35644683f7a4SShri Abhyankar       for (k=0; k<kmax; k++) {
35654683f7a4SShri Abhyankar         tmp = mycols[nzcount++]/bs;
35664683f7a4SShri Abhyankar         if (!mask[tmp]) {
35674683f7a4SShri Abhyankar           mask[tmp] = 1;
35684683f7a4SShri Abhyankar           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
35694683f7a4SShri Abhyankar           else masked1[dcount++] = tmp;
35704683f7a4SShri Abhyankar         }
35714683f7a4SShri Abhyankar       }
35724683f7a4SShri Abhyankar       rowcount++;
35734683f7a4SShri Abhyankar     }
35744683f7a4SShri Abhyankar 
35754683f7a4SShri Abhyankar     dlens[i]  = dcount;
35764683f7a4SShri Abhyankar     odlens[i] = odcount;
35774683f7a4SShri Abhyankar 
35784683f7a4SShri Abhyankar     /* zero out the mask elements we set */
35794683f7a4SShri Abhyankar     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
35804683f7a4SShri Abhyankar     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
35814683f7a4SShri Abhyankar   }
35824683f7a4SShri Abhyankar 
35834683f7a4SShri Abhyankar 
35844683f7a4SShri Abhyankar   if (!sizesset) {
35854683f7a4SShri Abhyankar     ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
35864683f7a4SShri Abhyankar   }
35874683f7a4SShri Abhyankar   ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
35884683f7a4SShri Abhyankar 
35894683f7a4SShri Abhyankar   if (!rank) {
3590785e854fSJed Brown     ierr = PetscMalloc1((maxnz+1),&buf);CHKERRQ(ierr);
35914683f7a4SShri Abhyankar     /* read in my part of the matrix numerical values  */
35924683f7a4SShri Abhyankar     nz     = procsnz[0];
35934683f7a4SShri Abhyankar     vals   = buf;
35944683f7a4SShri Abhyankar     mycols = ibuf;
35954683f7a4SShri Abhyankar     if (size == 1) nz -= extra_rows;
35964683f7a4SShri Abhyankar     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
359726fbe8dcSKarl Rupp     if (size == 1) {
359826fbe8dcSKarl Rupp       for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0;
359926fbe8dcSKarl Rupp     }
36004683f7a4SShri Abhyankar 
36014683f7a4SShri Abhyankar     /* insert into matrix */
36024683f7a4SShri Abhyankar     jj = rstart*bs;
36034683f7a4SShri Abhyankar     for (i=0; i<m; i++) {
36044683f7a4SShri Abhyankar       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
36054683f7a4SShri Abhyankar       mycols += locrowlens[i];
36064683f7a4SShri Abhyankar       vals   += locrowlens[i];
36074683f7a4SShri Abhyankar       jj++;
36084683f7a4SShri Abhyankar     }
36094683f7a4SShri Abhyankar     /* read in other processors (except the last one) and ship out */
36104683f7a4SShri Abhyankar     for (i=1; i<size-1; i++) {
36114683f7a4SShri Abhyankar       nz   = procsnz[i];
36124683f7a4SShri Abhyankar       vals = buf;
36134683f7a4SShri Abhyankar       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3614479e424cSMichael Lange       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
36154683f7a4SShri Abhyankar     }
36164683f7a4SShri Abhyankar     /* the last proc */
36174683f7a4SShri Abhyankar     if (size != 1) {
36184683f7a4SShri Abhyankar       nz   = procsnz[i] - extra_rows;
36194683f7a4SShri Abhyankar       vals = buf;
36204683f7a4SShri Abhyankar       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
36214683f7a4SShri Abhyankar       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
3622479e424cSMichael Lange       ierr = MPIULong_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
36234683f7a4SShri Abhyankar     }
36244683f7a4SShri Abhyankar     ierr = PetscFree(procsnz);CHKERRQ(ierr);
36254683f7a4SShri Abhyankar   } else {
36264683f7a4SShri Abhyankar     /* receive numeric values */
3627785e854fSJed Brown     ierr = PetscMalloc1((nz+1),&buf);CHKERRQ(ierr);
36284683f7a4SShri Abhyankar 
36294683f7a4SShri Abhyankar     /* receive message of values*/
36304683f7a4SShri Abhyankar     vals   = buf;
36314683f7a4SShri Abhyankar     mycols = ibuf;
3632479e424cSMichael Lange     ierr   = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
36334683f7a4SShri Abhyankar 
36344683f7a4SShri Abhyankar     /* insert into matrix */
36354683f7a4SShri Abhyankar     jj = rstart*bs;
36364683f7a4SShri Abhyankar     for (i=0; i<m; i++) {
36374683f7a4SShri Abhyankar       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
36384683f7a4SShri Abhyankar       mycols += locrowlens[i];
36394683f7a4SShri Abhyankar       vals   += locrowlens[i];
36404683f7a4SShri Abhyankar       jj++;
36414683f7a4SShri Abhyankar     }
36424683f7a4SShri Abhyankar   }
36434683f7a4SShri Abhyankar   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
36444683f7a4SShri Abhyankar   ierr = PetscFree(buf);CHKERRQ(ierr);
36454683f7a4SShri Abhyankar   ierr = PetscFree(ibuf);CHKERRQ(ierr);
36464683f7a4SShri Abhyankar   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
36474683f7a4SShri Abhyankar   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
36484683f7a4SShri Abhyankar   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
36494683f7a4SShri Abhyankar   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
36504683f7a4SShri Abhyankar   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
36514683f7a4SShri Abhyankar   PetscFunctionReturn(0);
36524683f7a4SShri Abhyankar }
36534683f7a4SShri Abhyankar 
36544683f7a4SShri Abhyankar #undef __FUNCT__
36554a2ae208SSatish Balay #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
3656133cdb44SSatish Balay /*@
3657133cdb44SSatish Balay    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
3658133cdb44SSatish Balay 
3659133cdb44SSatish Balay    Input Parameters:
3660133cdb44SSatish Balay .  mat  - the matrix
3661133cdb44SSatish Balay .  fact - factor
3662133cdb44SSatish Balay 
3663c5eb9154SBarry Smith    Not Collective, each process can use a different factor
3664fee21e36SBarry Smith 
36658c890885SBarry Smith    Level: advanced
36668c890885SBarry Smith 
3667133cdb44SSatish Balay   Notes:
36688c07d4e3SBarry Smith    This can also be set by the command line option: -mat_use_hash_table <fact>
3669133cdb44SSatish Balay 
3670133cdb44SSatish Balay .keywords: matrix, hashtable, factor, HT
3671133cdb44SSatish Balay 
3672133cdb44SSatish Balay .seealso: MatSetOption()
3673133cdb44SSatish Balay @*/
36747087cfbeSBarry Smith PetscErrorCode  MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
3675133cdb44SSatish Balay {
36764ac538c5SBarry Smith   PetscErrorCode ierr;
36775bf65638SKris Buschelman 
36785bf65638SKris Buschelman   PetscFunctionBegin;
36794ac538c5SBarry Smith   ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr);
36805bf65638SKris Buschelman   PetscFunctionReturn(0);
36815bf65638SKris Buschelman }
36825bf65638SKris Buschelman 
36835bf65638SKris Buschelman #undef __FUNCT__
36845bf65638SKris Buschelman #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
36857087cfbeSBarry Smith PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
36865bf65638SKris Buschelman {
368725fdafccSSatish Balay   Mat_MPIBAIJ *baij;
3688133cdb44SSatish Balay 
3689133cdb44SSatish Balay   PetscFunctionBegin;
3690133cdb44SSatish Balay   baij          = (Mat_MPIBAIJ*)mat->data;
3691133cdb44SSatish Balay   baij->ht_fact = fact;
3692133cdb44SSatish Balay   PetscFunctionReturn(0);
3693133cdb44SSatish Balay }
3694f2a5309cSSatish Balay 
36954a2ae208SSatish Balay #undef __FUNCT__
36964a2ae208SSatish Balay #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
36979230625dSJed Brown PetscErrorCode  MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3698f2a5309cSSatish Balay {
3699f2a5309cSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
37005fd66863SKarl Rupp 
3701f2a5309cSSatish Balay   PetscFunctionBegin;
370221e72a00SBarry Smith   if (Ad)     *Ad     = a->A;
370321e72a00SBarry Smith   if (Ao)     *Ao     = a->B;
370421e72a00SBarry Smith   if (colmap) *colmap = a->garray;
3705f2a5309cSSatish Balay   PetscFunctionReturn(0);
3706f2a5309cSSatish Balay }
370785535b8eSBarry Smith 
370885535b8eSBarry Smith /*
370985535b8eSBarry Smith     Special version for direct calls from Fortran (to eliminate two function call overheads
371085535b8eSBarry Smith */
371185535b8eSBarry Smith #if defined(PETSC_HAVE_FORTRAN_CAPS)
371285535b8eSBarry Smith #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
371385535b8eSBarry Smith #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
371485535b8eSBarry Smith #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
371585535b8eSBarry Smith #endif
371685535b8eSBarry Smith 
371785535b8eSBarry Smith #undef __FUNCT__
371885535b8eSBarry Smith #define __FUNCT__ "matmpibiajsetvaluesblocked"
371985535b8eSBarry Smith /*@C
372085535b8eSBarry Smith   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
372185535b8eSBarry Smith 
372285535b8eSBarry Smith   Collective on Mat
372385535b8eSBarry Smith 
372485535b8eSBarry Smith   Input Parameters:
372585535b8eSBarry Smith + mat - the matrix
372685535b8eSBarry Smith . min - number of input rows
372785535b8eSBarry Smith . im - input rows
372885535b8eSBarry Smith . nin - number of input columns
372985535b8eSBarry Smith . in - input columns
373085535b8eSBarry Smith . v - numerical values input
373185535b8eSBarry Smith - addvin - INSERT_VALUES or ADD_VALUES
373285535b8eSBarry Smith 
373385535b8eSBarry Smith   Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.
373485535b8eSBarry Smith 
373585535b8eSBarry Smith   Level: advanced
373685535b8eSBarry Smith 
373785535b8eSBarry Smith .seealso:   MatSetValuesBlocked()
373885535b8eSBarry Smith @*/
373985535b8eSBarry Smith PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
374085535b8eSBarry Smith {
374185535b8eSBarry Smith   /* convert input arguments to C version */
374285535b8eSBarry Smith   Mat        mat  = *matin;
374385535b8eSBarry Smith   PetscInt   m    = *min, n = *nin;
374485535b8eSBarry Smith   InsertMode addv = *addvin;
374585535b8eSBarry Smith 
374685535b8eSBarry Smith   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
374785535b8eSBarry Smith   const MatScalar *value;
374885535b8eSBarry Smith   MatScalar       *barray     = baij->barray;
3749ace3abfcSBarry Smith   PetscBool       roworiented = baij->roworiented;
375085535b8eSBarry Smith   PetscErrorCode  ierr;
375185535b8eSBarry Smith   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
375285535b8eSBarry Smith   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
3753d0f46423SBarry Smith   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
375485535b8eSBarry Smith 
375585535b8eSBarry Smith   PetscFunctionBegin;
375685535b8eSBarry Smith   /* tasks normally handled by MatSetValuesBlocked() */
375726fbe8dcSKarl Rupp   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
375885535b8eSBarry Smith #if defined(PETSC_USE_DEBUG)
3759e7e72b3dSBarry Smith   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
3760e32f2f54SBarry Smith   if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
376185535b8eSBarry Smith #endif
376285535b8eSBarry Smith   if (mat->assembled) {
376385535b8eSBarry Smith     mat->was_assembled = PETSC_TRUE;
376485535b8eSBarry Smith     mat->assembled     = PETSC_FALSE;
376585535b8eSBarry Smith   }
376685535b8eSBarry Smith   ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
376785535b8eSBarry Smith 
376885535b8eSBarry Smith 
376985535b8eSBarry Smith   if (!barray) {
3770785e854fSJed Brown     ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
377185535b8eSBarry Smith     baij->barray = barray;
377285535b8eSBarry Smith   }
377385535b8eSBarry Smith 
377426fbe8dcSKarl Rupp   if (roworiented) stepval = (n-1)*bs;
377526fbe8dcSKarl Rupp   else stepval = (m-1)*bs;
377626fbe8dcSKarl Rupp 
377785535b8eSBarry Smith   for (i=0; i<m; i++) {
377885535b8eSBarry Smith     if (im[i] < 0) continue;
377985535b8eSBarry Smith #if defined(PETSC_USE_DEBUG)
3780e32f2f54SBarry Smith     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
378185535b8eSBarry Smith #endif
378285535b8eSBarry Smith     if (im[i] >= rstart && im[i] < rend) {
378385535b8eSBarry Smith       row = im[i] - rstart;
378485535b8eSBarry Smith       for (j=0; j<n; j++) {
378585535b8eSBarry Smith         /* If NumCol = 1 then a copy is not required */
378685535b8eSBarry Smith         if ((roworiented) && (n == 1)) {
378785535b8eSBarry Smith           barray = (MatScalar*)v + i*bs2;
378885535b8eSBarry Smith         } else if ((!roworiented) && (m == 1)) {
378985535b8eSBarry Smith           barray = (MatScalar*)v + j*bs2;
379085535b8eSBarry Smith         } else { /* Here a copy is required */
379185535b8eSBarry Smith           if (roworiented) {
379285535b8eSBarry Smith             value = v + i*(stepval+bs)*bs + j*bs;
379385535b8eSBarry Smith           } else {
379485535b8eSBarry Smith             value = v + j*(stepval+bs)*bs + i*bs;
379585535b8eSBarry Smith           }
379685535b8eSBarry Smith           for (ii=0; ii<bs; ii++,value+=stepval) {
379785535b8eSBarry Smith             for (jj=0; jj<bs; jj++) {
379885535b8eSBarry Smith               *barray++ = *value++;
379985535b8eSBarry Smith             }
380085535b8eSBarry Smith           }
380185535b8eSBarry Smith           barray -=bs2;
380285535b8eSBarry Smith         }
380385535b8eSBarry Smith 
380485535b8eSBarry Smith         if (in[j] >= cstart && in[j] < cend) {
380585535b8eSBarry Smith           col  = in[j] - cstart;
380697e5c40aSBarry Smith           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
380726fbe8dcSKarl Rupp         } else if (in[j] < 0) continue;
380885535b8eSBarry Smith #if defined(PETSC_USE_DEBUG)
3809cb9801acSJed Brown         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
381085535b8eSBarry Smith #endif
381185535b8eSBarry Smith         else {
381285535b8eSBarry Smith           if (mat->was_assembled) {
381385535b8eSBarry Smith             if (!baij->colmap) {
3814ab9863d7SBarry Smith               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
381585535b8eSBarry Smith             }
381685535b8eSBarry Smith 
381785535b8eSBarry Smith #if defined(PETSC_USE_DEBUG)
381885535b8eSBarry Smith #if defined(PETSC_USE_CTABLE)
381985535b8eSBarry Smith             { PetscInt data;
382085535b8eSBarry Smith               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
3821e32f2f54SBarry Smith               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
382285535b8eSBarry Smith             }
382385535b8eSBarry Smith #else
3824e32f2f54SBarry Smith             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
382585535b8eSBarry Smith #endif
382685535b8eSBarry Smith #endif
382785535b8eSBarry Smith #if defined(PETSC_USE_CTABLE)
382885535b8eSBarry Smith             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
382985535b8eSBarry Smith             col  = (col - 1)/bs;
383085535b8eSBarry Smith #else
383185535b8eSBarry Smith             col = (baij->colmap[in[j]] - 1)/bs;
383285535b8eSBarry Smith #endif
383385535b8eSBarry Smith             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
3834ab9863d7SBarry Smith               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
383585535b8eSBarry Smith               col  =  in[j];
383685535b8eSBarry Smith             }
383726fbe8dcSKarl Rupp           } else col = in[j];
383897e5c40aSBarry Smith           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
383985535b8eSBarry Smith         }
384085535b8eSBarry Smith       }
384185535b8eSBarry Smith     } else {
384285535b8eSBarry Smith       if (!baij->donotstash) {
384385535b8eSBarry Smith         if (roworiented) {
384485535b8eSBarry Smith           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
384585535b8eSBarry Smith         } else {
384685535b8eSBarry Smith           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
384785535b8eSBarry Smith         }
384885535b8eSBarry Smith       }
384985535b8eSBarry Smith     }
385085535b8eSBarry Smith   }
385185535b8eSBarry Smith 
385285535b8eSBarry Smith   /* task normally handled by MatSetValuesBlocked() */
385385535b8eSBarry Smith   ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
385485535b8eSBarry Smith   PetscFunctionReturn(0);
385585535b8eSBarry Smith }
3856dfb205c3SBarry Smith 
3857dfb205c3SBarry Smith #undef __FUNCT__
3858dfb205c3SBarry Smith #define __FUNCT__ "MatCreateMPIBAIJWithArrays"
3859dfb205c3SBarry Smith /*@
3860dfb205c3SBarry Smith      MatCreateMPIBAIJWithArrays - creates a MPI BAIJ matrix using arrays that contain in standard
3861dfb205c3SBarry Smith          CSR format the local rows.
3862dfb205c3SBarry Smith 
3863dfb205c3SBarry Smith    Collective on MPI_Comm
3864dfb205c3SBarry Smith 
3865dfb205c3SBarry Smith    Input Parameters:
3866dfb205c3SBarry Smith +  comm - MPI communicator
3867dfb205c3SBarry Smith .  bs - the block size, only a block size of 1 is supported
3868dfb205c3SBarry Smith .  m - number of local rows (Cannot be PETSC_DECIDE)
3869dfb205c3SBarry Smith .  n - This value should be the same as the local size used in creating the
3870dfb205c3SBarry Smith        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3871dfb205c3SBarry Smith        calculated if N is given) For square matrices n is almost always m.
3872dfb205c3SBarry Smith .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3873dfb205c3SBarry Smith .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3874dfb205c3SBarry Smith .   i - row indices
3875dfb205c3SBarry Smith .   j - column indices
3876dfb205c3SBarry Smith -   a - matrix values
3877dfb205c3SBarry Smith 
3878dfb205c3SBarry Smith    Output Parameter:
3879dfb205c3SBarry Smith .   mat - the matrix
3880dfb205c3SBarry Smith 
3881dfb205c3SBarry Smith    Level: intermediate
3882dfb205c3SBarry Smith 
3883dfb205c3SBarry Smith    Notes:
3884dfb205c3SBarry Smith        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3885dfb205c3SBarry Smith      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3886dfb205c3SBarry Smith      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3887dfb205c3SBarry Smith 
38883adadaf3SJed Brown      The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
38893adadaf3SJed Brown      the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
38903adadaf3SJed Brown      block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
38913adadaf3SJed Brown      with column-major ordering within blocks.
38923adadaf3SJed Brown 
3893dfb205c3SBarry Smith        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3894dfb205c3SBarry Smith 
3895dfb205c3SBarry Smith .keywords: matrix, aij, compressed row, sparse, parallel
3896dfb205c3SBarry Smith 
3897dfb205c3SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
389869b1f4b7SBarry Smith           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3899dfb205c3SBarry Smith @*/
39007087cfbeSBarry Smith PetscErrorCode  MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3901dfb205c3SBarry Smith {
3902dfb205c3SBarry Smith   PetscErrorCode ierr;
3903dfb205c3SBarry Smith 
3904dfb205c3SBarry Smith   PetscFunctionBegin;
3905f23aa3ddSBarry Smith   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3906dfb205c3SBarry Smith   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3907dfb205c3SBarry Smith   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3908dfb205c3SBarry Smith   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3909dfb205c3SBarry Smith   ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
3910d47bf9aaSJed Brown   ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3911dfb205c3SBarry Smith   ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
3912d47bf9aaSJed Brown   ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3913dfb205c3SBarry Smith   PetscFunctionReturn(0);
3914dfb205c3SBarry Smith }
3915